
    i)                        d Z ddlZddlmZ ddlmZmZ ddlmZmZm	Z	 ddl
mZ ddlZddlZ G d d          Zed	k    r ed
          Ze                                Z ed ee           d           edd         D ]Z eded          ded                      ere                    ed         d         ed         d                   Z eded         d          d            eded                      ed eed                               ed         r0 ed eed         d         d                    d           dS dS dS dS )z8
HKJC Data Collector - Updated for current HKJC website
    N)BeautifulSoup)datetime	timedelta)ListDictOptional)loggerc                       e Zd ZdZdZddedefdZdee	         fd	Z
ddedede	fdZdededede	fdZdee	         fdZdedede	fdZddededee	         fdZdS )HKJCCollectorz&Collects public data from HKJC websitez#https://www.hkjc.com/english/racing       @   delaytimeoutc                     || _         || _        t          j                    | _        | j        j                            dddd           d S )NzoMozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36z?text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8zen-US,en;q=0.9)z
User-AgentAcceptzAccept-Language)r   r   requestsSessionsessionheadersupdate)selfr   r   s      d/home/node/.openclaw/workspace-simple/hk-racing-analytics/data_pipeline/collectors/hkjc_collector.py__init__zHKJCCollector.__init__   s]    
'))## LW/%
 %
 	 	 	 	 	    returnc                 \   | j          d}	 | j                            || j                  }t	          j        | j                   t          |j        d          }g }|	                                }t          j        d|          }|rs|                                \  }}}	|	 | | t          j        d          }
|
                                dk    rdnd}|                    |
||dk    rd	nd
d           t          j                    }t%          d          D ]}|t'          |          z   }|                                dv rn|                    d          t+          fd|D                       s>|                                dk    rdnd}|                    |||dk    rd	nd
d           |                    d            t/          j        dt3          |           d           |dd         S # t4          $ r#}t/          j        d|            g cY d}~S d}~ww xY w)z!Get upcoming race dates from HKJC/)r   lxmlz&Next Meeting.*?(\d{2})/(\d{2})/(\d{4})z%Y%m%d   HVSTzHappy ValleyzSha Tin)datedate_objvenuedescription   )days)r         c              3   0   K   | ]}|d          k    V  dS )r"   N ).0ddate_strs     r   	<genexpr>z8HKJCCollector.get_upcoming_race_dates.<locals>.<genexpr>B   s,      DDqyH4DDDDDDr   c                     | d         S )Nr"   r+   )xs    r   <lambda>z7HKJCCollector.get_upcoming_race_dates.<locals>.<lambda>L   s
    QvY r   )keyFound  race datesNzError getting race dates: )BASE_URLr   getr   timesleepr   r   contentget_textresearchgroupsr   strptimeweekdayappendnowranger   strftimeanysortr	   infolen	Exceptionerror)r   urlresponsesoupdatestextmatchdaymonthyearr#   r$   todayi
check_dateer.   s                   @r   get_upcoming_race_datesz%HKJCCollector.get_upcoming_race_dates   s   !!!3	|''T\'BBHJtz""" !16::DE ==??D IGNNE #(<<>> UD"0E0300#,Xx@@ !) 0 0 2 2a 7 7T$ ("5:d]]>>		     LNNE2YY  "YA%6%6%66
%%''944)228<<HDDDDeDDDDD (2(:(:(<(<(A(At$,(2%*=Bd]]>>PY	& &    JJ..J///K8U888999": 	 	 	L9a99:::IIIIII	s   G0G> >
H+H& H+&H+r!   	race_dater$   c                    | j          d}d|i}	 | j                            ||| j                  }t	          j        | j                   |j        dk    r%d|j        vr| 	                    |j        ||          S t          j        d|            |                     ||          S # t          $ r7}t          j        d|            |                     ||          cY d}~S d}~ww xY w)	z
        Get race card for a specific date
        
        Args:
            race_date: YYYYMMDD format
            venue: ST (Sha Tin) or HV (Happy Valley)
        z/racecard.aspRaceDateparamsr      404zRace card not available for zError fetching race card: N)r6   r   r7   r   r8   r9   r   status_coderO   _parse_race_cardr	   warning_create_mock_race_cardrI   rJ   )r   rY   r$   rK   r]   rL   rW   s          r   get_race_cardzHKJCCollector.get_race_cardU   s    ---i(	A|''FDL'QQHJtz"""#s**uHM/I/I,,X]IuMMMIiIIJJJ229eDDD 	A 	A 	AL9a99:::..y%@@@@@@@@	As$   A*B( ;,B( (
C)2,C$C)$C)htmlc                 v   t          |d          }||g d}|                    d          }d}|D ]}dt          |                    dd                                                    v s7dt          |                    dd                                                    v r|d	z  }| d
| d
| |dg d}	|                    d          d	d         }
|
D ]?}|                     |          }|r&|	d         |d<   |	d                             |           @|	d         r|d                             |	           	|S )zParse race card HTMLr   rY   r$   racestabler   raceclass runner   _)race_idrace_numberdistancerunnerstrNrp   rs   rh   )r   find_allstrr7   lower_parse_runner_rowrA   )r   re   rY   r$   rM   	race_cardtablesrace_numri   rj   rowsrowrm   s                r   ra   zHKJCCollector._parse_race_cardo   s}   T6** #
 
	 w'' 	4 	4EUYYw3344::<<<<CPUPYPYZacePfPfLgLgLmLmLoLo@o@oA"+@@e@@h@@#+ !!	  ~~d++ABB/ 7 7C!33C88F 7,0Oy)Y..v666	? 4g&--d333r   c           
      v   	 |                     d          }t          |          dk     rdS t          |d                             d          pd          dt          |          dk    r|d                             d          ndd	dt          |          d
k    r|d
                             d          ndddt          |          dk    r|d                             d          ndddt          |          dk    r+t          |d                             d          pd          ndddS #  Y dS xY w)zParse runner rowtdr(   Nr   Tstriprl   rn   horse_id
horse_namer   	jockey_idjockey_name   
trainer_idtrainer_name)horse_numberhorsejockeytrainerweight_carriedbarrierhandicap_rating)ru   rH   intr;   )r   r}   cellss      r   rx   zHKJCCollector._parse_runner_row   sf   	LL&&E5zzA~~t !$E!H$5$5D$5$A$A$FQ G G "CFu::PQ>>%("3"3$"3"?"?"?WY 
 "$DGJJQRNN58#4#44#4#@#@#@XZ 
 #%EHZZRS^^E!H$5$5D$5$A$A$AY[  #$FI%jjSTnn3uQx00t0<<ABBBZ[#$!  $	44s   (D3 DD3 3D8c                    t          j        d|            g }t          dd          D ]}g }t          dd          D ]L}|                    |d|dd| dd	|d
d| dd|d
d| dd|z   |d|z   | d| d| d           M|                    | d| d| |g d|dz
           d|d           |||dS )z<Create mock race card for testing when real data unavailablezCreating mock data for rn   	      H03dzHorse r   J02dzJockey r   TzTrainer r   s   F   ro   )r   r   r   r   r   r   r   rp   )i    x  i@  i  i  r   r   Turf)rp   rq   rr   trackrs   rg   )r	   rG   rC   rA   )r   rY   r$   rh   r{   rs   	horse_nums          r   rc   z$HKJCCollector._create_mock_race_card   s   9i99:::a 	 	HG"1b\\  	$-$7	$7$7$7&:y&:&: 
 &9%8%8%8'<'<'< 
 ':)&9&9&9(>9(>(>    '*Io(')I~"+@@e@@h@@#       ( LL'<<%<<(<<'LLLXXY\Z"      #
 
 	
r   c                    | j          d}d|i}	 | j                            ||| j                  }t	          j        | j                   t          |j        d          }g }|	                    d          }d}	|D ]]}
dt          |
                    dd	                                                    v r"|	d
z  }	| d| d|	 |	g d}|
	                    d          d
d         }|D ]}|	                    d          }t          |          dk    r|d                             t          |d                             d          pd          t          |d
                             d          pd          |d                             d          |d                             d          ddd           |d         r|                    |           _|S # t           $ r#}t#          j        d|            g cY d}~S d}~ww xY w)zGet race results for a datez/results.aspr[   r\   r   ri   r   resultrk   rl   rn   ro   )rp   rq   rs   rt   Nr      rs   Tr   r   r   g        )finishing_positionr   r   r   finish_timemarginzError fetching results: )r6   r   r7   r   r8   r9   r   r   r:   ru   rv   rw   rH   rA   r   r;   rI   r	   rJ   )r   rY   r$   rK   r]   rL   rM   resultsrz   r{   ri   r   r|   r}   r   rW   s                   r   get_race_resultszHKJCCollector.get_race_results   sT   ,,,i((	|''FDL'QQHJtz""" !16::DG ]]7++FH / /s599Wb#9#9::@@BBBBMH&/#D#D%#D#D(#D#D'/#% F !>>$//3D# 
 
 #T 2 2u::??"9-4469%(:K:KRV:K:W:W:\[\6]6]03E!H4E4ED4E4Q4Q4VUV0W0W.3Ah.?.?d.?.K.K/4Qx/@/@t/@/L/L/2*-6 6    i( /v...N 	 	 	L7A77888IIIIII	s   GG 
H	&H>H	H	N)r   r   )r!   )__name__
__module____qualname____doc__r6   floatr   r   r   r   rX   rv   rd   ra   r   rx   rc   r   r+   r   r   r   r      sH       004H e C    7d 7 7 7 7rA As A3 A$ A A A A4#S #S # # # # # #J    8(
 (
C (
D (
 (
 (
 (
T- -# -c -T$Z - - - - - -r   r   __main__g      ?)r   r4   r5   r   z  r"   z - r%   r$   z
Race card for :z	  Venue: z	  Races: rh   z
  Race 1: rs   z runners)r   r   bs4r   r   r   typingr   r   r   logurur	   r<   r8   r   r   	collectorrX   rN   printrH   r-   rd   cardr+   r   r   <module>r      sS           ( ( ( ( ( ( ( ( ' ' ' ' ' ' ' ' ' '       				 z z z z z z z z| zC(((I --//E	E
*33u::
*
*
*+++2A2Y 5 531V933=!1334444  M&&uQx'7q'9JKK4q&!1444555)$w-))***.##d7m,,..///= 	MEKT']1%5i%@!A!AKKKLLLLL% M M	M 	Mr   