
    Mid+                        d Z ddlmZmZ ddlmZmZmZ ddl	m
Z
 ddlmZmZ ddlZddlZ G d d          Zed	k    r ed
          Ze                                Z ed ee           d           edd         D ]Z eded          ded                      er eded         d          d           e                    ed         d         ed         d                   Z eded                      ed eed                               ed         r0 ed eed         d         d                    d           dS dS dS dS )zP
HKJC Playwright Scraper
Uses browser automation to scrape race cards from HKJC
    )sync_playwrightTimeoutError)ListDictOptional)logger)datetime	timedeltaNc                       e Zd ZdZdZddedefdZdee	         fd	Z
ddedede	fdZdededee	         fdZdee	         fdZdedede	fdZdS )HKJCPlaywrightScraperz0Scrapes HKJC using Playwright browser automationz#https://www.hkjc.com/english/racingT0u  headlesstimeoutc                 J    || _         || _        t          j        d           d S )NzPlaywright scraper initialized)r   r   r   info)selfr   r   s      m/home/node/.openclaw/workspace-simple/hk-racing-analytics/data_pipeline/collectors/hkjc_playwright_scraper.py__init__zHKJCPlaywrightScraper.__init__   s'     455555    returnc           	      b   g }t                      5 }|j                            | j                  }|                                }	 t          j        d           |                    | j         d| j	                   |
                    d| j	                   |                                }t          j        d|          }|rs|                                \  }}}	|	 | | t          j        d          }
|
                                dk    rd	nd
}|                    |
||d	k    rdndd           t          j                    }t)          d          D ]}|t+          |          z   }|                                dv rn|                    d          t/          fd|D                       s>|                                dk    rd	nd
}|                    |||d	k    rdndd           |                    d            t          j        dt3          |           d           n.# t4          $ r!}t          j        d|            Y d}~nd}~ww xY w|                                 n# |                                 w xY w	 ddd           n# 1 swxY w Y   |dd         S )z*Get upcoming race dates from HKJC homepager   zFetching HKJC homepage.../r   networkidlez&Next Meeting.*?(\d{2})/(\d{2})/(\d{4})z%Y%m%d   HVSTzHappy ValleyzSha Tin)datedate_objvenuedescription   )days)r         c              3   0   K   | ]}|d          k    V  dS )r   N ).0ddate_strs     r   	<genexpr>z@HKJCPlaywrightScraper.get_upcoming_race_dates.<locals>.<genexpr>=   s,      "H"HQ1V9#8"H"H"H"H"H"Hr   c                     | d         S )Nr   r(   )xs    r   <lambda>z?HKJCPlaywrightScraper.get_upcoming_race_dates.<locals>.<lambda>F   s
    6 r   )keyFound  race dateszError fetching race dates: N)r   chromiumlaunchr   new_pager   r   gotoBASE_URLr   wait_for_load_statecontentresearchgroupsr	   strptimeweekdayappendnowranger
   strftimeanysortlen	Exceptionerrorclose)r   datespbrowserpager9   matchdaymonthyearr    r!   todayi
check_dateer+   s                   @r   get_upcoming_race_datesz-HKJCPlaywrightScraper.get_upcoming_race_dates   sB    1	 !j'''??G##%%D- 7888		T]---t|	DDD(((MMM ,,.. 	"KWUU ',||~~$C"&44s44H'08DDH$,$4$4$6$6!$;$;DDELL ($,!&9>$~~I	" "    !r  A!&):):):!:J!))++y88#-#6#6x#@#@""H"H"H"H%"H"H"HHH ,6,>,>,@,@A,E,EDD4E!LL(0,6).AF$~~T]	* *    

22
333<SZZ<<<==== @ @ @>1>>????????@ c1	  1	  1	  1	  1	  1	  1	  1	  1	  1	  1	  1	  1	  1	  1	 f SbSzsN   5JG*H32I63
I=II6II6!J6JJJ #J r   	race_dater!   c           	      \   ||g d}t                      5 }|j                            | j                  }|                                }	 | j         d| d| d| d| g}|D ]}t          j        d|            	 |                    || j	                   |
                    d	| j	                   |                                }	d
|	vrJd|	                                vr4t          j        d           |                     |||          }
|
r|
|d<    nU# t          $ r t          j        d|            Y t           $ r%}t          j        d| d|            Y d}~d}~ww xY w|d         s*t          j        d           |                     ||          }nD# t           $ r7}t          j        d|            |                     ||          }Y d}~nd}~ww xY w|                                 n# |                                 w xY w	 ddd           n# 1 swxY w Y   |S )z
        Get race card using Playwright
        
        Args:
            race_date: YYYYMMDD format
            venue: ST (Sha Tin) or HV (Happy Valley)
        rV   r!   racesr   z/racecard.asp?RaceDate=zGhttps://racing.hkjc.com/racing/english/racecard/racecard.aspx?RaceDate=z&Racecourse=zQhttps://racing.hkjc.com/racing/information/english/racing/racecard.aspx?RaceDate=zTrying: r   r   404zcannot be foundzFound valid race card pagerY   zTimeout for z
Error for z: Nz&No race card found, creating mock datazError fetching race card: )r   r3   r4   r   r5   r7   r   r   r6   r   r8   r9   lower_parse_race_card_pagePlaywrightTimeoutwarningrF   _create_mock_race_cardrG   rH   )r   rV   r!   	race_cardrJ   rK   rL   urls_to_tryurlr9   rY   rT   s               r   get_race_cardz#HKJCPlaywrightScraper.get_race_cardP   s    #
 
	  .	 !j'''??G##%%D*  }HHYHH|^g||uz||shqss ' ! !CK 03 0 0111!		#t|	<<<000UUU #',,.. //4EW]]__4\4\"K(EFFF %)$>$>tYPU$V$VE$ &5:	' 2 %, ! ! !';c';';<<< $ ! ! !'>C'>'>1'>'>??? !
 !) NN#KLLL $ ; ;Iu M MI J J J=!==>>> 77	5II						J ].	  .	  .	  .	  .	  .	  .	  .	  .	  .	  .	  .	  .	  .	  .	 ` s   5H!4F" BDF"!E,<F">	E,E'!F"'E,,5F"!G;"
G#,-GG;G##G;&H!;HH!!H%(H%c                 
   g }	 |                     d          }d}|D ]}|                                }d|                                v sd|                                v r|dz  }| d| d| |ddg d}	t          j        d	|          }
|
r%t          |
                    d                    |	d
<   |                     d          }|dd         D ]?}|                     |          }|r&|	d         |d<   |	d                             |           @|	d         r|                    |	           t          j
        dt          |           d           n.# t          $ r!}t          j        d|            Y d}~nd}~ww xY w|S )z$Parse race card from Playwright pagetabler   horsejockey   _Turfrace_idrace_numberdistancetrackrunnersz(\d+)mrn   trNrl   rp   zParsed z raceszError parsing race card: )query_selector_all
inner_textr[   r:   r;   intgroup_parse_runner_rowr?   r   r   rE   rF   rG   )r   rL   rV   r!   rY   tablesrace_numre   
table_textracedistance_matchrowsrowrunnerrT   s                  r   r\   z+HKJCPlaywrightScraper._parse_race_card_page   s   &	:,,W55FH + +"--//
j..0000H
@P@P@R@R4R4RMH '0#D#D%#D#D(#D#D'/$%!'#% D &(Yy*%E%EN% H+.~/C/CA/F/F+G+GZ( !33D99D#ABBx ; ;!%!7!7!<!<! ;04YF9- O226:::I +T***K4#e**4445555 	: 	: 	:L8Q8899999999	: s   EE 
F E;;F c           	      d   	 |                     d          }t          |          dk     rdS t          |d                                                                         pd          dt          |          dk    r,|d                                                                         ndddt          |          dk    r,|d                                                                         ndd	dt          |          d
k    r,|d
                                                                         ndddt          |          dk    r;t          |d                                                                         pd          ndddS # t
          $ r"}t          j        d|            Y d}~dS d}~ww xY w)zParse runner row from tabletdr%   Nr    rh   horse_id
horse_namer   	jockey_idjockey_name   
trainer_idtrainer_name)horse_numberrf   rg   trainerweight_carriedbarrierhandicap_ratingzError parsing runner: )rr   rE   rt   rs   striprF   r   rG   )r   r}   cellsrT   s       r   rv   z'HKJCPlaywrightScraper._parse_runner_row   s   	**400E5zzA~~t !$E!H$7$7$9$9$?$?$A$A$FQ G G "CFu::PQ>>%("5"5"7"7"="="?"?"?WY 
 "$DGJJQRNN58#6#6#8#8#>#>#@#@#@XZ 
 #%EHZZRS^^E!H$7$7$9$9$?$?$A$A$AY[  #$FI%jjSTnn3uQx2244::<<ABBBZ[#$!  $  	 	 	L5!5566644444	s   (F EF 
F/F**F/c                    t          j        d|            g }t          dd          D ]}g }t          dd          D ]L}|                    |d|dd| dd	|d
d| dd|d
d| dd|z   |d|z   | d| d| d           M|                    | d| d| |g d|dz
           d|d           |||dS )z!Create mock race card for testingzCreating mock data for rh   	      H03dzHorse r   J02dzJockey r   TzTrainer r   s   F   ri   )r   rf   rg   r   r   r   r   rl   )i    x  i@  i  i  r   r   rj   rk   rX   )r   r   rA   r?   )r   rV   r!   rY   rx   rp   	horse_nums          r   r_   z,HKJCPlaywrightScraper._create_mock_race_card   s   9i99:::a 	 	HG"1b\\  	$-$7	$7$7$7&:y&:&: 
 &9%8%8%8'<'<'< 
 ':)&9&9&9(>9(>(>    '*Io(')I~"+@@e@@h@@#       ( LL'<<%<<(<<'LLLXXY\Z"      #
 
 	
r   N)Tr   )r   )__name__
__module____qualname____doc__r7   boolrt   r   r   r   rU   strrc   r\   r   rv   r_   r(   r   r   r   r      s       ::4H6 6 6s 6 6 6 6
7d 7 7 7 7r> >s >3 >$ > > > >@,S , ,d , , , ,\    :(
 (
C (
D (
 (
 (
 (
 (
 (
r   r   __main__Tr   r1   r2   r   z  r   z - r"   z
Fetching race card for z...r!   zVenue: zRaces: rY   z
First race: rp   z runners)r   playwright.sync_apir   r   r]   typingr   r   r   logurur   r	   r
   r:   timer   r   scraperrU   rI   printrE   r*   rc   cardr(   r   r   <module>r      sR    S R R R R R R R ' ' ' ' ' ' ' ' ' '       ( ( ( ( ( ( ( ( 				 v
 v
 v
 v
 v
 v
 v
 v
t z##T222G ++--E	E
*33u::
*
*
*+++2A2Y 5 531V933=!1334444  O?%(6*:???@@@$$U1Xf%5uQx7HII'W''(((,DM**,,---= 	OEM33tG}Q'7	'B#C#CMMMNNNNN' O O	O 	Or   