
    iB%              
       V   d Z ddlmZmZ ddlmZmZmZ ddl	m
Z
 ddlmZmZ ddlZ G d d          Zed	k    r ed
          Ze                                Z eded         d                     e                    ed         d         ed         d                   Z ed eed                               ed         dd         D ]YZ eded          d eed                    d           ed         r& eded         d         d         d                     XdS dS )z3
HKJC Playwright Scraper - Extracts Real Race Data
    )sync_playwrightTimeoutError)ListDictOptional)logger)datetime	timedeltaNc                       e Zd ZdZdZddedefdZdd	ed
ede	fdZ
d	ed
edee	         fdZdedee	         fdZd	ed
edee	         fdZdee	         fdZdS )HKJCRealScraperz1Scrapes real race data from HKJC using PlaywrightzGhttps://racing.hkjc.com/racing/information/english/racing/racecard.aspxT`  headlesstimeoutc                 J    || _         || _        t          j        d           d S )NzHKJC Real Scraper initialized)r   r   r   info)selfr   r   s      g/home/node/.openclaw/workspace-simple/hk-racing-analytics/data_pipeline/collectors/hkjc_real_scraper.py__init__zHKJCRealScraper.__init__   s'     344444    ST	race_datevenuereturnc                    ||g d}t                      5 }|j                            | j                  }|                                }	 t          j        d|            |                    | j        | j	                   |
                    d           |                     |||          }||d<   t          j        dt          |           d           n.# t          $ r!}t          j        d	|            Y d
}~nd
}~ww xY w|                                 n# |                                 w xY w	 d
d
d
           n# 1 swxY w Y   |S )z
        Get race card for a specific date
        
        Args:
            race_date: YYYYMMDD format
            venue: ST (Sha Tin) or HV (Happy Valley)
        )r   r   racesr   zFetching race card for )r   i  r   zParsed z raceszError fetching race card: N)r   chromiumlaunchr   new_pager   r   gotoBASE_URLr   wait_for_timeout_parse_raceslen	Exceptionerrorclose)	r   r   r   	race_cardpbrowserpager   es	            r   get_race_cardzHKJCRealScraper.get_race_card   s    #
 
	  	 !j'''??G##%%D AiAABBB		$-	>>>%%d+++ ))$	5AA%*	'"8c%jj8889999 ? ? ?=!==>>>>>>>>? %	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 ( sN   5EBCD
D$D ;D DDED33EE
Ec                    g }	 |                     d          }t          j        dt          |           d           d}|D ]}|                     d          }t          |          dk    rx|dz  }| d| d| |ddg d	}	|dd
         D ];}
|                     |
|	d                   }|r|	d                             |           <|	d         r|                    |	           |s+t          j        d           |                     |||          }n.# t          $ r!}t          j        d|            Y d
}~nd
}~ww xY w|S )zParse all races from the pageztable.bg_whiteFound z race tablesr   tr   _Turfrace_idrace_numberdistancetrackrunnersNr5   r9   z$Trying alternative parsing method...zError parsing races: )	query_selector_allr   r   r$   _parse_runnerappend_parse_races_alternativer%   r&   )r   r+   r   r   r   race_tablesrace_numtablerowsracerowrunnerr,   s                r   r#   zHKJCRealScraper._parse_races9   s   %	6 112BCCKK?[!1!1???@@@H$ + +//55t99q==MH&/#D#D%#D#D(#D#D'/$%!'#% D  $ABBx ; ;!%!3!3Ci!I!I! ; O226:::I +T***  NBCCC55dIuMM 	6 	6 	6L44455555555	6 s   DD 
D>D99D>r5   c           	      ,   	 |                     d          }t          |          dk     rdS |d                                                                         }|                                rt          |          nd}|d                             d          }|rp|                                                                }|                    d          pd}t          j	        d	|          }	|	r|	
                    d          nd
|d}
n2|d                                                                         }d
|d}
|rt          |          dk     rdS ||
|dddddddd|d|dS # t          $ r"}t          j        d|            Y d}~dS d}~ww xY w)zParse a runner rowtd   Nr   r1   ahref horseid=([A-Z0-9_]+)H03d   horse_id
horse_name	jockey_idjockey_name
trainer_idtrainer_namehorse_numberhorsejockeytrainerweight_carriedbarrierhandicap_ratingr5   zError parsing runner: )r:   r$   
inner_textstripisdigitintquery_selectorget_attributeresearchgroupr%   r   r&   )r   rC   r5   cellshorse_number_textrY   
horse_linkrQ   rI   horse_id_matchrP   r,   s               r   r;   zHKJCRealScraper._parse_runnerf   s   1	**400E5zzA~~t !&a 3 3 5 5 ; ; = =5F5N5N5P5PW30111VWL q0055J 	2'2244::<<
!//77=2 "$+BD!I!I6D`>//222J`lJ`J`J`"1X002288::
1|111  Z1!4!4t !- (", 
 "$#% 
 #%$&  #$'#$"#  (  	 	 	L5!5566644444	s#   (E' D%E' E' '
F1FFc                 <   g }	 |                     d          }|                    d          }t          j        dt	          |           d           |rd}g }|D ]}	|	                                                                 }
|	                    d          pd}t          j        d|          }|r|	                    d          nd}t	          |          dz   }|
                    |||
d	ddd
dddd|d| d| d| d           t	          |          dk    r*|
                    | d| d| |dd|d           g }|dz  }|r#|
                    | d| d| |dd|d           n.# t          $ r!}t          j        d|            Y d}~nd}~ww xY w|S )zAlternative parsing methodbodyza[href*="horseid="]r/   z horse linksr1   rI   rJ   rK   rO   rR   rU   r   r2   rX      i  r3   r4   zAlternative parsing error: N)r`   r:   r   r   r$   ra   re   rf   rg   rh   r<   r%   r&   )r   r+   r   r   r   texthorse_linksr?   r9   linkrQ   rI   rl   rP   rY   r,   s                   r   r=   z(HKJCRealScraper._parse_races_alternative   s`   <	<??6**D 112GHHKK?[!1!1???@@@ 0' !& !&D!%!2!2!8!8!:!:J--f55;D%'Y/F%M%MN:HP~33A666bH#&w<<!#3LNN(4(0*4" " 132"F"F24b#I#I*+#/+,&/#D#D%#D#D(#D#D$ $    7||r))*3'H'He'H'Hh'H'H+3(,%+'.& &    #% A  LL&/#D#D%#D#D(#D#D'/$(!'#*" "     	< 	< 	<L:q::;;;;;;;;	< s   E)E. .
F8FFc                 H   g }t          j                    }t          d          D ]|}|t          |          z   }|                                dv rQ|                                dk    rdnd}|                    |                    d          |||dk    rdnd	d
           }|S )zGet upcoming race datesro   )days)rN         rN   HVr   z%Y%m%dzHappy ValleyzSha Tin)datedate_objr   description)r	   nowranger
   weekdayr<   strftime)r   datestodayi
check_dater   s         r   get_upcoming_race_datesz'HKJCRealScraper.get_upcoming_race_dates   s     r 		 		A!2!2!22J!!##y00 * 2 2 4 4 9 9t&//99 *"5:d]]>>		     r   N)Tr   )r   )__name__
__module____qualname____doc__r!   boolrc   r   strr   r-   r   r#   r   r;   r=   r    r   r   r   r      s       ;;XH5 5 5s 5 5 5 5
" "s "3 "$ " " " "H+C + +T
 + + + +Z3# 3(4. 3 3 3 3jB BC BDQUJ B B B BHd      r   r   __main__Tr   zTesting with date: rx   r   z
Races: r   rN   z  Race r6   z: r9   z runnersz    First: rZ   rQ   )r   playwright.sync_apir   r   PlaywrightTimeouttypingr   r   r   logurur   r	   r
   rf   r   r   scraperr   r   printr-   cardr$   rB   r   r   r   <module>r      s    S R R R R R R R ' ' ' ' ' ' ' ' ' '       ( ( ( ( ( ( ( ( 				e e e e e e e eR zot,,,G++--E	E
2a 0
2
2333  q&!158G3DEED	E
*cc$w-((
*
*+++Wbqb! M MM]+MMss4	?/C/CMMMNNN	? 	MEKY 27 ;L IKKLLL M Mr   