
    鶲i3              
       $   d Z ddlmZ ddlmZmZ ddlmZ ddlZddl	Z	ddl
m
Z
  e	j        dd          ZdZd	Z G d
 d          Z G d d          Zedk    r ed          Ze                    ddd          Z ed eed                    d           ed         dd         D ]dZ eded          ded                      eded          ded                      ed ed!          d"ed#          d$ed%                     cdS dS )&zq
HKJC Race Card Scraper for Notion - Fixed Version
Correctly extracts all fields from the correct cell positions
    )sync_playwright)ListDict)loggerN)datetimeNOTION_API_KEY2ntn_678392193454QPK9kFb3QDf147tRH47dTl5nUeK3FU4f6Tz
2022-06-28c                   d    e Zd ZdZdefdZddedee         fdZded	edefd
Z	dededefdZ
dS )NotionClientzClient for Notion APIapi_keyc                 H    || _         d| dt          d| _        d| _        d S )NzBearer zapplication/json)AuthorizationzContent-TypezNotion-Versionzhttps://api.notion.com/v1)r   NOTION_VERSIONheadersbase_url)selfr   s     i/home/node/.openclaw/workspace-simple/hk-racing-analytics/data_pipeline/collectors/hkjc_notion_scraper.py__init__zNotionClient.__init__   s5    0w00.,
 

 4     queryreturnc                     | j          d}|dddd}t          j        || j        |          }|                                 |                                                    dg           S )zSearch for databasesz/searchobjectdatabase)propertyvalue)r   filterr   jsonresults)r   requestspostr   raise_for_statusr    get)r   r   urlpayloadresponses        r   search_databaseszNotionClient.search_databases   sv    '''#+jAA
 
 =dlIII!!###}}""9b111r   parent_page_idtitlec           
      x   | j          d}d|idd|idgi ddi idd	i id
ddddddddgiiddi iddi iddi iddi iddi iddi iddi iddi iddi iddi iddi iddi id di id!di id"}t          j        || j        |#          }|                                 |                                S )$zCreate a new database in Notionz
/databasespage_idtextcontent)typer.   Race IDr+   DatedateVenueselectoptionsSha Tinblue)namecolorHappy ValleygreenRace NumbernumberHorse Number
Horse Name	rich_textHorse IDJockeyTrainerWeightBarrierRatingLast 6 RunsAgeSexGearOwner)parentr+   
propertiesr   r   r"   r#   r   r$   r    )r   r*   r+   r&   r'   r(   s         r   create_databasezNotionClient.create_database)   s   *** !.1%	5/ABBCGR= (Y&88+g>>1 %  "~ 2 {B/ ["- ;+ K, 8R. Hb> 8R.  R0!" "~#$ R(%& b)'( +r*)
 
6 =dlIII!!###}}r   database_idrN   c                     | j          d}d|i|d}t          j        || j        |          }|                                 |                                S )zAdd a page to databasez/pagesrQ   )rM   rN   r   rO   )r   rQ   rN   r&   r'   r(   s         r   add_pagezNotionClient.add_pageL   se    &&& %k2$
 

 =dlIII!!###}}r   N)r   )__name__
__module____qualname____doc__strr   r   r   r)   rP   rS    r   r   r   r      s        4 4 4 4 4	2 	2c 	24: 	2 	2 	2 	2!c !# !$ ! ! ! !FC T d      r   r   c            
           e Zd ZdZdZddefdZdededed	e	fd
Z
d	e	fdZdededed	ee	         fdZddededed	ee	         fdZdee	         defdZdS )HKJCRaceCardScraperz,Scrapes HKJC race cards and stores in Notionz8https://racing.hkjc.com/en-us/local/information/racecardTheadlessc                 n    || _         t          t                    | _        t	          j        d           d S )Nz"HKJC Race Card Scraper initialized)r\   r   r   notionr   info)r   r\   s     r   r   zHKJCRaceCardScraper.__init___   s/     ">22899999r   r3   venuerace_nor   c                 8   |||g d}|                     dd          }| j         d| d| d| }t                      5 }|j                            | j                  }|                                }		 t          j        d|            |		                    |d	
           |	
                    d           |                     |	          }
|                    |
           |                     |	|||          }||d<   t          j        dt          |           d|            n.# t          $ r!}t          j        d|            Y d}~nd}~ww xY w|                                 n# |                                 w xY w	 ddd           n# 1 swxY w Y   |S )zScrape a single race)r3   r`   race_numberrunners-/z
?racedate=z&Racecourse=z&RaceNo=r\   z
Scraping: i`  )timeouti  rd   z
Extracted z runners from Race zError scraping race: N)replaceBASE_URLr   chromiumlaunchr\   new_pager   r_   gotowait_for_timeout_extract_race_infoupdate_extract_runnerslen	Exceptionerrorclose)r   r3   r`   ra   	race_datadate_urlr&   pbrowserpage	race_inford   es                r   scrape_racezHKJCRaceCardScraper.scrape_raced   s    "	
 
	 <<S))XX(XXXXwXX 	 !j'''??G##%%D ...///		#u	---%%d+++ 33D99	  +++//dE7KK'.	)$SWSS'SSTTTT : : :8Q8899999999: )	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 , sN   5F4B1D&%E)&
E0EE)EE)F)E??FFFc                    i }	 |                     d          }ddl}|                    d|          }|r%t          |                    d                    |d<   d|v rd|d	<   nd
|d	<   |                    d|          }|r|                    d          |d<   n.# t
          $ r!}t          j        d|            Y d}~nd}~ww xY w|S )zExtract race informationbodyr   Nz(\d+)M   distancezAll WeatherAWTtrackTurfz\$([0-9,]+)prizezError extracting race info: )
inner_textresearchintgrouprt   r   ru   )r   r{   r_   r.   r   
dist_matchprize_matchr}   s           r   rp   z&HKJCRaceCardScraper._extract_race_info   s   	=??6**DIII9d33J <#&z'7'7':':#;#;Z $$ %W &W))ND99K 5 + 1 1! 4 4W 	= 	= 	=L;;;<<<<<<<<	= s   BB 
C$C  Cc                 |   g }	 |                     d          }|st          j        d           g S |                    d          }|D ]C}|                    d          }	t	          |	          dk     r,|	d                                                                         }
|
                                smt          |
          }|	d         }|                     d          }|rq|                                                                }|	                    d	          pd
}ddl
}|                    d|          }|r|                    d          nd
}n(|                                                                }d
}|s8t	          |	          dk    r,|	d                                                                         nd
}t	          |	          dk    r,|	d                                                                         nd
}t	          |	          dk    r,|	d                                                                         nd}|                                rt          |          nd}t	          |	          dk    r,|	d                                                                         nd
}t	          |	          dk    r,|	d                                                                         nd}|                                rt          |          nd}t	          |	          dk    r,|	d                                                                         nd
}t	          |	          dk    r,|	d                                                                         nd}|                                rt          |          nd}t	          |	          dk    r,|	d                                                                         nd
}t	          |	          dk    r,|	d                                                                         nd}|                                rt          |          nd}t	          |	          dk    r,|	d                                                                         nd
} t	          |	          dk    r,|	d                                                                         nd
}!t	          |	          dk    r,|	d                                                                         nd
}"i d|                    dd
           d| d| d|d|d|d|d |d!|d"|d#|d$|d%|d&|d'| d(|d)|!d*|"d+|||d,}#|                    |#           En.# t          $ r!}$t          j        d-|$            Y d}$~$nd}$~$ww xY w|S ).z(Extract all runners from race card tableztable.starterzRace card table not foundztbody trtd   r      ahrefr   Nzhorseid=([A-Z0-9_]+)r         0      	                  race_idre   _horse_number
horse_namehorse_idbrand_nojockeytrainerweightbarrierratingrating_changeagesexlast_6_runsgearownerr3   )r`   rc   zError extracting runners: )query_selectorr   warningquery_selector_allrs   r   stripisdigitr   get_attributer   r   r   ri   appendrt   ru   )%r   r{   r3   r`   ra   rd   tablerowsrowcellshorse_num_textr   
horse_cell
horse_linkr   r   r   horse_id_matchr   last_6r   weight_textr   r   barrier_textr   r   rating_textr   r   age_textr   r   r   r   runnerr}   s%                                        r   rr   z$HKJCRaceCardScraper._extract_runners   s   ^	;''88E :;;;	 ++J77D Q' Q'..t44u::?? "'q!4!4!6!6!<!<!>!>%--// ">22 #1X
'66s;;
 	"!+!6!6!8!8!>!>!@!@J%33F;;ArDIII%'YY/F%M%MN:HP~33A666bHH!+!6!6!8!8!>!>!@!@J!H!  ;>e**q..q,,..44666b<?JJNN58..0066888PR?B5zzA~~eAh113399;;;SV-8-@-@-B-BI[))):=e**q..q,,..44666b@CE

QuQx2244::<<<TW/;/C/C/E/EL#l+++1;>u::>>%(--//55777r@CE

ReBi2244::<<<UX-8-@-@-B-BI[))) CFe**r//b	 4 4 6 6 < < > > >WY=@ZZ"__59//1177999RU'/'7'7'9'9@c(mmmq8;E

ReBi**,,22444R9<UbuRy++--33555b:=e**r//b	,,..44666r$,,sB"7"7KK%KK'KK"L !* 	
  f w f w f $] 3 3 "6 D  U!" D#$ ##*'  , v&&&&cQ'f  	; 	; 	;L9a99::::::::	; s   ,V UV 
V9V44V9r   	num_racesc           
         g }t          d|dz             D ]v}t          j        d| d|            |                     |||          }|                    |           t          j        d| dt          |d                    d           w|S )zScrape all races for a dayr   z
Scraping Race rf   u     ✓ Race : rd    runners)ranger   r_   r~   r   rs   )r   r3   r`   r   	all_racesra   races          r   scrape_all_racesz$HKJCRaceCardScraper.scrape_all_races  s    	Q	A.. 	Q 	QGK@7@@Y@@AAA##D%99DT"""KOgOOT)_1E1EOOOPPPPr   racesrQ   c           
         t          j        dt          |           d           d}|D ]}|d         D ]}	 i dddd|d	         iigid
dd|d         iiddd|d         dk    rdndiidd|d         idd|d         idddd|d         dd         iigidddd|d         iigid ddd|d!         dd         iigid"ddd|d#         dd         iigid$d|d%         id&d|d'         id(d|d)         id*ddd|d+         iigid,d|d-         id.ddd|d/         iigid0ddd|d1         dd         iigid2ddd|d3         dd         iigi}| j                            ||           |d4z  }t          j        d5|d          d6|d                     j# t
          $ r+}t          j        d7|d          d8|            Y d}~d}~ww xY wt          j        d9| d:           |S );z!Save all races to Notion databasez
Saving z races to Notion...r   rd   r1   r+   r.   r/   r   r2   r3   startr4   r5   r9   r`   STr7   r;   r=   r>   rc   r?   r   r@   rA   r   Nd   rB   r   rC   r   rD   r   rE   r   rF   r   rG   r   rH   r   rI   r   rJ   r   rK   r   rL   r   r   u     ✓ . u     ✗ Error saving r   u   
✅ Saved z runners to Notion)r   r_   rs   r^   rS   rt   ru   )r   r   rQ   total_runnersr   r   rN   r}   s           r   save_to_notionz"HKJCRaceCardScraper.save_to_notion  sn   ?E

???@@@ 	T 	TDy/ T TT"!#v	6);L/M&N%O$" '6&>)B C	"
  (V&/UYBYBYYY_m,n!o" &&2G'H" '6.3I(J" %{fy&Q]J^_c`c_cJd>e5f4g&h" #[FYzHZ<[3\2]$^" !;&9fXFVW[X[W[F\:]1^0_"`" "K6IviGXY]Z]Y]G^;_2`1a#b" !8VH-=">" "HfY.?#@" !8VH-=">" &v	6R_K`?a6b5c'd"  &-8!"" v	6%=7Q.R-ST#"$ F6NSWTWSWDX8Y/Z.[ \%"&  +)VG_UYVYUYEZ9[0\/]!^'"J, K((jAAA!Q&MK Y)? Y Y6,CW Y YZZZZ  T T TL!Rvl7K!R!Rq!R!RSSSSSSSST9T> 	D=DDDEEEs   E$F
G) GGN)T)r   )rT   rU   rV   rW   rj   boolr   rX   r   r   r~   rp   r   rr   r   r   rY   r   r   r[   r[   Z   s)       66IH: : : : : :
" "C "# "$ " " " "H$    4d3 ds dS dTRVZ d d d dL
 
S 
 
 
dSWj 
 
 
 
'DJ 'S ' ' ' ' ' 'r   r[   __main__Trg   z
2026-03-15r   r   z	
Race 1: rd   r   r   z  r   r   r   z     Jockey: r   z, Trainer: r   z	     Wt: r   z, Bar: r   z, Rtg: r   )rW   playwright.sync_apir   typingr   r   logurur   r"   osr   getenvr   NOTION_DATABASE_IDr   r   r[   rT   scraperr~   r   printrs   rrY   r   r   <module>r      s    0 / / / / /                				       +-abb E E E E E E E EPa a a a a a a aJ z!!4000G |T155D	E
5ss4	?++
5
5
5666)_RaR  R R91^$99,99:::DakDDa	lDDEEEP!H+PPa	lPP1X;PPQQQQ R Rr   