"""
HKJC Race Card Scraper for Notion - Fixed Version
Correctly extracts all fields from the correct cell positions
"""
import os
import re
from datetime import datetime
from typing import List, Dict

import requests
from loguru import logger
from playwright.sync_api import sync_playwright

# Notion API Configuration
# The integration token is read from the environment only; a secret must never
# be committed as a fallback default. When NOTION_API_KEY is unset the token is
# an empty string, so requests made with it will not authenticate.
# NOTE(review): the token that used to be hard-coded here should be treated as
# compromised and rotated.
NOTION_API_KEY = os.getenv("NOTION_API_KEY", "")
# No default target database; callers pass the database id explicitly.
NOTION_DATABASE_ID = None
# Value sent in the "Notion-Version" request header.
NOTION_VERSION = "2022-06-28"


class NotionClient:
    """Minimal client for the Notion REST API.

    Every public method issues one JSON POST against ``base_url`` using the
    headers prepared in ``__init__`` (bearer token, JSON content type and the
    ``Notion-Version`` header) and raises on a non-success HTTP status.
    """

    def __init__(self, api_key: str, timeout: float = 30.0):
        """Store the credentials and build the shared request headers.

        Args:
            api_key: Notion integration token, sent as a bearer token.
            timeout: Seconds ``requests`` waits on each HTTP call before
                raising a timeout error. Without a timeout a stalled
                connection would block the caller indefinitely.
        """
        self.api_key = api_key
        self.timeout = timeout
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            "Notion-Version": NOTION_VERSION
        }
        self.base_url = "https://api.notion.com/v1"

    def _post(self, path: str, payload: Dict) -> Dict:
        """POST ``payload`` as JSON to ``base_url/path`` and decode the reply.

        Raises:
            requests.HTTPError: if the response status is 4xx or 5xx.
            requests.Timeout: if no response arrives within ``self.timeout``.
        """
        response = requests.post(
            f"{self.base_url}/{path}",
            headers=self.headers,
            json=payload,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def search_databases(self, query: str = "") -> List[Dict]:
        """Search for databases whose title matches ``query``.

        Only the ``results`` of a single response are returned; pagination
        cursors are not followed.

        Returns:
            The list under the ``results`` key, or ``[]`` if it is absent.
        """
        payload = {
            "query": query,
            "filter": {"property": "object", "value": "database"}
        }
        return self._post("search", payload).get('results', [])

    def create_database(self, parent_page_id: str, title: str) -> Dict:
        """Create the race-card database under an existing Notion page.

        Args:
            parent_page_id: Id of the page that will contain the database.
            title: Display title of the new database.

        Returns:
            The decoded JSON body of the API response.
        """
        payload = {
            "parent": {"page_id": parent_page_id},
            "title": [{"type": "text", "text": {"content": title}}],
            # One column per runner field written by the scraper.
            "properties": {
                "Race ID": {"title": {}},
                "Date": {"date": {}},
                "Venue": {"select": {"options": [
                    {"name": "Sha Tin", "color": "blue"},
                    {"name": "Happy Valley", "color": "green"}
                ]}},
                "Race Number": {"number": {}},
                "Horse Number": {"number": {}},
                "Horse Name": {"rich_text": {}},
                "Horse ID": {"rich_text": {}},
                "Jockey": {"rich_text": {}},
                "Trainer": {"rich_text": {}},
                "Weight": {"number": {}},
                "Barrier": {"number": {}},
                "Rating": {"number": {}},
                "Last 6 Runs": {"rich_text": {}},
                "Age": {"number": {}},
                "Sex": {"rich_text": {}},
                "Gear": {"rich_text": {}},
                "Owner": {"rich_text": {}}
            }
        }
        return self._post("databases", payload)

    def add_page(self, database_id: str, properties: Dict) -> Dict:
        """Add one page (row) to a database.

        Args:
            database_id: Id of the target database.
            properties: Notion property payload for the new page.

        Returns:
            The decoded JSON body of the API response.
        """
        payload = {
            "parent": {"database_id": database_id},
            "properties": properties
        }
        return self._post("pages", payload)


class HKJCRaceCardScraper:
    """Scrapes HKJC race cards and stores the runners in Notion.

    ``scrape_race`` / ``scrape_all_races`` load race-card pages with
    Playwright and return plain dictionaries; ``save_to_notion`` creates one
    Notion page per runner.
    """

    BASE_URL = "https://racing.hkjc.com/en-us/local/information/racecard"

    # Patterns are compiled once here rather than on every call / table row.
    _DISTANCE_RE = re.compile(r'(\d+)M')
    _PRIZE_RE = re.compile(r'\$([0-9,]+)')
    _HORSE_ID_RE = re.compile(r'horseid=([A-Z0-9_]+)')

    # Rows with fewer cells than this are not treated as runner rows.
    _MIN_CELLS = 12

    # Free-text Notion fields that are truncated are cut to this many chars.
    _TEXT_LIMIT = 100

    def __init__(self, headless: bool = True):
        """Create the scraper and its Notion client.

        Args:
            headless: Whether Chromium is launched without a visible window.
        """
        self.headless = headless
        self.notion = NotionClient(NOTION_API_KEY)
        logger.info("HKJC Race Card Scraper initialized")

    def scrape_race(self, date: str, venue: str, race_no: int) -> Dict:
        """Scrape a single race card.

        Args:
            date: Race date; dashes are replaced by slashes for the URL, so
                ``YYYY-MM-DD`` becomes ``YYYY/MM/DD``.
            venue: Racecourse code placed in the URL (``"ST"`` is mapped to
                Sha Tin when saving to Notion).
            race_no: Race number placed in the URL.

        Returns:
            A dict with ``date``, ``venue``, ``race_number``, ``runners`` and
            whatever race-level fields could be extracted. Scraping errors
            are logged, not raised; the dict is then returned with whatever
            had been collected (``runners`` stays empty if it was not reached).
        """
        race_data = {
            'date': date,
            'venue': venue,
            'race_number': race_no,
            'runners': []
        }

        date_url = date.replace('-', '/')
        url = f"{self.BASE_URL}?racedate={date_url}&Racecourse={venue}&RaceNo={race_no}"

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=self.headless)
            page = browser.new_page()

            try:
                logger.info(f"Scraping: {url}")
                page.goto(url, timeout=60000)
                # Fixed pause after navigation before reading the DOM.
                page.wait_for_timeout(3000)

                race_data.update(self._extract_race_info(page))

                runners = self._extract_runners(page, date, venue, race_no)
                race_data['runners'] = runners

                logger.info(f"Extracted {len(runners)} runners from Race {race_no}")

            except Exception as e:
                logger.error(f"Error scraping race: {e}")
            finally:
                # Always release the browser, even when scraping failed.
                browser.close()

        return race_data

    def _extract_race_info(self, page) -> Dict:
        """Extract race-level information from the page's body text.

        Returns:
            A dict that may contain ``distance`` (int, first ``<digits>M``
            occurrence), ``track`` (``'AWT'`` if the text contains
            ``'All Weather'``, else ``'Turf'``) and ``prize`` (the digits and
            commas following the first ``$``). Errors are logged and whatever
            was collected so far is returned.
        """
        info = {}

        try:
            text = page.inner_text('body')

            dist_match = self._DISTANCE_RE.search(text)
            if dist_match:
                info['distance'] = int(dist_match.group(1))

            info['track'] = 'AWT' if 'All Weather' in text else 'Turf'

            prize_match = self._PRIZE_RE.search(text)
            if prize_match:
                info['prize'] = prize_match.group(1)

        except Exception as e:
            logger.error(f"Error extracting race info: {e}")

        return info

    @staticmethod
    def _parse_int(text: str, default: int = 0) -> int:
        """Return ``int(text)`` for a plain decimal string, else ``default``.

        ``str.isdecimal()`` is used as the guard because every string it
        accepts is parseable by ``int()``; ``str.isdigit()`` also accepts
        characters such as superscript digits, for which ``int()`` raises
        ``ValueError``.
        """
        return int(text) if text.isdecimal() else default

    @staticmethod
    def _cell_text(cells, index: int, default: str = '') -> str:
        """Return the stripped text of ``cells[index]``, or ``default`` when
        the row has no cell at that position."""
        if index < len(cells):
            return cells[index].inner_text().strip()
        return default

    def _extract_runners(self, page, date: str, venue: str, race_no: int) -> List[Dict]:
        """Extract all runners from the race card table (``table.starter``).

        Rows are skipped when they have fewer than 12 cells, when the first
        cell is not a plain number, or when no horse name is found.

        Returns:
            One dict per runner. If the table is missing an empty list is
            returned; if an error occurs it is logged and the runners
            collected up to that point are returned.
        """
        runners = []

        try:
            table = page.query_selector('table.starter')

            if not table:
                logger.warning("Race card table not found")
                return []

            # Identical for every runner of the race, so built once.
            race_id = f"{date.replace('-', '')}_{venue}_{race_no}"

            # Get data rows from tbody only
            for row in table.query_selector_all('tbody tr'):
                cells = row.query_selector_all('td')

                if len(cells) < self._MIN_CELLS:
                    continue

                # Cell positions used by this scraper:
                # [0] Horse No, [1] Last 6 Runs, [3] Horse, [4] Brand No,
                # [5] Wt, [6] Jockey, [8] Draw, [9] Trainer, [11] Rtg,
                # [12] Rtg change, [16] Age, [18] Sex, [22] Gear, [23] Owner

                horse_num_text = self._cell_text(cells, 0)
                if not horse_num_text.isdecimal():
                    # Header / separator rows have no numeric horse number.
                    continue

                # Horse name (cell 3); the link, when present, carries the id.
                horse_cell = cells[3]
                horse_link = horse_cell.query_selector('a')

                if horse_link:
                    horse_name = horse_link.inner_text().strip()
                    href = horse_link.get_attribute('href') or ''
                    horse_id_match = self._HORSE_ID_RE.search(href)
                    horse_id = horse_id_match.group(1) if horse_id_match else ''
                else:
                    horse_name = horse_cell.inner_text().strip()
                    horse_id = ''

                if not horse_name:
                    continue

                runners.append({
                    'race_id': race_id,
                    'horse_number': int(horse_num_text),
                    'horse_name': horse_name,
                    'horse_id': horse_id,
                    'brand_no': self._cell_text(cells, 4),
                    'jockey': self._cell_text(cells, 6),
                    'trainer': self._cell_text(cells, 9),
                    # Numeric fields fall back to 0 when the cell is missing
                    # or does not hold a plain number.
                    'weight': self._parse_int(self._cell_text(cells, 5)),
                    'barrier': self._parse_int(self._cell_text(cells, 8)),
                    'rating': self._parse_int(self._cell_text(cells, 11)),
                    'rating_change': self._cell_text(cells, 12),
                    'age': self._parse_int(self._cell_text(cells, 16)),
                    'sex': self._cell_text(cells, 18),
                    'last_6_runs': self._cell_text(cells, 1),
                    'gear': self._cell_text(cells, 22),
                    'owner': self._cell_text(cells, 23),
                    'date': date,
                    'venue': venue,
                    'race_number': race_no
                })

        except Exception as e:
            logger.error(f"Error extracting runners: {e}")

        return runners

    def scrape_all_races(self, date: str, venue: str, num_races: int = 11) -> List[Dict]:
        """Scrape races ``1..num_races`` for one meeting.

        Returns:
            The ``scrape_race`` result for each race, in race-number order.
        """
        all_races = []

        for race_no in range(1, num_races + 1):
            logger.info(f"\nScraping Race {race_no}/{num_races}")
            race = self.scrape_race(date, venue, race_no)
            all_races.append(race)
            logger.info(f"  ✓ Race {race_no}: {len(race['runners'])} runners")

        return all_races

    @staticmethod
    def _rich_text(content: str) -> Dict:
        """Wrap ``content`` as a Notion ``rich_text`` property value."""
        return {"rich_text": [{"text": {"content": content}}]}

    @classmethod
    def _runner_properties(cls, runner: Dict) -> Dict:
        """Build the Notion page properties for one runner dict.

        Name-like free-text fields (horse name, jockey, trainer, gear, owner)
        are truncated to ``_TEXT_LIMIT`` characters. Any venue other than
        ``"ST"`` is stored as "Happy Valley".
        """
        limit = cls._TEXT_LIMIT
        venue_name = "Sha Tin" if runner['venue'] == "ST" else "Happy Valley"
        return {
            "Race ID": {
                "title": [{"text": {"content": runner['race_id']}}]
            },
            "Date": {"date": {"start": runner['date']}},
            "Venue": {"select": {"name": venue_name}},
            "Race Number": {"number": runner['race_number']},
            "Horse Number": {"number": runner['horse_number']},
            "Horse Name": cls._rich_text(runner['horse_name'][:limit]),
            "Horse ID": cls._rich_text(runner['horse_id']),
            "Jockey": cls._rich_text(runner['jockey'][:limit]),
            "Trainer": cls._rich_text(runner['trainer'][:limit]),
            "Weight": {"number": runner['weight']},
            "Barrier": {"number": runner['barrier']},
            "Rating": {"number": runner['rating']},
            "Last 6 Runs": cls._rich_text(runner['last_6_runs']),
            "Age": {"number": runner['age']},
            "Sex": cls._rich_text(runner['sex']),
            "Gear": cls._rich_text(runner['gear'][:limit]),
            "Owner": cls._rich_text(runner['owner'][:limit])
        }

    def save_to_notion(self, races: List[Dict], database_id: str):
        """Save every runner of ``races`` as a page in a Notion database.

        A failure for one runner is logged and does not stop the others.

        Args:
            races: Race dicts as returned by ``scrape_race``.
            database_id: Id of the target Notion database.

        Returns:
            The number of runners saved successfully.
        """
        logger.info(f"\nSaving {len(races)} races to Notion...")

        total_runners = 0

        for race in races:
            for runner in race['runners']:
                try:
                    properties = self._runner_properties(runner)
                    self.notion.add_page(database_id, properties)
                    total_runners += 1
                    logger.info(f"  ✓ {runner['horse_number']}. {runner['horse_name']}")

                except Exception as e:
                    logger.error(f"  ✗ Error saving {runner['horse_name']}: {e}")

        logger.info(f"\n✅ Saved {total_runners} runners to Notion")
        return total_runners


# Manual smoke test: scrape one race card and print its first few runners.
if __name__ == "__main__":
    card_scraper = HKJCRaceCardScraper(headless=True)

    # Race 1 of the "ST" meeting on the given date
    first_race = card_scraper.scrape_race("2026-03-15", "ST", 1)
    field = first_race['runners']

    print(f"\nRace 1: {len(field)} runners")
    # Show at most the first five runners.
    for entry in field[:5]:
        print(f"  {entry['horse_number']}. {entry['horse_name']}")
        print(f"     Jockey: {entry['jockey']}, Trainer: {entry['trainer']}")
        print(f"     Wt: {entry['weight']}, Bar: {entry['barrier']}, Rtg: {entry['rating']}")
