"""
HKJC Playwright Scraper
Uses browser automation to scrape race cards from HKJC
"""
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout
from typing import List, Dict, Optional
from loguru import logger
from datetime import datetime, timedelta
import re
import time


class HKJCPlaywrightScraper:
    """Scrapes HKJC using Playwright browser automation"""
    
    BASE_URL = "https://www.hkjc.com/english/racing"
    
    def __init__(self, headless: bool = True, timeout: int = 30000):
        """Remember the browser settings shared by every scrape.

        Args:
            headless: Forwarded to ``chromium.launch`` each time a browser
                is started.
            timeout: Forwarded as ``timeout`` to page navigation and
                load-state waits (Playwright measures it in milliseconds).
        """
        self.headless, self.timeout = headless, timeout
        logger.info("Playwright scraper initialized")
    
    def get_upcoming_race_dates(self) -> List[Dict]:
        """Return up to 14 candidate race dates, earliest first.

        The "Next Meeting ... DD/MM/YYYY" date found on the HKJC racing
        homepage is used when it can be read.  It is merged with every
        Wednesday, Saturday and Sunday in the 14 days starting today; those
        days are a calendar heuristic, not a confirmed fixture list.
        Wednesdays are labelled Happy Valley (``HV``) and every other day
        Sha Tin (``ST``).

        A failed page fetch or an unparseable announced date is logged and
        does not stop the heuristic days from being returned, because they
        do not depend on the network.  Failures to start Playwright or to
        launch the browser still propagate to the caller.

        Returns:
            A list of dicts with keys ``date`` (``YYYYMMDD`` string),
            ``date_obj`` (``datetime`` at midnight), ``venue`` (``HV`` or
            ``ST``) and ``description``.
        """
        dates = []

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=self.headless)
            page = browser.new_page()

            try:
                logger.info("Fetching HKJC homepage...")
                page.goto(f"{self.BASE_URL}/", timeout=self.timeout)
                page.wait_for_load_state('networkidle', timeout=self.timeout)

                announced = self._extract_next_meeting(page.content())
                if announced is not None:
                    dates.append(announced)
            except Exception as e:
                # Best effort: the heuristic days below are still returned.
                logger.error(f"Error fetching race dates: {e}")
            finally:
                browser.close()

        # Typical race days.  Normalise "today" to midnight so every
        # date_obj is comparable with the one parsed from the page.
        known = {entry['date'] for entry in dates}
        today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        for offset in range(14):
            candidate = today + timedelta(days=offset)
            if candidate.weekday() not in (2, 5, 6):  # Wed, Sat, Sun
                continue
            entry = self._build_date_entry(candidate)
            if entry['date'] not in known:
                known.add(entry['date'])
                dates.append(entry)

        dates.sort(key=lambda entry: entry['date'])
        logger.info(f"Found {len(dates)} race dates")

        return dates[:14]

    def _build_date_entry(self, day: datetime) -> Dict:
        """Describe one race day; Wednesdays map to Happy Valley, others to Sha Tin."""
        venue = "HV" if day.weekday() == 2 else "ST"
        return {
            'date': day.strftime('%Y%m%d'),
            'date_obj': day,
            'venue': venue,
            'description': 'Happy Valley' if venue == 'HV' else 'Sha Tin'
        }

    def _extract_next_meeting(self, content: str) -> Optional[Dict]:
        """Return the entry for the page's "Next Meeting" date, or None if absent or invalid."""
        match = re.search(r'Next Meeting.*?(\d{2})/(\d{2})/(\d{4})', content)
        if not match:
            return None

        day, month, year = match.groups()
        try:
            meeting_day = datetime(int(year), int(month), int(day))
        except ValueError:
            # The regex accepts any digits, e.g. 31/02; skip impossible dates.
            logger.warning(f"Ignoring invalid next meeting date: {day}/{month}/{year}")
            return None
        return self._build_date_entry(meeting_day)
    
    def get_race_card(self, race_date: str, venue: str = "ST") -> Dict:
        """
        Get race card using Playwright

        Three candidate HKJC URLs are tried in order; the first one whose
        page yields at least one parsed race wins.  A page is skipped when
        the server answers with an HTTP error status, when its text contains
        "cannot be found", or when its title contains "404".  The body text
        is deliberately not searched for "404": that substring can occur in
        a genuine page and would wrongly reject it.

        When no URL yields races, or an unexpected error occurs, a mock
        race card is returned instead.  The mock has the same shape as a
        real card and is not flagged in the result.

        Args:
            race_date: YYYYMMDD format
            venue: ST (Sha Tin) or HV (Happy Valley)

        Returns:
            Dict with keys ``race_date``, ``venue`` and ``races``.
        """
        race_card = {
            'race_date': race_date,
            'venue': venue,
            'races': []
        }

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=self.headless)
            page = browser.new_page()

            try:
                # Try different HKJC URLs
                urls_to_try = [
                    f"{self.BASE_URL}/racecard.asp?RaceDate={race_date}",
                    f"https://racing.hkjc.com/racing/english/racecard/racecard.aspx?RaceDate={race_date}&Racecourse={venue}",
                    f"https://racing.hkjc.com/racing/information/english/racing/racecard.aspx?RaceDate={race_date}"
                ]

                for url in urls_to_try:
                    logger.info(f"Trying: {url}")

                    try:
                        response = page.goto(url, timeout=self.timeout)
                        # goto does not raise on 4xx/5xx; the status must be
                        # inspected.  It may return None, which is not an error.
                        if response is not None and response.status >= 400:
                            logger.warning(f"HTTP {response.status} for {url}")
                            continue

                        page.wait_for_load_state('networkidle', timeout=self.timeout)

                        # Soft error pages served with a success status
                        content = page.content()
                        if 'cannot be found' in content.lower() or '404' in page.title():
                            continue

                        logger.info("Found valid race card page")

                        # Parse race card; an empty result moves on to the next URL
                        races = self._parse_race_card_page(page, race_date, venue)
                        if races:
                            race_card['races'] = races
                            break
                    except PlaywrightTimeout:
                        logger.warning(f"Timeout for {url}")
                    except Exception as e:
                        logger.warning(f"Error for {url}: {e}")

                # If no races found, create mock data
                if not race_card['races']:
                    logger.warning("No race card found, creating mock data")
                    race_card = self._create_mock_race_card(race_date, venue)

            except Exception as e:
                logger.error(f"Error fetching race card: {e}")
                race_card = self._create_mock_race_card(race_date, venue)
            finally:
                browser.close()

        return race_card
    
    def _parse_race_card_page(self, page, race_date: str, venue: str) -> List[Dict]:
        """Collect races from every table on the page that looks like a race table.

        A table is a candidate when its text mentions "horse" or "jockey"
        (case-insensitive).  Candidates are numbered in page order, and a
        candidate consumes its number even when none of its rows parse, so
        the returned race numbers can have gaps.  The distance is the first
        ``<digits>m`` found in the table text, or 0 when there is none.

        Args:
            page: Object exposing ``query_selector_all`` (a Playwright page).
            race_date: Used as the first part of each ``race_id``.
            venue: Used as the second part of each ``race_id``.

        Returns:
            Races that produced at least one runner.  If an error occurs it
            is logged and the races collected up to that point are returned.
        """
        parsed = []

        try:
            sequence = 0
            for candidate in page.query_selector_all('table'):
                text = candidate.inner_text()
                lowered = text.lower()
                if 'horse' not in lowered and 'jockey' not in lowered:
                    continue

                sequence += 1
                race_id = f"{race_date}_{venue}_{sequence}"
                metres = re.search(r'(\d+)m', text)

                # The first <tr> is treated as the header and ignored.
                entries = []
                for table_row in candidate.query_selector_all('tr')[1:]:
                    entry = self._parse_runner_row(table_row)
                    if not entry:
                        continue
                    entry['race_id'] = race_id
                    entries.append(entry)

                if not entries:
                    continue

                parsed.append({
                    'race_id': race_id,
                    'race_number': sequence,
                    'distance': int(metres.group(1)) if metres else 0,
                    'track': 'Turf',
                    'runners': entries
                })

            logger.info(f"Parsed {len(parsed)} races")

        except Exception as e:
            logger.error(f"Error parsing race card: {e}")

        return parsed
    
    def _parse_runner_row(self, row) -> Optional[Dict]:
        """Convert one race-table row into a runner dict.

        Columns are read by position: cell 0 is the horse number, cells 1-3
        are the horse, jockey and trainer names, and cell 5 (when present)
        is the barrier.  Empty numeric cells become 0.  Identifier, weight
        and rating fields are always empty/zero placeholders.

        Args:
            row: Element exposing ``query_selector_all``; its ``td``
                children must expose ``inner_text``.

        Returns:
            The runner dict, or ``None`` when the row has fewer than five
            ``td`` cells, when the horse number or barrier is not numeric,
            or when reading the row fails.
        """
        try:
            cells = row.query_selector_all('td')
            if len(cells) < 5:
                return None

            try:
                horse_number = int(cells[0].inner_text().strip() or 0)
                barrier = 0
                if len(cells) > 5:
                    barrier = int(cells[5].inner_text().strip() or 0)
            except ValueError as e:
                # Header, separator and footnote rows routinely have text in
                # these columns; that is expected, not an error.
                logger.debug(f"Skipping non-runner row: {e}")
                return None

            # The guard above guarantees that cells 1-3 exist.
            return {
                'horse_number': horse_number,
                'horse': {
                    'horse_id': '',
                    'horse_name': cells[1].inner_text().strip()
                },
                'jockey': {
                    'jockey_id': '',
                    'jockey_name': cells[2].inner_text().strip()
                },
                'trainer': {
                    'trainer_id': '',
                    'trainer_name': cells[3].inner_text().strip()
                },
                'weight_carried': 0,
                'barrier': barrier,
                'handicap_rating': 0
            }
        except Exception as e:
            # Anything else (e.g. the element can no longer be read) is unexpected.
            logger.error(f"Error parsing runner: {e}")
            return None
    
    def _create_mock_race_card(self, race_date: str, venue: str) -> Dict:
        """Build a synthetic race card: eight turf races of twelve runners each.

        Every race holds the same twelve placeholder runners (numbered
        1-12), differing only in ``race_id``.  Race distances follow a fixed
        list.  The result has the same shape as a scraped card.

        Args:
            race_date: Copied into the card and into each ``race_id``.
            venue: Copied into the card and into each ``race_id``.

        Returns:
            Dict with keys ``race_date``, ``venue`` and ``races``.
        """
        logger.info(f"Creating mock data for {race_date}")

        distances = (1000, 1200, 1400, 1600, 1800, 2000, 1400, 1200)

        def build_runner(number: int, race_id: str) -> Dict:
            # Ids, weight, barrier and rating are all derived from the number.
            return {
                'horse_number': number,
                'horse': {
                    'horse_id': f'H{number:03d}',
                    'horse_name': f'Horse {number}'
                },
                'jockey': {
                    'jockey_id': f'J{number:02d}',
                    'jockey_name': f'Jockey {number}'
                },
                'trainer': {
                    'trainer_id': f'T{number:02d}',
                    'trainer_name': f'Trainer {number}'
                },
                'weight_carried': 115 + number,
                'barrier': number,
                'handicap_rating': 70 + number,
                'race_id': race_id
            }

        card_races = []
        for index, distance in enumerate(distances, start=1):
            race_id = f"{race_date}_{venue}_{index}"
            card_races.append({
                'race_id': race_id,
                'race_number': index,
                'distance': distance,
                'track': 'Turf',
                'runners': [build_runner(number, race_id) for number in range(1, 13)]
            })

        return {
            'race_date': race_date,
            'venue': venue,
            'races': card_races
        }


# Test
if __name__ == "__main__":
    # Manual smoke run: list the upcoming dates, then fetch the card for
    # the earliest one and print a short summary.
    scraper = HKJCPlaywrightScraper(headless=True)

    upcoming = scraper.get_upcoming_race_dates()
    print(f"Found {len(upcoming)} race dates")
    for entry in upcoming[:3]:
        print(f"  {entry['date']} - {entry['description']}")

    if upcoming:
        earliest = upcoming[0]
        print(f"\nFetching race card for {earliest['date']}...")
        card = scraper.get_race_card(earliest['date'], earliest['venue'])

        race_list = card['races']
        print(f"Venue: {card['venue']}")
        print(f"Races: {len(race_list)}")
        if race_list:
            print(f"\nFirst race: {len(race_list[0]['runners'])} runners")
