"""
HKJC Race Card Scraper
Scrapes public race card data from Hong Kong Jockey Club website
"""
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
from time import sleep
from loguru import logger
from typing import List, Dict, Optional
import re


class HKJCScraper:
    """Scrapes HKJC public race card data.

    All network access goes through one shared ``requests.Session`` carrying a
    browser-like User-Agent. Every request honours a configurable timeout and
    is followed by a polite delay so the public site is not hammered.
    """

    BASE_URL = "https://www.hkjc.com/english/racing"

    # Set once the loguru file sink has been registered.  Without this guard,
    # every HKJCScraper() constructed in the same process would call
    # logger.add() again and each log line would be written multiple times.
    _log_sink_added = False

    def __init__(self, delay: float = 2.0, timeout: int = 30):
        """
        Args:
            delay: Seconds to pause after each HTTP request (rate limiting).
            timeout: Per-request timeout in seconds.
        """
        self.delay = delay
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
        })
        # Register the file sink only once per process (see _log_sink_added).
        if not HKJCScraper._log_sink_added:
            logger.add("logs/scraper.log", rotation="10 MB")
            HKJCScraper._log_sink_added = True

    @staticmethod
    def _to_int(text: str, default: int = 0) -> int:
        """Parse a table cell as an int; return *default* for blank or
        non-numeric text instead of raising, so one malformed cell does not
        abort parsing of the whole row."""
        text = text.strip()
        return int(text) if text.isdigit() else default

    @staticmethod
    def _to_float(text: str, default: float = 0.0) -> float:
        """Parse a table cell as a float; return *default* for blank or
        non-plain-number text (e.g. '1:09.45'-style finish times or annotated
        weights) instead of raising."""
        text = text.strip()
        if not text:
            return default
        try:
            return float(text)
        except ValueError:
            return default

    def _make_request(self, url: str, params: Optional[Dict] = None) -> Optional[BeautifulSoup]:
        """GET *url* and return the parsed HTML, or ``None`` on any request error.

        Sleeps ``self.delay`` seconds after a successful response to respect
        the site's rate limit.
        """
        try:
            logger.info(f"Requesting: {url}")
            response = self.session.get(url, params=params, timeout=self.timeout)
            response.raise_for_status()
            sleep(self.delay)  # Respect rate limit
            return BeautifulSoup(response.content, 'lxml')
        except requests.exceptions.RequestException as e:
            logger.error(f"Request failed: {e}")
            return None

    def get_race_dates(self) -> List[str]:
        """Return up to 7 upcoming race dates as YYYYMMDD strings.

        Duplicate dates (the same meeting linked more than once on the page)
        are removed while preserving first-seen order.  Returns an empty list
        on any request or parse failure.
        """
        url = f"{self.BASE_URL}/RaceMeeting/Upcoming"
        soup = self._make_request(url)

        if not soup:
            return []

        race_dates = []
        try:
            # Meeting links embed the date as /YYYYMMDD/ in their href
            date_elements = soup.find_all('a', href=re.compile(r'/racing/RaceMeeting/'))
            for elem in date_elements:
                href = elem.get('href', '')
                date_match = re.search(r'/(\d{8})/', href)
                if date_match:
                    race_dates.append(date_match.group(1))

            # Deduplicate while preserving order — the page can link each
            # meeting several times, which previously produced repeats.
            race_dates = list(dict.fromkeys(race_dates))

            logger.info(f"Found {len(race_dates)} upcoming race dates")
            return race_dates[:7]  # Next 7 race days
        except Exception as e:
            logger.error(f"Error parsing race dates: {e}")
            return []

    def get_race_card(self, race_date: str, venue: str = "ST") -> Dict:
        """
        Get race card for a specific date

        Args:
            race_date: Date in YYYYMMDD format
            venue: ST (Sha Tin) or HV (Happy Valley)

        Returns:
            Dict with keys ``race_date``, ``venue``, ``races`` (list of
            per-race dicts), or ``{}`` on request/parse failure.
        """
        url = f"{self.BASE_URL}/RaceCard/English/RaceCard.aspx"
        params = {
            'RaceDate': race_date,
            'Racecourse': venue
        }

        soup = self._make_request(url, params)
        if not soup:
            return {}

        try:
            race_card = {
                'race_date': race_date,
                'venue': venue,
                'races': []
            }

            # One <table class="race_table"> per race on the card page
            race_tables = soup.find_all('table', class_='race_table')

            for race_table in race_tables:
                race_data = self._parse_race_table(race_table, race_date, venue)
                if race_data:
                    race_card['races'].append(race_data)

            logger.info(f"Parsed {len(race_card['races'])} races for {race_date}")
            return race_card
        except Exception as e:
            logger.error(f"Error parsing race card: {e}")
            return {}

    def _parse_race_table(self, table, race_date: str, venue: str) -> Dict:
        """Parse one race table into a dict of race metadata plus runners.

        Race number and distance come from the preceding
        ``div.race_header`` element; missing fields keep their defaults.
        Returns ``{}`` on parse failure.
        """
        try:
            race_data = {
                'race_number': 0,
                'race_time': None,
                'distance': 0,
                'track': '',
                'track_condition': '',
                'runners': []
            }

            # Race number / distance live in the header div just above the table
            header = table.find_previous('div', class_='race_header')
            if header:
                race_num = re.search(r'Race (\d+)', header.text)
                if race_num:
                    race_data['race_number'] = int(race_num.group(1))

                distance = re.search(r'(\d+)m', header.text)
                if distance:
                    race_data['distance'] = int(distance.group(1))

            # Parse runners, skipping the header row
            rows = table.find_all('tr')
            for row in rows[1:]:
                runner = self._parse_runner_row(row, race_date, venue, race_data['race_number'])
                if runner:
                    race_data['runners'].append(runner)

            return race_data
        except Exception as e:
            logger.error(f"Error parsing race table: {e}")
            return {}

    def _parse_runner_row(self, row, race_date: str, venue: str, race_number: int) -> Dict:
        """Parse one runner ``<tr>`` into a dict; ``{}`` if it has too few cells.

        Expected cell order: number, horse, jockey, trainer, weight, barrier.
        Numeric cells degrade to 0 on malformed text rather than raising —
        previously a weight or barrier that did not parse as a plain number
        raised and silently dropped the whole runner.
        """
        try:
            cells = row.find_all('td')
            if len(cells) < 5:
                return {}

            # Guard above guarantees indices 0-4 exist; only index 5 is optional.
            runner = {
                'race_id': f"{race_date}_{venue}_{race_number}",
                'horse_number': self._to_int(cells[0].text),
                'horse': {
                    'horse_id': '',
                    'horse_name': cells[1].text.strip()
                },
                'jockey': {
                    'jockey_id': '',
                    'jockey_name': cells[2].text.strip()
                },
                'trainer': {
                    'trainer_id': '',
                    'trainer_name': cells[3].text.strip()
                },
                'weight_carried': self._to_float(cells[4].text),
                'barrier': self._to_int(cells[5].text) if len(cells) > 5 else 0,
                'handicap_rating': 0,
                'last_6_runs': ''
            }

            # Extract horse ID from the horse-name link if available.
            # NOTE(review): the 'Ho No=' pattern (with a space) looks unusual
            # — confirm against live HKJC hrefs before relying on horse_id.
            horse_link = cells[1].find('a')
            if horse_link:
                href = horse_link.get('href', '')
                horse_id_match = re.search(r'Ho No=(\d+)', href)
                if horse_id_match:
                    runner['horse']['horse_id'] = horse_id_match.group(1)

            return runner
        except Exception as e:
            logger.error(f"Error parsing runner row: {e}")
            return {}

    def get_horse_profile(self, horse_id: str) -> Dict:
        """Get a detailed horse profile by HKJC horse id.

        Scans every label/value table row on the profile page and fills the
        matching profile fields.  Returns ``{}`` on request/parse failure.
        """
        url = f"{self.BASE_URL}/horseProfile.asp"
        params = {'horseNo': horse_id}

        soup = self._make_request(url, params)
        if not soup:
            return {}

        try:
            profile = {
                'horse_id': horse_id,
                'horse_name': '',
                'age': 0,
                'sex': '',
                'color': '',
                'country_of_origin': '',
                'sire': '',
                'dam': '',
                'past_performances': []
            }

            # Profile page presents details as label/value pairs in tables
            tables = soup.find_all('table')
            for table in tables:
                rows = table.find_all('tr')
                for row in rows:
                    cells = row.find_all('td')
                    if len(cells) >= 2:
                        label = cells[0].text.strip().lower()
                        value = cells[1].text.strip()

                        if 'horse name' in label:
                            profile['horse_name'] = value
                        elif 'age' in label:
                            profile['age'] = self._to_int(value)
                        elif 'sex' in label:
                            profile['sex'] = value
                        elif 'colour' in label or 'color' in label:
                            profile['color'] = value
                        elif 'country' in label:
                            profile['country_of_origin'] = value
                        elif 'sire' in label:
                            profile['sire'] = value
                        elif 'dam' in label:
                            profile['dam'] = value

            logger.info(f"Parsed profile for horse {horse_id}")
            return profile
        except Exception as e:
            logger.error(f"Error parsing horse profile: {e}")
            return {}

    def get_race_results(self, race_date: str, venue: str = "ST") -> List[Dict]:
        """Get race results for a specific date.

        Args:
            race_date: Date in YYYYMMDD format
            venue: ST (Sha Tin) or HV (Happy Valley)

        Returns:
            List of per-race result dicts; empty list on failure.
        """
        url = f"{self.BASE_URL}/Results/English/Results.aspx"
        params = {
            'RaceDate': race_date,
            'Racecourse': venue
        }

        soup = self._make_request(url, params)
        if not soup:
            return []

        try:
            results = []
            result_tables = soup.find_all('table', class_='result_table')

            for table in result_tables:
                race_result = self._parse_result_table(table, race_date, venue)
                if race_result:
                    results.append(race_result)

            logger.info(f"Parsed {len(results)} race results for {race_date}")
            return results
        except Exception as e:
            logger.error(f"Error parsing race results: {e}")
            return []

    def _parse_result_table(self, table, race_date: str, venue: str) -> Dict:
        """Parse a race result table into ``{race_id, race_number, runners}``.

        Expected cell order per row: position, horse number, horse name,
        jockey, finish time.  Finish times that are not plain numbers
        (e.g. '1:09.45') degrade to 0 instead of raising — previously one
        such cell aborted parsing of the entire result table.
        """
        try:
            result = {
                'race_id': '',
                'race_number': 0,
                'runners': []
            }

            # Skip the header row
            rows = table.find_all('tr')
            for row in rows[1:]:
                cells = row.find_all('td')
                if len(cells) >= 4:
                    # Guard guarantees indices 0-3; only index 4 is optional.
                    runner_result = {
                        'finishing_position': self._to_int(cells[0].text),
                        'horse_number': self._to_int(cells[1].text),
                        'horse_name': cells[2].text.strip(),
                        'jockey_name': cells[3].text.strip(),
                        'finish_time': self._to_float(cells[4].text) if len(cells) > 4 else 0,
                        'margin': 0.0
                    }
                    result['runners'].append(runner_result)

            return result
        except Exception as e:
            logger.error(f"Error parsing result table: {e}")
            return {}

# Example usage
def _demo() -> None:
    """Fetch upcoming race dates and print the card for the soonest meeting."""
    scraper = HKJCScraper(delay=2.0)

    # Upcoming meeting dates (YYYYMMDD strings)
    race_dates = scraper.get_race_dates()
    print(f"Upcoming race dates: {race_dates}")

    # Pull the race card for the first date, if any were found
    if race_dates:
        race_card = scraper.get_race_card(race_dates[0])
        print(f"Race card: {race_card}")


if __name__ == "__main__":
    _demo()
