"""
HKJC Data Collector - Updated for current HKJC website
"""
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
from typing import List, Dict, Optional
from loguru import logger
import re
import time


class HKJCCollector:
    """Collects public data from HKJC website"""
    
    BASE_URL = "https://www.hkjc.com/english/racing"
    
    def __init__(self, delay: float = 2.0, timeout: int = 30):
        """Configure request pacing and open a browser-like HTTP session.

        Args:
            delay: Seconds to sleep after each request made by this collector.
            timeout: Timeout in seconds handed to every ``session.get`` call.
        """
        # Headers that make the session look like a regular desktop browser.
        browser_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
        }

        http_session = requests.Session()
        http_session.headers.update(browser_headers)

        self.delay = delay
        self.timeout = timeout
        self.session = http_session
    
    def get_upcoming_race_dates(self) -> List[Dict]:
        """Get upcoming race dates from HKJC.

        Fetches the racing landing page and looks for a
        "Next Meeting ... DD/MM/YYYY" announcement, then pads the list with
        every Wednesday, Saturday and Sunday in the 14 days starting today.
        The padded days are a weekday heuristic, not confirmed fixtures.

        Returns:
            Up to 14 dicts sorted by date, each holding ``date`` (YYYYMMDD
            string), ``date_obj`` (datetime at midnight), ``venue`` ("HV" or
            "ST") and ``description``. An empty list when the page could not
            be fetched or parsed.
        """
        def build_entry(when: datetime) -> Dict:
            # Venue heuristic (Wed=HV, Sat/Sun=ST): only Wednesday maps to
            # Happy Valley, any other weekday is treated as Sha Tin.
            venue = "HV" if when.weekday() == 2 else "ST"
            return {
                'date': when.strftime('%Y%m%d'),
                'date_obj': when,
                'venue': venue,
                'description': 'Happy Valley' if venue == 'HV' else 'Sha Tin',
            }

        url = f"{self.BASE_URL}/"

        try:
            response = self.session.get(url, timeout=self.timeout)
            time.sleep(self.delay)

            soup = BeautifulSoup(response.content, 'lxml')
            dates = []

            # Look for "Next Meeting will be held in DD/MM/YYYY"
            match = re.search(r'Next Meeting.*?(\d{2})/(\d{2})/(\d{4})', soup.get_text())
            if match:
                day, month, year = match.groups()
                try:
                    announced = datetime.strptime(f"{year}{month}{day}", "%Y%m%d")
                except ValueError:
                    # The pattern only checks digit counts, so an impossible
                    # date (e.g. 31/02) can slip through. Drop just that entry
                    # instead of losing the whole schedule to the outer handler.
                    logger.warning(f"Ignoring invalid next-meeting date: {day}/{month}/{year}")
                else:
                    dates.append(build_entry(announced))

            # Generate typical race days for next 14 days. Start from midnight
            # so every 'date_obj' is comparable with the parsed one above.
            today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
            known = {entry['date'] for entry in dates}
            for offset in range(14):
                check_date = today + timedelta(days=offset)
                # Wed (2), Sat (5), Sun (6)
                if check_date.weekday() not in (2, 5, 6):
                    continue
                entry = build_entry(check_date)
                if entry['date'] not in known:
                    known.add(entry['date'])
                    dates.append(entry)

            # YYYYMMDD strings sort chronologically
            dates.sort(key=lambda item: item['date'])

            logger.info(f"Found {len(dates)} race dates")
            return dates[:14]

        except Exception as e:
            logger.error(f"Error getting race dates: {e}")
            return []
    
    def get_race_card(self, race_date: str, venue: str = "ST") -> Dict:
        """
        Fetch and parse the race card for one meeting

        A generated mock card is returned instead when the request raises,
        the status code is not 200, or the response body contains "404".

        Args:
            race_date: YYYYMMDD format
            venue: ST (Sha Tin) or HV (Happy Valley); it labels the returned
                card and is not part of the request parameters

        Returns:
            Dict with 'race_date', 'venue' and 'races'
        """
        endpoint = f"{self.BASE_URL}/racecard.asp"
        query = {'RaceDate': race_date}

        try:
            response = self.session.get(endpoint, params=query, timeout=self.timeout)
            time.sleep(self.delay)

            # The body is only inspected once the status is known to be 200.
            unavailable = response.status_code != 200 or '404' in response.text
            if unavailable:
                logger.warning(f"Race card not available for {race_date}")
                return self._create_mock_race_card(race_date, venue)

            return self._parse_race_card(response.text, race_date, venue)

        except Exception as e:
            logger.error(f"Error fetching race card: {e}")
            return self._create_mock_race_card(race_date, venue)
    
    def _parse_race_card(self, html: str, race_date: str, venue: str) -> Dict:
        """Build a race card dict from race card page HTML.

        Each <table> whose class attribute mentions "race" or "runner" is
        treated as one race and numbered in document order. A matching table
        that yields no runners is left out of the result but still uses up
        its race number.
        """
        document = BeautifulSoup(html, 'lxml')

        collected = []
        race_num = 0
        for table in document.find_all('table'):
            # Skip anything that is not a race table
            css_classes = str(table.get('class', '')).lower()
            if 'race' not in css_classes and 'runner' not in css_classes:
                continue

            race_num += 1
            race_id = f"{race_date}_{venue}_{race_num}"

            # First row is skipped (header); unparseable rows come back as None
            runners = []
            for row in table.find_all('tr')[1:]:
                entry = self._parse_runner_row(row)
                if entry:
                    entry['race_id'] = race_id
                    runners.append(entry)

            if not runners:
                continue

            collected.append({
                'race_id': race_id,
                'race_number': race_num,
                'distance': 0,
                'runners': runners
            })

        return {
            'race_date': race_date,
            'venue': venue,
            'races': collected
        }
    
    def _parse_runner_row(self, row) -> Optional[Dict]:
        """Parse one race card table row into a runner dict.

        Cells are read by position: 0 = horse number, 1 = horse name,
        2 = jockey name, 3 = trainer name, 5 = barrier (when present).
        NOTE(review): this column layout is taken from the indexing alone;
        confirm it against the live page markup.

        Args:
            row: Table row element exposing ``find_all('td')``.

        Returns:
            The runner dict, or None when the row has fewer than five cells
            or cannot be parsed (for example a non-numeric horse number).
            IDs, weight and rating are not read from the row and stay at
            their empty/zero placeholders.
        """
        try:
            cells = row.find_all('td')
            if len(cells) < 5:
                return None

            def cell_text(index: int) -> str:
                return cells[index].get_text(strip=True)

            return {
                # An empty cell counts as 0; non-numeric text raises ValueError
                'horse_number': int(cell_text(0) or 0),
                'horse': {
                    'horse_id': '',
                    'horse_name': cell_text(1)
                },
                'jockey': {
                    'jockey_id': '',
                    'jockey_name': cell_text(2)
                },
                'trainer': {
                    'trainer_id': '',
                    'trainer_name': cell_text(3)
                },
                'weight_carried': 0,
                # Five-cell rows are accepted, so the barrier column is optional
                'barrier': int(cell_text(5) or 0) if len(cells) > 5 else 0,
                'handicap_rating': 0
            }
        except (AttributeError, IndexError, TypeError, ValueError):
            # Best-effort parsing: a malformed row is skipped by the caller.
            # Only parsing errors are caught, so KeyboardInterrupt and
            # SystemExit still propagate.
            return None
    
    def _create_mock_race_card(self, race_date: str, venue: str) -> Dict:
        """Generate a placeholder race card for when real data is unavailable.

        The card always has 8 turf races with 12 runners each; names, IDs,
        weights and ratings are derived from the runner number.
        """
        logger.info(f"Creating mock data for {race_date}")

        # One distance per race; the list length fixes the race count at 8
        mock_distances = [1000, 1200, 1400, 1600, 1800, 2000, 1400, 1200]

        races = []
        for race_num, distance in enumerate(mock_distances, start=1):
            race_id = f"{race_date}_{venue}_{race_num}"

            # 12 runners per race
            field = [
                {
                    'horse_number': horse_num,
                    'horse': {
                        'horse_id': f'H{horse_num:03d}',
                        'horse_name': f'Horse {horse_num}'
                    },
                    'jockey': {
                        'jockey_id': f'J{horse_num:02d}',
                        'jockey_name': f'Jockey {horse_num}'
                    },
                    'trainer': {
                        'trainer_id': f'T{horse_num:02d}',
                        'trainer_name': f'Trainer {horse_num}'
                    },
                    'weight_carried': 115 + horse_num,
                    'barrier': horse_num,
                    'handicap_rating': 70 + horse_num,
                    'race_id': race_id
                }
                for horse_num in range(1, 13)
            ]

            races.append({
                'race_id': race_id,
                'race_number': race_num,
                'distance': distance,
                'track': 'Turf',
                'runners': field
            })

        return {
            'race_date': race_date,
            'venue': venue,
            'races': races
        }
    
    def get_race_results(self, race_date: str, venue: str = "ST") -> List[Dict]:
        """Get race results for a date.

        Every <table> whose class attribute mentions "result" is treated as
        one race, numbered in document order. Cells are read by position:
        0 = finishing position, 1 = horse number, 2 = horse name, 3 = jockey.
        NOTE(review): this column layout is taken from the indexing alone;
        confirm it against the live page markup.

        Args:
            race_date: YYYYMMDD format
            venue: ST (Sha Tin) or HV (Happy Valley); used only to build the
                race IDs, it is not sent with the request

        Returns:
            One dict per race that has at least one parsed runner, with
            'race_id', 'race_number' and 'runners'. Rows whose position or
            horse number is not numeric are skipped. An empty list is
            returned when the request or page parsing fails.
        """
        url = f"{self.BASE_URL}/results.asp"
        params = {'RaceDate': race_date}

        try:
            response = self.session.get(url, params=params, timeout=self.timeout)
            time.sleep(self.delay)

            soup = BeautifulSoup(response.content, 'lxml')
            results = []

            race_num = 0
            for table in soup.find_all('table'):
                if 'result' not in str(table.get('class', '')).lower():
                    continue

                race_num += 1
                runners = []

                # First row is skipped (header)
                for row in table.find_all('tr')[1:]:
                    cells = row.find_all('td')
                    if len(cells) < 4:
                        continue

                    try:
                        # An empty cell counts as 0
                        finishing_position = int(cells[0].get_text(strip=True) or 0)
                        horse_number = int(cells[1].get_text(strip=True) or 0)
                    except ValueError:
                        # A single non-numeric cell used to abort the whole
                        # date through the outer handler; drop only this row,
                        # the same way race card rows are skipped.
                        continue

                    runners.append({
                        'finishing_position': finishing_position,
                        'horse_number': horse_number,
                        'horse_name': cells[2].get_text(strip=True),
                        'jockey_name': cells[3].get_text(strip=True),
                        'finish_time': 0.0,
                        'margin': 0.0
                    })

                if runners:
                    results.append({
                        'race_id': f"{race_date}_{venue}_{race_num}",
                        'race_number': race_num,
                        'runners': runners
                    })

            return results

        except Exception as e:
            logger.error(f"Error fetching results: {e}")
            return []


# Manual smoke test; runs only when this module is executed as a script
if __name__ == "__main__":
    collector = HKJCCollector(delay=1.0)

    # List the upcoming meetings, previewing at most three of them
    upcoming = collector.get_upcoming_race_dates()
    print(f"Found {len(upcoming)} race dates")

    for meeting in upcoming[:3]:
        print(f"  {meeting['date']} - {meeting['description']}")

    # Fetch the card for the earliest meeting and summarise it
    if upcoming:
        first = upcoming[0]
        card = collector.get_race_card(first['date'], first['venue'])
        print(f"\nRace card for {first['date']}:")
        print(f"  Venue: {card['venue']}")

        races = card['races']
        print(f"  Races: {len(races)}")

        if races:
            print(f"\n  Race 1: {len(races[0]['runners'])} runners")
