"""
HK Racing Analytics - Main Runner
Fetch race cards, extract features, and generate ML predictions
"""
import sys
import os
import sqlite3
from datetime import datetime, timedelta
from loguru import logger
from typing import List, Dict

# Add paths: put the scripts/ and config/ directories that sit next to this
# file at the front of sys.path before the project imports below execute.
# Insert order matters: config/ is inserted last and therefore searched first.
# NOTE(review): presumably the project packages imported below live under
# these directories - confirm against the repository layout.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'scripts'))
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'config'))

from scrapers.hkjc_scraper import HKJCScraper
from features.feature_extractor import FeatureExtractor
from models import Base, engine, Runner, Race, Result, OddsHistory, Feature, Prediction
from config.settings import SCRAPER_DELAY, SCRAPER_TIMEOUT, MAX_RUNS_HISTORY, VENUES

# Configure logging
# Loguru pre-installs a stderr handler (id 0). Remove it first so the
# INFO-level handler added below is the only stderr sink; otherwise every
# INFO+ message is written twice and DEBUG output still gets through.
try:
    logger.remove(0)
except ValueError:
    # The default handler was already removed elsewhere; nothing to undo.
    pass
logger.add(sys.stderr, level="INFO")


class RacingAnalytics:
    """Main analytics runner.

    Ties together the HKJC scraper, the feature extractor and the database
    models: fetch race cards, store races/runners/features, then store
    heuristic predictions for every race.
    """

    def __init__(self):
        """Create the scraper and feature extractor used by the pipeline."""
        self.scraper = HKJCScraper(delay=SCRAPER_DELAY, timeout=SCRAPER_TIMEOUT)
        self.feature_extractor = FeatureExtractor()
        logger.info("Initialized Racing Analytics")

    @staticmethod
    def _make_race_id(race_date, race_number) -> str:
        """Return the race key used across tables: ``<race_date>_<race_number>``."""
        return f"{race_date}_{race_number}"

    @staticmethod
    def _make_runner_id(race_date, race_number, horse_number) -> str:
        """Return the runner key: ``<race_date>_<race_number>_<horse_number>``."""
        return f"{race_date}_{race_number}_{horse_number}"

    def fetch_upcoming_races(self, days_ahead: int = 7) -> List[str]:
        """Fetch upcoming race dates.

        Args:
            days_ahead: Maximum number of dates to return. This caps the
                *count* of dates taken from the scraper result; it is not a
                calendar window.

        Returns:
            The first ``days_ahead`` entries reported by the scraper
            (empty when the scraper reports nothing).
        """
        logger.info(f"Fetching upcoming races for next {days_ahead} days")
        # Guard against the scraper returning None instead of an empty list.
        race_dates = self.scraper.get_race_dates() or []
        return race_dates[:days_ahead]

    def process_race_card(self, race_date: str) -> Dict:
        """Fetch the race card for a date.

        Each venue key in ``VENUES`` is tried in turn; the first card that
        contains a non-empty ``'races'`` entry is returned.

        Returns:
            The race card dict, or ``{}`` when no venue has races that day.
        """
        logger.info(f"Processing race card for {race_date}")

        # Try every configured venue
        for venue in VENUES:
            race_card = self.scraper.get_race_card(race_date, venue)
            if race_card and race_card.get('races'):
                return race_card

        logger.warning(f"No race card found for {race_date}")
        return {}

    def extract_and_store_features(self, race_date: str, race_number: int, runner_data: Dict) -> "Feature":
        """Extract and store features for a runner.

        Args:
            race_date: Race date string (``run_daily`` passes ``YYYYMMDD``).
            race_number: Race number within the meeting.
            runner_data: Runner dict from the race card. Must contain
                ``'horse'``, ``'jockey'`` and ``'trainer'``; may contain
                ``'runner_id'`` and ``'horse_number'``.

        Returns:
            The committed ``Feature`` record.

        Raises:
            KeyError: If ``'horse'``, ``'jockey'`` or ``'trainer'`` is missing.
        """
        logger.info(f"Extracting features for runner {runner_data.get('horse_number')} in race {race_number}")

        # Extract features
        features = self.feature_extractor.extract_all_features(
            runner_data['horse'],
            runner_data['jockey'],
            runner_data['trainer'],
            race_date,
            race_number
        )

        # Fall back to the same composite key run_daily stores on Runner.
        # An empty runner_id here would make the Feature row unreachable from
        # generate_predictions, which looks features up by Runner.runner_id.
        runner_id = runner_data.get('runner_id') or self._make_runner_id(
            race_date, race_number, runner_data.get('horse_number')
        )

        # Create feature record; extracted features become column values
        feature_record = Feature(
            runner_id=runner_id,
            race_id=self._make_race_id(race_date, race_number),
            feature_date=datetime.now(),
            **features
        )

        # Save to database
        session = Base.session
        session.add(feature_record)
        session.commit()

        logger.info(f"Stored features for runner {runner_data.get('horse_number')}")
        return feature_record

    def train_model(self):
        """Train ML model on historical data (placeholder: only logs)."""
        logger.info("Training ML model")
        # TODO: Implement model training
        # For now, just placeholder
        logger.warning("Model training not yet implemented")

    def generate_predictions(self, race_date: str, race_number: int) -> List["Prediction"]:
        """Generate and store predictions for a race.

        Runners without a stored ``Feature`` row are skipped.

        Returns:
            The ``Prediction`` records that were committed; an empty list when
            the race has no stored runners.
        """
        logger.info(f"Generating predictions for race {race_number} on {race_date}")

        race_id = self._make_race_id(race_date, race_number)

        # Get all runners for this race
        session = Base.session
        runners = session.query(Runner).filter_by(race_id=race_id).all()

        if not runners:
            logger.warning(f"No runners found for race {race_number} on {race_date}")
            return []

        predictions = []
        for runner in runners:
            # Get features for this runner
            features = session.query(Feature).filter_by(runner_id=runner.runner_id).first()
            if not features:
                continue

            # TODO: Use actual ML model for prediction
            # For now, use simple heuristic
            win_prob = self._simple_heuristic(features)

            # NOTE(review): place/show are scaled *below* the win probability,
            # although a placing probability should not be lower than the win
            # probability - placeholder values, confirm before relying on them.
            predictions.append(
                Prediction(
                    runner_id=runner.runner_id,
                    race_id=race_id,
                    win_probability=win_prob,
                    place_probability=win_prob * 0.8,  # Simplified
                    show_probability=win_prob * 0.6,  # Simplified
                    confidence_score=0.5  # Placeholder
                )
            )

        # Save predictions
        session.add_all(predictions)
        session.commit()

        logger.info(f"Generated {len(predictions)} predictions")
        return predictions

    def _simple_heuristic(self, features: "Feature") -> float:
        """Simple heuristic for win probability.

        Starts at 0.1, adds ``class_rating * 0.01`` when a class rating is
        set, adds ``win_rate_last_6 * 0.05`` when that rate is positive, and
        clamps the result to ``[0.01, 0.99]``.
        """
        # TODO: Replace with actual ML model
        prob = 0.1  # Base probability

        # Adjust based on class rating
        if features.class_rating:
            prob += features.class_rating * 0.01

        # Adjust based on recent form
        if features.win_rate_last_6 and features.win_rate_last_6 > 0:
            prob += features.win_rate_last_6 * 0.05

        return min(max(prob, 0.01), 0.99)  # Clamp between 1% and 99%

    def run_daily(self):
        """Run daily analytics pipeline.

        Creates the tables, fetches the upcoming race dates, and for every
        race on every card stores the race, its runners and their features,
        then generates predictions for the race.
        """
        logger.info("Starting daily analytics pipeline")

        # Initialize database
        Base.metadata.create_all(engine)

        # Fetch upcoming races
        race_dates = self.fetch_upcoming_races()

        if not race_dates:
            logger.warning("No upcoming races found")
            return

        logger.info(f"Found {len(race_dates)} upcoming race dates")

        # Process each race date
        for race_date in race_dates:
            logger.info(f"Processing race date: {race_date}")

            # Fetch race card
            race_card = self.process_race_card(race_date)

            if not race_card:
                continue

            # Process each race
            for race_data in race_card.get('races', []):
                race_number = race_data.get('race_number')
                race_id = self._make_race_id(race_date, race_number)

                # Store race info
                race_record = Race(
                    race_id=race_id,
                    race_date=datetime.strptime(race_date, "%Y%m%d"),
                    venue=race_card.get('venue', 'ST'),
                    race_number=race_number,
                    distance=race_data.get('distance', 0),
                    track=race_data.get('track', '')
                )

                session = Base.session
                session.add(race_record)
                session.commit()

                # Process each runner
                for runner_data in race_data.get('runners', []):
                    horse = runner_data.get('horse', {})
                    jockey = runner_data.get('jockey', {})
                    trainer = runner_data.get('trainer', {})
                    runner_id = self._make_runner_id(
                        race_date, race_number, runner_data.get('horse_number')
                    )

                    # Store runner info
                    runner_record = Runner(
                        runner_id=runner_id,
                        race_id=race_id,
                        horse_id=horse.get('horse_id', ''),
                        jockey_id=jockey.get('jockey_id', ''),
                        trainer_id=trainer.get('trainer_id', ''),
                        horse_number=runner_data.get('horse_number', 0),
                        horse_name=horse.get('horse_name', ''),
                        jockey_name=jockey.get('jockey_name', ''),
                        trainer_name=trainer.get('trainer_name', ''),
                        weight_carried=runner_data.get('weight_carried', 0),
                        barrier=runner_data.get('barrier', 0),
                        handicap_rating=runner_data.get('handicap_rating', 0)
                    )

                    session.add(runner_record)
                    session.commit()

                    # Extract and store features under the runner_id that was
                    # just stored, so generate_predictions can find them.
                    self.extract_and_store_features(
                        race_date,
                        race_number,
                        {**runner_data, 'runner_id': runner_id}
                    )

                # Generate predictions
                self.generate_predictions(race_date, race_number)

        logger.info("Completed daily analytics pipeline")


def main():
    """Entry point: build the analytics runner and execute the daily pipeline."""
    RacingAnalytics().run_daily()


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
