"""
Puja API Cache Manager

This module handles fetching, caching, and refreshing puja information from the backend API.
Cache is stored in a JSON file and refreshed weekly or on-demand.

TOKEN OPTIMIZATION:
- Extracts lightweight index (names, IDs, categories only)
- Uses simple keyword matching first
- Optionally uses LLM for complex queries (with minimal context)
- Fetches full details only for matched pujas
"""
import json
import logging
import os
from datetime import datetime, timedelta
from typing import Dict, List, Optional
from pathlib import Path
import httpx

# Module-level logger, following the standard one-logger-per-module convention.
logger = logging.getLogger(__name__)

# Cache configuration
CACHE_FILE = "data/puja_api_cache.json"  # relative to project root; resolved in PujaAPICache.__init__
CACHE_DURATION_DAYS = 7  # Refresh weekly
API_URL = "https://karishyewebbackend.divami.com/all-pujas"  # backend endpoint returning the full puja catalog


class PujaAPICache:
    """Manages caching of puja information from the backend API."""
    
    def __init__(self, cache_file: str = CACHE_FILE):
        """Initialize the cache manager."""
        # Get project root directory reliably (works in dev and production)
        # Go up from app/utils/puja_api_cache.py -> app/utils -> app -> project_root
        current_file = Path(__file__).resolve()
        self.project_root = current_file.parent.parent.parent
        self.cache_path = self.project_root / cache_file
        logger.debug(f"[PUJA CACHE] Cache initialized")
    
    def _load_cache(self) -> Optional[Dict]:
        """Load cache from disk if it exists and is valid."""
        try:
            if not self.cache_path.exists():
                logger.debug(f"[PUJA CACHE] Cache file not found")
                return None
            
            with self.cache_path.open('r', encoding='utf-8') as f:
                cache_data = json.load(f)
            
            # Check if cache is expired
            cached_time = datetime.fromisoformat(cache_data.get('cached_at', '2000-01-01'))
            expiry_time = cached_time + timedelta(days=CACHE_DURATION_DAYS)
            
            if datetime.now() > expiry_time:
                logger.debug(f"[PUJA CACHE] Cache expired")
                return None
            
            logger.debug(f"[PUJA CACHE] Valid cache found")
            return cache_data
            
        except Exception as e:
            logger.error(f"[PUJA CACHE] Error loading cache: {type(e).__name__}")
            return None
    
    def _save_cache(self, data: Dict) -> None:
        """Save cache to disk."""
        try:
            # Ensure the data directory exists
            self.cache_path.parent.mkdir(parents=True, exist_ok=True)
            
            cache_data = {
                'cached_at': datetime.now().isoformat(),
                'data': data
            }
            
            with self.cache_path.open('w', encoding='utf-8') as f:
                json.dump(cache_data, f, indent=2, ensure_ascii=False)
            
            logger.debug(f"[PUJA CACHE] Cache saved successfully")
            
        except Exception as e:
            logger.error(f"[PUJA CACHE] Error saving cache: {type(e).__name__}")
    
    async def _fetch_from_api(self) -> Optional[Dict]:
        """Fetch fresh data from the API."""
        try:
            logger.debug(f"[PUJA CACHE] Fetching fresh data from API")
            
            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.get(API_URL)
                response.raise_for_status()
                data = response.json()
            
            logger.info(f"[PUJA CACHE] Successfully fetched data from API")
            return data
            
        except httpx.HTTPError as e:
            logger.error(f"[PUJA CACHE] HTTP error: {type(e).__name__}")
            return None
        except Exception as e:
            logger.error(f"[PUJA CACHE] Error fetching from API: {type(e).__name__}")
            return None
    
    async def get_puja_data(self, force_refresh: bool = False) -> Optional[Dict]:
        """
        Get puja data from cache or API.
        
        Args:
            force_refresh: If True, bypass cache and fetch fresh data
        
        Returns:
            Dictionary containing puja information or None if failed
        """
        # Try cache first (unless force refresh)
        if not force_refresh:
            cache_data = self._load_cache()
            if cache_data:
                logger.debug("[PUJA CACHE] Returning data from cache")
                return cache_data.get('data')
        
        # Fetch from API
        logger.debug("[PUJA CACHE] Fetching fresh data from API")
        api_data = await self._fetch_from_api()
        
        if api_data:
            # Save to cache
            self._save_cache(api_data)
            return api_data
        else:
            # API failed, try to return stale cache as fallback
            logger.warning("[PUJA CACHE] API fetch failed, trying stale cache as fallback")
            cache_data = self._load_cache()
            if cache_data:
                logger.debug("[PUJA CACHE] Returning stale cache data as fallback")
                return cache_data.get('data')
            
            logger.error("[PUJA CACHE] No cache available")
            return None
    
    def _extract_puja_index(self, puja_data: Dict) -> List[Dict]:
        """
        Extract a lightweight index of all pujas (just names, IDs, categories).
        This is sent to LLM for initial matching to save tokens.
        
        Returns:
            List of lightweight puja summaries
        """
        index = []
        
        if isinstance(puja_data, dict) and puja_data.get('status') == 'success':
            categories = puja_data.get('data', [])
        else:
            return []
        
        for category in categories:
            category_name = category.get('name', '')
            
            for sub_category in category.get('subCategories', []):
                for mapping in sub_category.get('pujasubcategorymappings', []):
                    puja = mapping.get('pujas', {})
                    
                    # Only basic info for matching
                    index.append({
                        'id': puja.get('id'),
                        'name': puja.get('name'),
                        'url_name': puja.get('url_name'),
                        'category': category_name,
                        'subcategory': sub_category.get('name'),
                        'description_preview': (puja.get('description') or '')[:100]  # Just first 100 chars
                    })
        
        return index
    
    async def _llm_match_pujas(self, puja_index: List[Dict], query: str) -> List[int]:
        """
        Use LLM to intelligently match user query to puja IDs.
        Sends only lightweight index to save tokens.
        
        Args:
            puja_index: Lightweight index of pujas
            query: User's search query
        
        Returns:
            List of matched puja IDs
        """
        try:
            # Import here to avoid circular imports
            from app.agents.agents import general_agent
            
            # Create compact index representation
            index_text = "\n".join([
                f"ID: {p['id']} | Name: {p['name']} | Category: {p['category']} | Preview: {p['description_preview'][:50]}..."
                for p in puja_index[:100]  # Increased to 100 pujas for better coverage
            ])
            
            prompt = f"""Given this list of pujas and a user query, identify the most relevant puja IDs (up to 3).

PUJA INDEX:
{index_text}

USER QUERY: {query}

Return ONLY a JSON array of puja IDs (integers) that best match the query.
Consider semantic meaning, not just keywords.
Examples:
- "marriage" or "wedding" → find "Marriage Engagement", "Vivaha", or wedding-related pujas
- "cost of wedding" → find "Vivaaham" or "Marriage" pujas
- "baby naming" → find "Naamakaranam"
- "new home" → find "Gruhapravesham"
- "obstacle removal" → find "Ganapati Homam"
- "engagement" → find "Marriage Engagement" or engagement-related ceremonies

IMPORTANT: Look for partial word matches too. "marriage" should match "Marriage Engagement".

Return format: [4, 15, 23]
If no good match, return: []
"""

            result = await general_agent.run(prompt)
            matched_ids_text = result.output.strip()
            
            # Parse JSON array
            import json
            matched_ids = json.loads(matched_ids_text)
            
            logger.debug(f"[LLM MATCH] Matched {len(matched_ids)} pujas")
            return matched_ids
            
        except Exception as e:
            logger.error(f"[LLM MATCH] Error: {type(e).__name__}")
            return []
    
    def _get_detailed_puja_info(self, puja_data: Dict, puja_ids: List[int]) -> List[Dict]:
        """
        Get full detailed information for specific puja IDs.
        
        Args:
            puja_data: Full API data
            puja_ids: List of puja IDs to fetch details for
        
        Returns:
            List of full puja objects with all details
        """
        detailed_pujas = []
        
        if isinstance(puja_data, dict) and puja_data.get('status') == 'success':
            categories = puja_data.get('data', [])
        else:
            return []
        
        for category in categories:
            category_name = category.get('name', '')
            
            for sub_category in category.get('subCategories', []):
                for mapping in sub_category.get('pujasubcategorymappings', []):
                    puja = mapping.get('pujas', {})
                    
                    if puja.get('id') in puja_ids:
                        # Add puja with its category context
                        puja_with_context = {
                            **puja,
                            'category': category_name,
                            'subcategory': sub_category.get('name', '')
                        }
                        detailed_pujas.append(puja_with_context)
        
        return detailed_pujas
    
    async def search_puja(self, query: str, use_llm: bool = False, include_pricing: bool = False) -> Optional[str]:
        """
        Token-optimized search with optional LLM enhancement.
        
        WORKFLOW:
        1. Extract lightweight index (names, IDs, categories only) - saves tokens
        2. Try simple keyword matching first (fast, free)
        3. If use_llm=True and no matches, use LLM with minimal context
        4. Fetch full details only for matched pujas
        
        Args:
            query: Search query (puja name, category, pricing info, etc.)
            use_llm: If True, use LLM for intelligent semantic matching when keyword search fails
            include_pricing: If True, include pricing details in response; if False, only spiritual/ritual details
        
        Returns:
            Formatted string with matching puja information
        """
        puja_data = await self.get_puja_data()
        
        if not puja_data:
            return "I apologize, but I'm unable to fetch the puja information at the moment. Please try again later."
        
        # Step 1: Extract lightweight index (saves tokens)
        puja_index = self._extract_puja_index(puja_data)
        
        if not puja_index:
            return "I apologize, but the puja information format is unexpected. Please try again later."
        
        logger.debug(f"[SEARCH] Searching {len(puja_index)} pujas | LLM: {use_llm} | Include Pricing: {include_pricing}")
        
        # Step 2: Normalize query with common synonyms (helps with marriage, wedding, etc.)
        query_lower = query.lower()
        
        # Marriage/Wedding synonym expansion
        marriage_synonyms = ['marriage', 'wedding', 'vivah', 'vivaha', 'shaadi', 'kalyanam', 'engagement']
        if any(syn in query_lower for syn in marriage_synonyms):
            # Expand query to include multiple marriage-related terms
            query_lower = f"{query_lower} marriage engagement vivah kalyanam"
        
        # Step 3: Try simple keyword matching first (fast and free)
        simple_matches = []
        
        for puja in puja_index:
            puja_name = (puja.get('name') or '').lower()
            puja_desc = (puja.get('description_preview') or '').lower()
            category = (puja.get('category') or '').lower()
            url_name = (puja.get('url_name') or '').lower()
            
            # Multi-word matching (e.g., "Ganapati Homam" matches both "ganapati" and "homam")
            query_words = query_lower.split()
            
            # Check if query words appear in puja details
            match_count = 0
            for word in query_words:
                if len(word) < 3:  # Skip very short words like "of", "in", etc.
                    continue
                if word in puja_name or word in puja_desc or word in category or word in url_name:
                    match_count += 1
            
            # Consider it a match if at least half the meaningful words match
            # OR if it's a single-word query and it matches
            meaningful_words = [w for w in query_words if len(w) >= 3]
            if not meaningful_words:
                continue
                
            threshold = max(1, len(meaningful_words) // 2)  # At least half the words should match
            
            if match_count >= threshold:
                simple_matches.append(puja.get('id'))
        
        logger.debug(f"[SEARCH] Keyword matching found {len(simple_matches)} matches")
        
        # If simple matching found results, use them
        if simple_matches:
            results = self._get_detailed_puja_info(puja_data, simple_matches[:5])  # Limit to 5
            
            if results:
                formatted_results = self._format_puja_results(results, include_pricing=include_pricing)
                logger.debug(f"[SEARCH] Returning {len(results)} results from keyword matching")
                return formatted_results
        
        # Step 3: If no simple matches and LLM is enabled, try LLM matching
        if use_llm and not simple_matches:
            logger.debug(f"[SEARCH] Trying LLM semantic matching")
            llm_matches = await self._llm_match_pujas(puja_index, query)
            
            if llm_matches:
                results = self._get_detailed_puja_info(puja_data, llm_matches)
                
                if results:
                    formatted_results = self._format_puja_results(results, include_pricing=include_pricing)
                    logger.debug(f"[SEARCH] Returning {len(results)} results from LLM matching")
                    return formatted_results
        
        # No matches found
        logger.debug(f"[SEARCH] No matches found")
        return f"I couldn't find specific information matching '{query}' in our current puja catalog. Could you try rephrasing your question or ask about a specific puja name?"
    
    def _format_puja_results(self, pujas: List[Dict], include_pricing: bool = False) -> str:
        """
        Format puja data into a readable string.
        
        Args:
            pujas: List of puja dictionaries to format
            include_pricing: If True, include pricing information; if False, only spiritual/ritual details
        
        Returns:
            Formatted string with puja information
        """
        if not pujas:
            return "No results found."
        
        result_parts = []
        
        for puja in pujas[:5]:  # Limit to top 5 results
            name = puja.get('name', 'Unknown Puja')
            description = puja.get('description', 'No description available')
            category = puja.get('category', '')
            region = puja.get('region', '')
            language = puja.get('language', '')
            
            # Get puja models (pricing information)
            puja_models = puja.get('pujamodels', [])
            
            puja_info = f"**{name}**\n"
            
            if category:
                puja_info += f"Category: {category}\n"
            
            if description and description != 'No description available':
                # Truncate long descriptions
                desc = description[:300] + "..." if len(description) > 300 else description
                puja_info += f"\nDescription: {desc}\n"
            
            if region:
                puja_info += f"Region: {region}\n"
            
            if language:
                puja_info += f"Language: {language.title()}\n"
            
            # Process puja models for rituals and pricing
            if puja_models:
                # ALWAYS show ritual/spiritual information (regardless of include_pricing)
                for idx, model in enumerate(puja_models[:3], 1):  # Show up to 3 models
                    duration = model.get('durationHrs', 'N/A')
                    num_pujaris = model.get('noOfPujaris', 'N/A')
                    is_popular = model.get('isPopularModel', False)
                    
                    popular_tag = " ⭐ (Popular)" if is_popular else ""
                    
                    # Show ritual details first
                    procedures = model.get('procedures', [])
                    if procedures:
                        puja_info += f"\n**Sacred Rituals ({len(procedures)} rituals){popular_tag}:**\n"
                        # Show all procedures with descriptions
                        for proc_data in procedures:
                            proc = proc_data.get('procedure', {})
                            title = proc.get('title', 'Unknown')
                            puja_info += f"  • {title}\n"
                    
                    # Add duration and pujaris info
                    puja_info += f"\n**Service Details:**\n"
                    puja_info += f"  • Duration: {duration} hours\n"
                    puja_info += f"  • Number of Pujaris: {num_pujaris}\n"
                    
                    # ONLY include pricing if requested
                    if include_pricing:
                        price = model.get('modelSellingPrice', 'N/A')
                        pujari_cost = model.get('pujariCost')
                        samagri_cost = model.get('samagriCost')
                        service_fee = model.get('serviceFee')
                        
                        puja_info += f"\n**Pricing Information:**\n"
                        puja_info += f"  • Total Price: ₹{price:,}\n" if isinstance(price, (int, float)) else f"  • Total Price: {price}\n"
                        
                        # Include detailed cost breakdown if available
                        if pujari_cost is not None or samagri_cost is not None or service_fee is not None:
                            puja_info += f"\n**Price Breakdown:**\n"
                            if pujari_cost is not None:
                                puja_info += f"  • Guru Dakshina (goes 100% to pujari): ₹{pujari_cost:,}\n"
                            if samagri_cost is not None:
                                puja_info += f"  • Samagri (pure organic puja materials): ₹{samagri_cost:,}\n"
                            if service_fee is not None:
                                puja_info += f"  • Service Fee: ₹{service_fee:,}\n"
                    
                    puja_info += "\n"  # Add spacing between options
                    
                    # Only show first model details to keep response concise
                    break
            
            result_parts.append(puja_info)
        
        return "\n" + ("=" * 60) + "\n" + "\n".join(result_parts) + ("=" * 60)


# Global cache instance (lazily created singleton)
_cache_instance = None


def get_cache_instance() -> PujaAPICache:
    """Return the shared PujaAPICache singleton, creating it on first access."""
    global _cache_instance
    instance = _cache_instance
    if instance is None:
        instance = PujaAPICache()
        _cache_instance = instance
    return instance


async def get_puja_info(query: str, use_llm: bool = False, include_pricing: bool = False) -> str:
    """
    Convenience wrapper for searching puja information via the global cache.

    TOKEN OPTIMIZATION:
    - use_llm=False (default): pure keyword matching, zero LLM tokens
    - use_llm=True: LLM semantic matching kicks in only when keyword search fails,
      and it sees only a lightweight index (names, IDs, categories), never the
      full data; full details are fetched only for matched pujas

    SPIRITUAL-FIRST APPROACH:
    - include_pricing=False (DEFAULT): spiritual/ritual details only
    - include_pricing=True: spiritual details plus the pricing breakdown

    Args:
        query: Search query for puja information
        use_llm: Enable LLM-based semantic matching for complex queries (default: False)
        include_pricing: Include pricing information in response (default: False)

    Returns:
        Formatted puja information

    Examples:
        # Spiritual details only (no pricing)
        await get_puja_info("Ganapati Homam")

        # With pricing information
        await get_puja_info("Ganapati Homam", include_pricing=True)

        # Complex semantic search with LLM fallback and pricing
        await get_puja_info("puja for new baby", use_llm=True, include_pricing=True)
    """
    return await get_cache_instance().search_puja(
        query, use_llm=use_llm, include_pricing=include_pricing
    )


async def refresh_cache() -> bool:
    """
    Force a refresh of the puja cache from the API, bypassing any cached data.

    Returns:
        True if fresh data was fetched (and cached), False otherwise
    """
    fresh_data = await get_cache_instance().get_puja_data(force_refresh=True)
    return fresh_data is not None
