"""
Course Statistics Utility
Analyzes the course outline file (output/{hashid}.md) to provide module and lesson counts
"""
import os
import re
from pathlib import Path

class CourseStats:
    """Report module and lesson statistics for a generated course outline.

    The outline is a markdown file at ``<project_root>/output/<hashid>.md``
    using this heading layout:

    * ``# **Course Title**``
    * ``## **Module N: Title**`` containing ``### **Module Focus**``,
      ``### **Module Outcome**`` and ``### **Total Module Duration**``
    * ``### **Lesson N.M: Title**`` containing ``#### **Duration**``,
      ``#### **Topics Covered**`` and ``#### **Outcome**``

    Progress and diagnostics are written to stdout with ``print``.
    """

    # "# **Course Title**"
    _COURSE_TITLE = re.compile(r'^#\s+\*\*(.+?)\*\*', re.MULTILINE)
    # "## **Module 1: Foundation - Understanding Generative AI**"
    _MODULE_HEADING = re.compile(
        r'^##\s+\*\*Module\s+(\d+):\s*([^*]+)\*\*', re.MULTILINE
    )
    # Start of any module heading, whatever its number.
    _MODULE_HEADING_START = re.compile(r'^##\s+\*\*Module\s+\d+:', re.MULTILINE)
    # "### **Lesson 1.2: Lesson Title**"
    _LESSON_HEADING = re.compile(
        r'###\s+\*\*Lesson\s+(\d+\.\d+):\s*([^*]+)\*\*', re.MULTILINE
    )
    # A list item: "-" / "•" (space optional) or "*" / "+" (space required so
    # that bold text such as "**Topic**" is not mistaken for a bullet).
    _BULLET_LINE = re.compile(r'^(?:[-•]\s*|[*+]\s+)(.*)$')

    def __init__(self, project_root=None):
        """Initialize with the project root path.

        Args:
            project_root: Directory that contains ``output/``. When ``None``
                the root is auto-detected as the parent of the directory
                holding this file, and the detected path is printed.
        """
        if project_root is None:
            # Current file: user_journey_service/course_stats.py
            # Project root: parent of user_journey_service/
            current_dir = os.path.dirname(os.path.abspath(__file__))
            self.project_root = os.path.dirname(current_dir)
            print(f"📁 Auto-detected project root: {self.project_root}")
        else:
            self.project_root = project_root

    @staticmethod
    def _is_within(directory, path):
        """Return True when *path* resolves to a location inside *directory*."""
        base = os.path.abspath(directory)
        target = os.path.abspath(path)
        try:
            return os.path.commonpath([base, target]) == base
        except ValueError:
            # Raised when the two paths live on different drives (Windows).
            return False

    def get_course_stats(self, hashid):
        """Analyze the course outline file and return statistics.

        Args:
            hashid: The hashid of the course; the outline is read from
                ``<project_root>/output/<hashid>.md``.

        Returns:
            dict: On success, the full statistics produced by
            ``_parse_course_outline``. On failure, a dict with ``hashid``,
            ``exists`` and ``error`` (plus ``searched_location`` when the
            file was not found or the hashid was rejected). This method does
            not raise; read and parse errors are reported in the dict.
        """
        output_dir = os.path.join(self.project_root, "output")
        outline_path = os.path.join(output_dir, f"{hashid}.md")

        # The hashid is interpolated into a filesystem path, so refuse values
        # such as "../../secret" that would resolve outside output/.
        if not self._is_within(output_dir, outline_path):
            print(f"❌ Refusing to read outside the output directory: {outline_path}")
            return {
                "hashid": hashid,
                "exists": False,
                "error": "Invalid course hashid.",
                "searched_location": outline_path,
            }

        print(f"🔍 Looking for course outline at: {outline_path}")
        print(f"📁 Output directory exists: {os.path.exists(os.path.dirname(outline_path))}")
        print(f"📄 File exists: {os.path.exists(outline_path)}")

        if not os.path.exists(outline_path):
            print(f"❌ File does not exist at: {outline_path}")
            return {
                "hashid": hashid,
                "exists": False,
                "error": "Course outline file not found. Please run /run-journey first.",
                "searched_location": outline_path,
            }

        try:
            print("📖 Opening file...")
            with open(outline_path, 'r', encoding='utf-8') as f:
                content = f.read()

            stats = self._parse_course_outline(content, hashid)

            # Defensive: a subclass may override the parser.
            if not isinstance(stats, dict):
                print(f"❌ Stats is not a dictionary: {type(stats)}")
                return {
                    "hashid": hashid,
                    "exists": True,
                    "error": "Parser returned invalid data structure",
                }

            # Fill in any required key the parser did not provide.
            defaults = {
                "course_title": "Unknown Course",
                "module_count": 0,
                "total_lessons": 0,
                "modules": [],
            }
            missing_keys = [key for key in defaults if key not in stats]
            if missing_keys:
                print(f"❌ Missing required keys in stats: {missing_keys}")
                print(f"📊 Stats keys found: {list(stats.keys())}")
                for key in missing_keys:
                    stats[key] = defaults[key]

            return stats

        except Exception as e:
            # Boundary handler: callers expect an error dict, never a raise.
            print(f"❌ Error parsing file: {str(e)}")
            import traceback
            traceback.print_exc()
            return {
                "hashid": hashid,
                "exists": True,
                "error": f"Error parsing course outline: {str(e)}",
            }

    def _parse_course_outline(self, content, hashid):
        """Parse the markdown content to extract the course structure.

        Args:
            content: Full text of the outline file.
            hashid: Course identifier, echoed into the result and used to
                build the reported file paths.

        Returns:
            dict: ``hashid``, ``exists``, ``course_title``, ``module_count``,
            ``total_lessons``, ``modules`` (one dict per module, each with
            its ``lessons``), ``estimated_output`` and ``file_paths``.
        """
        print("=" * 50)
        print("📝 Parsing course outline...")
        print("=" * 50)

        title_match = self._COURSE_TITLE.search(content)
        course_title = title_match.group(1) if title_match else "Unknown Course"
        print(f"📚 Course Title: {course_title}")

        modules = self._MODULE_HEADING.findall(content)
        module_count = len(modules)
        print(f"📊 Found {module_count} modules")

        modules_list = []
        total_lessons = 0

        for raw_number, raw_title in modules:
            module_num = int(raw_number)
            module_title = raw_title.strip()
            print(f"\n📦 Module {module_num}: {module_title}")

            # Text from this module's heading up to the next module heading.
            module_section = self._extract_module_section(content, module_num)

            lessons = self._LESSON_HEADING.findall(module_section)
            lesson_count = len(lessons)
            total_lessons += lesson_count
            print(f"  📖 Found {lesson_count} lessons")

            lesson_details = []
            for lesson_num, raw_lesson_title in lessons:
                lesson_title = raw_lesson_title.strip()
                print(f"    • Lesson {lesson_num}: {lesson_title}")

                lesson_section = self._extract_lesson_section(module_section, lesson_num)
                lesson_details.append({
                    "lesson_number": lesson_num,
                    "lesson_title": lesson_title,
                    "duration": self._extract_lesson_duration(lesson_section),
                    "topics": self._extract_lesson_topics(lesson_section),
                    "outcome": self._extract_lesson_outcome(lesson_section),
                })

            modules_list.append({
                "module_number": module_num,
                "module_title": module_title,
                "focus": self._extract_module_focus(module_section),
                "outcome": self._extract_module_outcome(module_section),
                "duration": self._extract_module_duration(module_section),
                "lesson_count": lesson_count,
                "lessons": lesson_details,
            })

        # One content file and one audio file are estimated per lesson.
        total_content_audio = total_lessons

        print("\n" + "=" * 50)
        print("📊 SUMMARY")
        print("=" * 50)
        print(f"Course: {course_title}")
        print(f"Total Modules: {module_count}")
        print(f"Total Lessons: {total_lessons}")
        print(f"Estimated Content Files: {total_lessons}")
        print(f"Estimated Audio Files: {total_content_audio}")
        print("=" * 50)

        return {
            "hashid": hashid,
            "exists": True,
            "course_title": course_title,
            "module_count": module_count,
            "total_lessons": total_lessons,
            "modules": modules_list,
            "estimated_output": {
                "content_files": total_lessons,
                "content_audio_files": total_content_audio,
                "total_audio_files": total_content_audio,
            },
            "file_paths": {
                "course_outline": f"/output/{hashid}.md",
                "content_directory": f"/content/{hashid}/",
                "updated_content_directory": f"/updated_content/{hashid}/",
                "audio_lessons_directory": f"/audio/lessons/{hashid}/",
            },
        }

    def _extract_module_section(self, content, module_num):
        """Extract the section of content belonging to a specific module.

        The section runs from the module's own heading up to the next module
        heading of *any* number (or the end of the text), so outlines whose
        module numbers are not consecutive are still split correctly.

        Returns:
            str: The module's text, or ``""`` if its heading is not found.
        """
        # "0*" lets a parsed number such as 1 also find a "Module 01:" heading.
        start_pattern = r'^##\s+\*\*Module\s+0*' + re.escape(str(module_num)) + ':'
        start_match = re.search(start_pattern, content, re.MULTILINE)
        if not start_match:
            return ""

        # Search from the end of this heading so it cannot match itself.
        next_match = self._MODULE_HEADING_START.search(content, start_match.end())
        end_pos = next_match.start() if next_match else len(content)
        return content[start_match.start():end_pos]

    @staticmethod
    def _extract_heading_body(section, marker, title):
        """Return the stripped text under ``<marker> **<title>**``.

        The body ends at the next line starting with *marker* or at the end
        of *section*. Returns ``""`` when the heading is absent.
        """
        pattern = (
            marker + r' \*\*' + re.escape(title) + r'\*\*\s*\n(.*?)(?=\n' + marker + r'|\Z)'
        )
        match = re.search(pattern, section, re.DOTALL)
        return match.group(1).strip() if match else ""

    def _extract_module_focus(self, section):
        """Extract module focus from module section"""
        return self._extract_heading_body(section, "###", "Module Focus")

    def _extract_module_outcome(self, section):
        """Extract module outcome from module section"""
        return self._extract_heading_body(section, "###", "Module Outcome")

    def _extract_module_duration(self, section):
        """Extract module duration from module section"""
        return self._extract_heading_body(section, "###", "Total Module Duration")

    def _extract_lesson_section(self, module_section, lesson_num):
        """Extract the body of one lesson from a module section.

        The heading is matched with the same flexible whitespace used to
        discover lessons, and the body ends at the next level-2 or level-3
        heading so that module-level sections following the last lesson
        (for example ``### **Module Outcome**``) are not absorbed into it.

        Returns:
            str: The text after the lesson heading, or ``""`` if not found.
        """
        pattern = (
            r'###\s+\*\*Lesson\s+' + re.escape(str(lesson_num))
            + r':.*?\*\*(.*?)(?=\n#{2,3}\s|\Z)'
        )
        match = re.search(pattern, module_section, re.DOTALL)
        return match.group(1) if match else ""

    def _extract_lesson_duration(self, lesson_section):
        """Extract lesson duration from lesson section"""
        return self._extract_heading_body(lesson_section, "####", "Duration")

    def _extract_lesson_topics(self, lesson_section):
        """Extract the list of topics from a lesson section.

        Lines that start with a bullet (``-``, ``•``, ``*`` or ``+``) each
        begin a topic; a non-bullet line after a bullet is treated as a
        wrapped continuation of that topic. When no line is bulleted, every
        non-empty line is returned as its own topic.

        Returns:
            list[str]: Stripped, non-empty topics (empty if none are found).
        """
        topics_text = self._extract_heading_body(lesson_section, "####", "Topics Covered")
        lines = [line.strip() for line in topics_text.splitlines() if line.strip()]

        topics = []
        saw_bullet = False
        for line in lines:
            bullet = self._BULLET_LINE.match(line)
            if bullet:
                saw_bullet = True
                topics.append(bullet.group(1).strip())
            elif saw_bullet:
                topics[-1] = f"{topics[-1]} {line}".strip()

        if not saw_bullet:
            return lines
        return [topic for topic in topics if topic]

    def _extract_lesson_outcome(self, lesson_section):
        """Extract lesson outcome from lesson section"""
        return self._extract_heading_body(lesson_section, "####", "Outcome")

    def get_formatted_stats(self, hashid):
        """Get course statistics as a compact dictionary for an API response.

        Args:
            hashid: The hashid of the course.

        Returns:
            dict: When the outline was parsed, ``exists`` is ``True`` and the
            dict carries ``course_title``, ``summary``, ``module_count``,
            ``total_lessons``, ``estimated_files`` and a per-module summary.
            Otherwise ``exists`` is ``False`` and the dict carries
            ``warning`` / ``note`` (and ``error`` when one is known).
        """
        stats = self.get_course_stats(hashid)

        if not stats or not isinstance(stats, dict):
            print(f"❌ Invalid stats returned: {stats}")
            return {
                "hashid": hashid,
                "exists": False,
                "error": "Failed to parse course outline",
                "warning": "Course outline not found or invalid format. Please ensure /run-journey was called first.",
                "note": "Jobs are still queued but course outline is missing.",
            }

        if stats.get("error"):
            print(f"❌ Error in stats: {stats['error']}")
            if stats.get("exists"):
                # The file was found but reading/parsing it failed; do not
                # tell the caller that it is missing.
                warning = "Course outline exists but could not be parsed properly."
                note = "Jobs are still queued but course preview is unavailable."
            else:
                warning = "Course outline not found. Please ensure /run-journey was called first."
                note = "Jobs are still queued but course outline is missing."
            return {
                "hashid": hashid,
                # Kept False: callers treat True as "preview fields present".
                "exists": False,
                "error": stats["error"],
                "warning": warning,
                "note": note,
                "searched_location": stats.get("searched_location", "unknown"),
            }

        if not stats.get("exists", False):
            return {
                "hashid": hashid,
                "exists": False,
                "warning": "Course outline not found. Please ensure /run-journey was called first.",
                "note": "Jobs are still queued but course outline is missing.",
                "searched_location": stats.get("searched_location", "unknown"),
            }

        try:
            estimated = stats.get("estimated_output", {})
            return {
                "hashid": hashid,
                "exists": True,
                "course_title": stats.get("course_title", "Unknown Course"),
                "summary": f"Course contains {stats.get('module_count', 0)} modules with {stats.get('total_lessons', 0)} lessons",
                "module_count": stats.get("module_count", 0),
                "total_lessons": stats.get("total_lessons", 0),
                "estimated_files": {
                    "content_files": estimated.get("content_files", 0),
                    "audio_files": estimated.get("total_audio_files", 0),
                },
                "modules": [
                    {
                        "module_number": m.get("module_number"),
                        "title": m.get("module_title"),
                        "lesson_count": m.get("lesson_count", 0),
                    }
                    for m in stats.get("modules", [])
                ],
            }
        except Exception as e:
            # Malformed stats (e.g. a non-dict module entry) must not break
            # the API response.
            print(f"❌ Error formatting stats: {e}")
            return {
                "hashid": hashid,
                "exists": False,
                "error": f"Error formatting course stats: {str(e)}",
                "warning": "Course outline exists but could not be parsed properly.",
                "note": "Jobs are still queued but course preview is unavailable.",
            }


# Shared module-level instance for easy import.
# It is created at import time with no project_root, so the constructor
# auto-detects the project root and prints the detected path.
course_stats = CourseStats()


def get_course_stats(hashid):
    """Return the formatted statistics for the course identified by *hashid*.

    Thin module-level wrapper that delegates to the shared ``course_stats``
    instance's ``get_formatted_stats`` method.
    """
    formatted = course_stats.get_formatted_stats(hashid)
    return formatted