import sys
from fastapi import HTTPException
import os
from ..utils.hashing import HashGenerator
from user_journey_service.crew import UserJourney 
from user_journey_service.processors.duration_estimator import MicrolearningDurationEstimator
from user_journey_service.processors.StagewiseCourseParser import CourseOutlineParser
from user_journey_service.processors.content_reviewer import ContentReviewer
from user_journey_service.processors.user_journey_synthesizer import Synthesizer
from user_journey_service.tools.custom_stt_tool import LiveWhisperSTTTool
# from user_journey_service.tools.custom_tts_tool import RealTime_TTS
duration_estimator = MicrolearningDurationEstimator()
content_reviewer = ContentReviewer()
synthesizer = Synthesizer()
stt_tool = LiveWhisperSTTTool()
# tts_tool = RealTime_TTS()
from pathlib import Path
import re
import whisper
from gtts import gTTS
from pydub import AudioSegment
import sys
import subprocess

# Configure pydub to use ffmpeg correctly
def configure_pydub():
    """Point pydub at a bundled ``ffmpeg.exe`` when one can be located.

    The search covers this module's directory first and then up to three
    parent directories. When a binary is found it becomes
    ``AudioSegment.converter``; ``AudioSegment.ffprobe`` is set to a sibling
    ``ffprobe.exe`` if present, otherwise to the ffmpeg binary itself.

    Returns:
        bool: True when ffmpeg was found and pydub was configured, False
        otherwise (a warning is printed in that case).
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))

    def find_ffmpeg():
        """Return the path of the first ``ffmpeg.exe`` found, or None."""
        beside_module = os.path.join(module_dir, 'ffmpeg.exe')
        if os.path.exists(beside_module):
            return beside_module

        # Walk upwards: '..', '../..', '../../..'
        for depth in (1, 2, 3):
            ancestor = os.path.join(module_dir, *(['..'] * depth))
            candidate = os.path.abspath(os.path.join(ancestor, 'ffmpeg.exe'))
            if os.path.exists(candidate):
                return candidate

        return None

    located = find_ffmpeg()

    if not located:
        print("⚠ Warning: ffmpeg.exe not found. Audio conversion may fail.")
        return False

    AudioSegment.converter = located

    # Prefer a real ffprobe next to ffmpeg; fall back to ffmpeg itself.
    sibling_probe = os.path.join(os.path.dirname(located), 'ffprobe.exe')
    AudioSegment.ffprobe = sibling_probe if os.path.exists(sibling_probe) else located

    print(f"✓ Pydub configured with: {located}")
    return True

# Configure pydub on module import
pydub_configured = configure_pydub()

class ContentCreationService:
    def __init__(self, input_data):
        """Store the request data and derive the per-request artifact paths.

        Args:
            input_data: Object describing the request. It is hashed with
                ``HashGenerator.generate_input_hash``; other methods of this
                class read its ``feedback``, ``Level`` and ``motive``
                attributes and call its ``dict()`` method.
        """
        self.crew_instance = UserJourney()
        self.input_data = input_data

        # Every artifact path below is relative and keyed by this digest.
        digest = HashGenerator.generate_input_hash(input_data)
        self.input_hash = digest

        self.research_file = f"research/{digest}.md"
        self.output_file = f"output/{digest}.md"
        self.output_file_1 = f"output1/{digest}.md"
        self.output_file_2 = f"output2/{digest}.md"
        self.json_output_path = f'parsed_course_content/{digest}.json'

    def run_content_creation(self):
        """Create content, questions and audio for every lesson of every module.

        The user-journey markdown at ``self.output_file`` is parsed with
        ``CourseOutlineParser`` and exported as JSON to
        ``self.json_output_path``. Each lesson is then processed:

        * first iteration: existing content/questions are reused and only the
          missing audio is generated; otherwise the lesson is created and
          audio is generated with existence checks;
        * any other iteration: the lesson is always re-created and its audio
          is regenerated without checks.

        Returns:
            dict: a ``"failure"`` status when the user-journey file does not
            exist, otherwise a ``"success"`` status carrying the input hash
            under ``"hashid"``.

        Raises:
            HTTPException: status 500 wrapping any exception raised while
                parsing or generating.
        """
        try:
            # Guard: nothing to do until the user journey has been written.
            if not os.path.exists(self.output_file):
                return {"status": "failure", "message": "User journey is not created."}

            print("The user journey is created")
            outline_parser = CourseOutlineParser(self.output_file)
            outline = outline_parser.parse_content()
            os.makedirs(os.path.dirname(self.json_output_path), exist_ok=True)
            outline_parser.export_to_json(self.json_output_path, outline)
            print(f"Course Title: {outline['course_title']}\n")
            course_title = outline['course_title']

            for module_idx, module in enumerate(outline['modules'], start=1):
                module_title = module['module_title']
                module_focus = module['focus']
                module_outcome = module['outcome']
                module_duration = module['duration']

                print(f"Processing Module {module_idx}: {module_title}")

                # Drop the "Module X: " prefix to get the bare module name.
                module_name = re.sub(r'^Module \d+:\s*', '', module_title).strip()

                for lesson_idx, lesson in enumerate(module['lessons'], start=1):
                    full_title = lesson['lesson_title']
                    lesson_duration = lesson['duration']
                    lesson_outcome = lesson['outcome']
                    lesson_topics = lesson['topics']

                    # Use the "X.Y" number embedded in the title when present,
                    # otherwise synthesise it from the loop indices.
                    numbered = re.search(r'Lesson (\d+\.\d+)', full_title)
                    lesson_number = numbered.group(1) if numbered else f"{module_idx}.{lesson_idx}"

                    # Drop the "Lesson X.Y: " prefix to get the bare lesson name.
                    lesson_name = re.sub(r'^Lesson \d+\.\d+:\s*', '', full_title).strip()

                    # Target word count: 120-130 words per minute, with a
                    # 20-minute default when the duration has no digits.
                    digits = re.search(r'(\d+)', lesson_duration)
                    minutes = int(digits.group()) if digits else 20
                    lower_range = minutes * 120
                    upper_range = minutes * 130

                    print(f"  Processing Lesson {lesson_number}: {lesson_name}")
                    print(f"    Duration: {lesson_duration}")
                    print(f"    Outcome: {lesson_outcome}")
                    print(f"    Topics: {lesson_topics}")

                    lesson_kwargs = dict(
                        module_idx=module_idx,
                        module_name=module_name,
                        module_focus=module_focus,
                        module_outcome=module_outcome,
                        module_duration=module_duration,
                        lesson_number=lesson_number,
                        lesson_name=lesson_name,
                        lesson_duration=lesson_duration,
                        lesson_outcome=lesson_outcome,
                        lesson_topics=lesson_topics,
                        course_title=course_title,
                        word_count_lower=lower_range,
                        word_count_upper=upper_range,
                    )

                    if not self._is_first_iteration():
                        # Feedback round: always rebuild lesson and audio.
                        print("Content and questions are not available")
                        self._create_new_lesson(**lesson_kwargs)
                        self._create_audio_files_without_check(module_idx, lesson_number)
                        continue

                    if not self._has_updated_content_questions(module_idx, lesson_number):
                        print("Content and questions are not available")
                        self._create_new_lesson(**lesson_kwargs)
                        self._create_audio_files(module_idx, lesson_number)
                    elif self._has_audio_content_and_questions(module_idx, lesson_number):
                        print("Audio files for content and questions are available")
                    else:
                        print("Audio files are not available")
                        self._create_audio_files(module_idx, lesson_number)

            return {"status": "success", "message": "Contents are created.", "hashid": self.input_hash}

        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error parsing result: {e}")

    def run_content_creation_04_01_2026(self):
        """Legacy stage-based pipeline: create content, questions and audio per stage.

        Parses the user-journey markdown at ``self.output_file`` with
        ``CourseOutlineParser``, exports it as JSON to
        ``self.json_output_path`` and processes every entry of
        ``parsed_result['stages']``.

        All helpers invoked here are the stage-based ``*_04_01_2026``
        variants. The un-suffixed helpers take ``(module_idx, lesson_number)``
        and cannot be called with a single stage index.

        Returns:
            dict: a ``"failure"`` status when the user-journey file does not
            exist, otherwise a ``"success"`` status carrying the input hash
            under ``"hashid"``.

        Raises:
            HTTPException: status 500 wrapping any exception raised while
                parsing or generating.
        """
        try:
            if not os.path.exists(self.output_file):
                return {"status": "failure", "message": "User journey is not created."}

            print("The user journey is created")
            parser = CourseOutlineParser(self.output_file)
            parsed_result = parser.parse_content()
            print(parsed_result)
            os.makedirs(os.path.dirname(self.json_output_path), exist_ok=True)
            parser.export_to_json(self.json_output_path, parsed_result)
            print(f"Main Heading: {parsed_result['main_heading']}\n")
            main_heading = parsed_result['main_heading']
            print(parsed_result)

            for idx, stage in enumerate(parsed_result['stages'], start=1):
                print(f"The iteration : {idx}")
                if self._is_first_iteration():
                    if self._has_updated_content_questions_04_01_2026(idx):
                        if self._has_audio_content_and_questions_04_01_2026(idx):
                            print("Audio files for content and questions are available")
                        else:
                            print("Audio files are not available")
                            self._create_audio_files_04_01_2026(idx)
                    else:
                        print("content and question is not available")
                        self._create_new_stage_04_01_2026(idx, stage, main_heading)
                        self._create_audio_files_04_01_2026(idx)
                else:
                    # Feedback round: always rebuild the stage and its audio.
                    print("content and question is not available")
                    self._create_new_stage_04_01_2026(idx, stage, main_heading)
                    self._create_audio_files_without_check_04_01_2026(idx)

            return {"status": "success", "message": "contents are created.", "hashid": self.input_hash}

        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error parsing result: {e}")
        
    def _is_first_iteration(self):
        print(f"Inside feedback test : {self.input_data.feedback}")
        if self.input_data.feedback == "First iteration":
            return True
        return False
    def _has_updated_content_questions(self, module_idx, lesson_number):
        """Check if content and questions already exist for a lesson"""
        # Convert lesson_number (e.g., "1.1") to safe filename format
        lesson_safe = lesson_number.replace('.', '_')
        
        updated_content_file = Path(f'updated_content/{self.input_hash}/module{module_idx}_lesson{lesson_safe}.md')
        question_file = Path(f"question/{self.input_hash}/module{module_idx}_lesson{lesson_safe}.md")
        
        return updated_content_file.exists() and question_file.exists()

    def _has_audio_content_and_questions(self, module_idx, lesson_number):
        """Check if audio files exist for a lesson"""
        print("Inside audio files availability check function")
        
        # Convert lesson_number (e.g., "1.1") to safe filename format
        lesson_safe = lesson_number.replace('.', '_')
        
        # Audio file paths
        audio_content = Path(f'audio/lessons/{self.input_hash}/module{module_idx}_lesson{lesson_safe}.wav')
        audio_question1 = Path(f'audio/questions/{self.input_hash}/module{module_idx}_lesson{lesson_safe}/1.wav')
        audio_question2 = Path(f'audio/questions/{self.input_hash}/module{module_idx}_lesson{lesson_safe}/2.wav')
        audio_question3 = Path(f'audio/questions/{self.input_hash}/module{module_idx}_lesson{lesson_safe}/3.wav')
        
        result = (audio_content.exists() and 
                audio_question1.exists() and 
                audio_question2.exists() and 
                audio_question3.exists())
        
        print(f"The output is : {result}")
        return result 
           
    def _has_updated_content_questions_04_01_2026(self, idx):
        updated_content_file = Path(f'updated_content/{self.input_hash}/stage{idx}.md')
        question_file = Path(f"question/{self.input_hash}/stage{idx}.md")
        return updated_content_file.exists() and question_file.exists()
    
    def _has_audio_content_and_questions_04_01_2026(self, idx):
        print("Inside audio files availability check function")
        audio_content = Path(f'audio/stage/{self.input_hash}/stage{idx}.wav')
        audio_question1 = Path(f'audio/questions/{self.input_hash}/stage{idx}/1.wav')
        audio_question2 = Path(f'audio/questions/{self.input_hash}/stage{idx}/2.wav')
        audio_question3 = Path(f'audio/questions/{self.input_hash}/stage{idx}/3.wav')
        result = audio_content.exists() and audio_question1.exists() and audio_question2.exists() and audio_question3.exists()
        print(f"The output is : {result}")
        return result

    def _run_evaluation(self, idx):
        all_qns_answer = {}
        audio_content = Path(f'audio/stage/{self.input_hash}/stage{idx}.wav')
        print(f"The audio content is available at : {audio_content}")
        audio_question = Path(f'audio/questions/{self.input_hash}/stage{idx}/')
        wav_files = sorted(audio_question.glob('*.wav'))
        for wav_file in wav_files:
            print(wav_file)
            question = self.wav_to_text_whisper(wav_file)
            input("🎤 Press Enter when you're ready to answer...")
            print("📢 Listening to your answer...")
            answer = str(input("Enter your answer: "))
            inputs = {"question": question, "answer": answer}
            all_qns_answer[question] = answer
            crew = self.crew_instance.evaluator_crew()
            crew.kickoff(inputs=inputs)
        inputs = {"users_response": all_qns_answer}
        crew = self.crew_instance.assessment_crew()
        crew.kickoff(inputs=inputs)
    
    
    def _create_audio_files(self, module_idx, lesson_number):
        """Create audio files for a lesson"""
        # Convert lesson_number (e.g., "1.1") to safe filename format
        lesson_safe = lesson_number.replace('.', '_')
        
        # Content audio
        audio_content = Path(f'audio/lessons/{self.input_hash}/module{module_idx}_lesson{lesson_safe}.wav')
        if audio_content.exists():
            print(f"Audio for the content is already available at: {audio_content}")
        else:
            os.makedirs(os.path.dirname(f"audio/lessons/{self.input_hash}/"), exist_ok=True)
            content_file = Path(f'updated_content/{self.input_hash}/module{module_idx}_lesson{lesson_safe}.md')
            with open(content_file, 'r', encoding='utf-8') as f:
                text_content = f.read()
            self.text_to_wav(text_content, audio_content)
        
        # Question audio
        audio_question_dir = Path(f'audio/questions/{self.input_hash}/module{module_idx}_lesson{lesson_safe}/')
        wav_files = sorted(audio_question_dir.glob('*.wav'))
        if not wav_files:
            os.makedirs(audio_question_dir, exist_ok=True)
            print("Question audio is not available")
            self._create_audio_question(module_idx, lesson_number)

    def _create_audio_files_without_check(self, module_idx, lesson_number):
        """Create audio files for a lesson without checking if they exist"""
        # Convert lesson_number (e.g., "1.1") to safe filename format
        lesson_safe = lesson_number.replace('.', '_')
        
        # Create content audio
        audio_content = Path(f'audio/lessons/{self.input_hash}/module{module_idx}_lesson{lesson_safe}.wav')
        os.makedirs(os.path.dirname(f"audio/lessons/{self.input_hash}/"), exist_ok=True)
        content_file = Path(f'updated_content/{self.input_hash}/module{module_idx}_lesson{lesson_safe}.md')
        with open(content_file, 'r', encoding='utf-8') as f:
            text_content = f.read()
        self.text_to_wav(text_content, audio_content)
        
        # Create question audio directory
        audio_question_dir = Path(f'audio/questions/{self.input_hash}/module{module_idx}_lesson{lesson_safe}/')
        os.makedirs(audio_question_dir, exist_ok=True)
        print("Question audio is not available")
        self._create_audio_question(module_idx, lesson_number)
        
    def _create_audio_files_04_01_2026(self, idx):
        audio_content = Path(f'audio/stage/{self.input_hash}/stage{idx}.wav')
        if audio_content.exists():
            print(f"Audio for the content is already available at :{audio_content}")
        else:
            os.makedirs(os.path.dirname(f"audio/stage/{self.input_hash}/"), exist_ok=True)
            content_file = Path(f'updated_content/{self.input_hash}/stage{idx}.md')
            with open(content_file, 'r', encoding='utf-8') as f:
                text_content = f.read()
            self.text_to_wav(text_content, audio_content)
        
        audio_question = Path(f'audio/questions/{self.input_hash}/stage{idx}/')
        wav_files = sorted(audio_question.glob('*.wav'))
        if not wav_files:
            os.makedirs(os.path.dirname(f"audio/questions/{self.input_hash}/stage{idx}/"), exist_ok=True)
            print("question audio is not available")
            self._create_audio_question(idx)

    def _create_audio_files_without_check_04_01_2026(self, idx):
        audio_content = Path(f'audio/stage/{self.input_hash}/stage{idx}.wav')
        os.makedirs(os.path.dirname(f"audio/stage/{self.input_hash}/"), exist_ok=True)
        content_file = Path(f'updated_content/{self.input_hash}/stage{idx}.md')
        with open(content_file, 'r', encoding='utf-8') as f:
            text_content = f.read()
        self.text_to_wav(text_content, audio_content)
        os.makedirs(os.path.dirname(f"audio/questions/{self.input_hash}/stage{idx}/"), exist_ok=True)
        print("question audio is not available")
        self._create_audio_question(idx)


    def _create_audio_question(self, module_idx, lesson_number):
        """Create audio files for questions for a specific lesson"""
        # Convert lesson_number (e.g., "1.1") to safe filename format
        lesson_safe = lesson_number.replace('.', '_')
        
        question_file = Path(f"question/{self.input_hash}/module{module_idx}_lesson{lesson_safe}.md")
        
        if question_file.exists():
            with open(question_file, 'r', encoding='utf-8') as f:
                questions = f.read()
            
            # Parse questions properly
            parsed_questions = self.parse_questions(questions)
            print(f"Found {len(parsed_questions)} questions to convert to audio for Module {module_idx}, Lesson {lesson_number}")
            
            for index, question in enumerate(parsed_questions, start=1):
                print(f"\n🧠 Asking Question {index} for Module {module_idx}, Lesson {lesson_number}: {question[:100]}...\n")
                audio_question = Path(f'audio/questions/{self.input_hash}/module{module_idx}_lesson{lesson_safe}/{index}.wav')
                self.text_to_wav(question, audio_question)
        else:
            print(f"Question file not found: {question_file}")
            
    def _create_audio_question_04_01_2026(self, idx):
        question_file = Path(f"question/{self.input_hash}/stage{idx}.md")
        with open(question_file, 'r', encoding='utf-8') as f:
            questions = f.read()
        
        # Parse questions properly
        parsed_questions = self.parse_questions(questions)
        print(f"Found {len(parsed_questions)} questions to convert to audio")
        
        for index, question in enumerate(parsed_questions, start=1):
            print(f"\n🧠 Asking Question {index}: {question[:100]}...\n")
            audio_question = Path(f'audio/questions/{self.input_hash}/stage{idx}/{index}.wav')
            self.text_to_wav(question, audio_question)

    def parse_questions(self, md_text):
        """Split a markdown question sheet into individual question strings.

        A question starts at a line beginning with ``### Q`` (for example
        ``### Q1: ...``); the ``### Q<n>:`` prefix is stripped and every
        following non-empty line that is not itself a ``###`` heading is
        joined onto the question with single spaces.

        Text that appears before the first question heading (a document
        title, an introduction) is discarded so it cannot be mistaken for
        question 1. When the sheet has no question heading at all, the whole
        text is still returned as a single question.

        Args:
            md_text: Markdown text of the question sheet.

        Returns:
            list[str]: At most nine questions, in document order.
        """
        heading_prefix = re.compile(r'^### Q\d+:\s*')
        questions = []
        current_question = []
        seen_heading = False

        for raw_line in md_text.split('\n'):
            line = raw_line.strip()
            if line.startswith('### Q'):
                # Flush the previous question. Lines gathered before the very
                # first heading are preamble and are dropped instead.
                if seen_heading and current_question:
                    questions.append(' '.join(current_question).strip())
                current_question = []
                seen_heading = True
                line = heading_prefix.sub('', line)

            # A heading whose prefix could not be stripped (e.g. no colon)
            # still starts with '###' and contributes no text of its own.
            if line and not line.startswith('###'):
                current_question.append(line)

        if current_question:
            questions.append(' '.join(current_question).strip())

        # Fragments of ten characters or fewer are treated as noise.
        questions = [q for q in questions if q and len(q) > 10]

        # Fallback: if the line scan kept nothing (e.g. only very short
        # questions), capture everything between consecutive headings.
        if not questions:
            pattern = r'### Q\d+:?\s*(.*?)(?=\n### Q\d+:|$)'
            questions = re.findall(pattern, md_text, re.DOTALL | re.IGNORECASE)
            questions = [q.strip() for q in questions if q.strip()]

        return questions[:9]  # Return max 9 questions


    def _create_new_lesson(self, module_idx, module_name, module_focus, module_outcome, module_duration,
                        lesson_number, lesson_name, lesson_duration, lesson_outcome, lesson_topics,
                        course_title, word_count_lower, word_count_upper):
        """Generate, review and persist the content and questions of one lesson.

        Steps, in order:
            1. Run the second-stage crew, which writes the draft to
               ``content/<hash>/module{m}_lesson{x_y}.md``.
            2. Pass the draft through ``content_reviewer.review_and_enrich_content``.
            3. Run the QA-generation crew on the reviewed text; it writes to
               ``question/<hash>/module{m}_lesson{x_y}.md``.
            4. Save the reviewed text under ``updated_content/<hash>/``.

        Args:
            module_idx: Module number.
            module_name: Module title without its "Module X:" prefix.
            module_focus: Module focus text from the parsed outline.
            module_outcome: Module outcome text from the parsed outline.
            module_duration: Module duration text from the parsed outline.
            lesson_number: Lesson number such as ``"1.1"``.
            lesson_name: Lesson title without its "Lesson X.Y:" prefix.
            lesson_duration: Duration text; its first run of digits is passed
                to the crew as integer minutes (20 when there are none).
            lesson_outcome: Lesson outcome text, passed as the objectives.
            lesson_topics: Iterable of topic strings.
            course_title: Course title from the parsed outline.
            word_count_lower: Lower bound of the target word count.
            word_count_upper: Upper bound of the target word count.

        Returns:
            str: A fixed success message.
        """
        inputs = self.input_data.dict()

        # Minutes for the crew: first run of digits in the duration, else 20.
        digits = re.search(r'(\d+)', lesson_duration)
        minutes = int(digits.group()) if digits else 20

        # One filename stem shared by the draft, question and reviewed files.
        stem = f"module{module_idx}_lesson{lesson_number.replace('.', '_')}"
        content_file = f"content/{self.input_hash}/{stem}.md"
        question_file = f"question/{self.input_hash}/{stem}.md"
        word_count = f"{word_count_lower} to {word_count_upper}"

        module_info = {
            "number": module_idx,
            "name": module_name,
            "focus": module_focus,
            "outcome": module_outcome,
            "duration": module_duration
        }
        lesson_info = {
            "number": lesson_number,
            "title": lesson_name,
            "duration": lesson_duration,
            "objectives": lesson_outcome,
            "topics": lesson_topics
        }
        complete_data = {"Course": course_title, "Module": module_info, "Lesson": lesson_info}

        print(f"The user journey for content creation is: {complete_data}")
        print(f"The word count for content creation is: {word_count_lower} to {word_count_upper}")
        print(f"The level and motive for content creation is: {self.input_data.Level} and {self.input_data.motive}")

        inputs.update(
            course_title=course_title,
            module_number=str(module_idx),
            module_name=module_name,
            module_focus=module_focus,
            module_outcome=module_outcome,
            lesson_number=lesson_number,
            lesson_title=lesson_name,
            lesson_duration=minutes,
            lesson_objectives=lesson_outcome,
            lesson_topics=lesson_topics,
            user_journey=complete_data,
            word_count=word_count,
        )

        # 1. Draft the lesson.
        print(content_file)
        drafting_crew = self.crew_instance.second_stage_crew(output_file=content_file)
        drafting_crew.kickoff(inputs=inputs)
        with open(content_file, 'r', encoding='utf-8') as handle:
            draft = handle.read()

        # 2. Review and enrich the draft.
        topic_sections = ", and ".join(f"'{topic}'" for topic in lesson_topics)
        print(f"The topic sections are: {topic_sections}")
        updated_data = content_reviewer.review_and_enrich_content(
            topic_sections, course_title, complete_data, draft, word_count
        )

        # 3. Generate questions from the reviewed text.
        qa_crew = self.crew_instance.qa_gen_crew(output_file=question_file)
        inputs["content"] = updated_data
        qa_crew.kickoff(inputs=inputs)

        # 4. Persist the reviewed text.
        updated_content_path = f'updated_content/{self.input_hash}/{stem}.md'
        os.makedirs(os.path.dirname(updated_content_path), exist_ok=True)
        with open(updated_content_path, 'w', encoding='utf-8') as handle:
            handle.write(updated_data)

        return "Content and questions created successfully."

    def _create_new_stage_04_01_2026(self, idx, stage, main_heading):
        """Stage-based variant: generate, review and persist one stage's content and questions.

        Steps, in order:
            1. Run the second-stage crew, which writes the draft to
               ``content/<hash>/stage{idx}.md``.
            2. Pass the draft through ``content_reviewer.review_and_enrich_content``.
            3. Run the QA-generation crew on the reviewed text; it writes to
               ``question/<hash>/stage{idx}.md``.
            4. Save the reviewed text to ``updated_content/<hash>/stage{idx}.md``.

        Args:
            idx: 1-based stage number.
            stage: Mapping with the keys ``stage_title``, ``focus``,
                ``outcome``, ``duration`` and ``topics_covered``.
            main_heading: Main heading of the parsed outline.

        Returns:
            str: A fixed success message.
        """
        inputs = self.input_data.dict()
        print(f"Stage {idx}: {stage['stage_title']}")
        stage_no = stage['stage_title']
        print(f"The stage value for content creation is : {stage_no}")
        print(f"  Focus: {stage['focus']}")
        focus = stage['focus']
        print(f"  Outcome: {stage['outcome']}")
        outcome = stage['outcome']
        print(f"  Duration: {stage['duration']}")
        duration = stage['duration']
        print(f"  Topics Covered: {stage['topics_covered']}")
        topics = stage['topics_covered']
        print(f"The topics for content creation is : {topics}")
        complete_data = {
            "Stage": f"Stage {idx}: {stage_no}",
            "Focus": focus,
            "Outcome": outcome,
            "Duration": duration,
            "Topics Covered": topics
        }
        print(f"The user journey for content creation is : {complete_data}")

        content_file = f"content/{self.input_hash}/stage{idx}.md"
        question_file = f"question/{self.input_hash}/stage{idx}.md"

        # Target word count: 120-130 words per minute. Fall back to 20
        # minutes when the duration carries no digits, so the range is always
        # defined (same default as the lesson-based pipeline).
        match = re.search(r'\d+', str(duration))
        minutes = int(match.group()) if match else 20
        lower_range = minutes * 120
        upper_range = minutes * 130

        print(f"The word count for content creation is : {lower_range} to {upper_range}")
        print(f"the level and motive for content creation is : {self.input_data.Level} and {self.input_data.motive}")

        inputs["main_heading"] = main_heading
        inputs["stage"] = stage_no
        inputs["topics"] = topics
        inputs["user_journey"] = complete_data
        inputs["word_count"] = f"{lower_range} to {upper_range}"

        # 1. Draft the stage content.
        crew = self.crew_instance.second_stage_crew(output_file=content_file)
        crew.kickoff(inputs=inputs)

        with open(content_file, 'r', encoding='utf-8') as f:
            text = f.read()

        # 2. Review and enrich the draft.
        topic_sections = ", and ".join([f"'{topic}'" for topic in topics])
        print(f"The topic sections are: {topic_sections}")

        updated_data = content_reviewer.review_and_enrich_content(
            topic_sections, main_heading, complete_data, text, f"{lower_range} to {upper_range}"
        )

        # 3. Generate questions from the reviewed text.
        crew = self.crew_instance.qa_gen_crew(output_file=question_file)
        inputs["content"] = updated_data
        crew.kickoff(inputs=inputs)

        # 4. Persist the reviewed text.
        updated_content_path = f'updated_content/{self.input_hash}/stage{idx}.md'
        os.makedirs(os.path.dirname(updated_content_path), exist_ok=True)
        with open(updated_content_path, 'w', encoding='utf-8') as out_file:
            out_file.write(updated_data)

        return "content and questions created successfully.."


    def intelligent_text_cleaner(self, text):
        """Strip markdown and formatting noise from text before audio conversion.

        Processing order:
            1. Remove markdown syntax outright: heading markers, ``*``/``_``
               emphasis characters, fenced and inline code, links and images,
               HTML entities, list and blockquote markers, table rows.
            2. Drop leading labels such as ``Note:``, ``Step 2:``, ``Q3:`` or
               a difficulty tag (``Easy``, ``Hard:`` ...).
            3. Collapse runs of ``.``, ``!`` and ``?`` into a single ``.``.
            4. Capitalise the first letter of every line, join all lines with
               spaces and collapse whitespace.

        Args:
            text: Text to clean; any falsy value yields ``""``. Other values
                are converted with ``str()``.

        Returns:
            str: A single line of plain text.
        """
        if not text:
            return ""

        text = str(text)

        # (pattern, flags) pairs removed outright. Only the fenced-code
        # pattern needs DOTALL. Applying DOTALL to every pattern lets the
        # link and table-row patterns run across line breaks and delete
        # ordinary text between an unrelated '[' and a later '](...)'.
        per_line = re.MULTILINE
        patterns_to_remove = [
            # Markdown headings
            (r'^#{1,6}\s+', per_line),
            # Bold/italic markers
            (r'\*\*|\*|__|_', per_line),
            # Code blocks (may legitimately span several lines)
            (r'```.*?```', per_line | re.DOTALL),
            (r'`[^`]+`', per_line),
            # Links and images; the negated classes keep a match inside one
            # bracket pair on one line
            (r'!?\[[^\]\n]*\]\([^)\n]*\)', per_line),
            # HTML entities
            (r'&[a-z]+;', per_line),
            # Special formatting
            (r'^\s*[-*+]\s+', per_line),  # Bullet points
            (r'^\s*\d+\.\s+', per_line),  # Numbered lists
            (r'^>\s+', per_line),  # Blockquotes
            # Table formatting
            (r'\|-+\|', per_line),
            (r'^\|.*?\|$', per_line),
        ]

        for pattern, flags in patterns_to_remove:
            text = re.sub(pattern, '', text, flags=flags)

        # Replace specific patterns with natural speech.
        # NOTE: the first three entries rarely fire, because the removal pass
        # above has already stripped the same heading/list prefixes.
        replacements = {
            # Convert headers to natural speech
            r'^#{1,3}\s+(.*?)\s*$': r'\1. ',
            # Convert bullet points to natural speech
            r'^\s*[-*+]\s+(.*?)\s*$': r'\1. ',
            r'^\s*\d+\.\s+(.*?)\s*$': r'\1. ',
            # Handle common AI output patterns
            r'^(Introduction|Conclusion|Summary|Note|Tip|Warning|Important):\s*': '',
            r'^(Step \d+|Phase \d+|Part \d+):\s*': '',
            # Clean up question patterns
            r'^(Q\d+|Question \d+)[:.]?\s*': '',
            # \b keeps ordinary words that merely start with a label intact
            # ("Hardware", "Moderately", "Difficulty").
            r'^(Easy|Moderate|Difficult|Hard)\b[-:]?\s*': '',
        }

        for pattern, replacement in replacements.items():
            text = re.sub(pattern, replacement, text, flags=re.MULTILINE | re.IGNORECASE)

        # Remove excessive punctuation
        text = re.sub(r'[.!?]{2,}', '.', text)

        # Normalize whitespace
        lines = []
        for line in text.split('\n'):
            line = line.strip()
            if line:
                # Capitalize first letter if needed
                if line[0].islower() and len(line) > 1:
                    line = line[0].upper() + line[1:]
                lines.append(line)

        text = ' '.join(lines)  # Join as continuous speech

        # Final cleanup
        text = re.sub(r'\s+', ' ', text).strip()

        return text

    def optimal_markdown_to_speech(self, text):
        """
        Optimal markdown to speech converter - simpler but effective.
        Preserves semantic meaning without over-engineering.
        """
        if not text:
            return ""
        
        lines = text.split('\n')
        speech_lines = []
        
        for line in lines:
            line = line.strip()
            if not line:
                continue
            
            # Handle headings with natural speech
            if line.startswith('# '):
                speech_lines.append(f"Main topic: {line[2:].strip()}")
            elif line.startswith('## '):
                speech_lines.append(f"Section: {line[3:].strip()}")
            elif line.startswith('### '):
                speech_lines.append(f"Subsection: {line[4:].strip()}")
            elif line.startswith('#### '):
                speech_lines.append(f"{line[5:].strip()}")
            
            # Handle lists naturally
            elif line.startswith('- ') or line.startswith('* ') or line.startswith('+ '):
                item = line[2:].strip()
                # Clean inline formatting
                item = re.sub(r'[\*_]{1,2}(.*?)[\*_]{1,2}', r'\1', item)
                item = re.sub(r'`([^`]+)`', r'code \1', item)
                speech_lines.append(f"• {item}")
            
            elif re.match(r'^\d+\.\s+', line):
                item = re.sub(r'^\d+\.\s+', '', line)
                item = re.sub(r'[\*_]{1,2}(.*?)[\*_]{1,2}', r'\1', item)
                speech_lines.append(f"• {item}")
            
            # Handle bold/italic naturally
            else:
                # Remove formatting but keep text
                clean_line = line
                # Remove **bold** and *italic* markers
                clean_line = re.sub(r'\*\*(.*?)\*\*', r'\1', clean_line)
                clean_line = re.sub(r'\*(.*?)\*', r'\1', clean_line)
                # Remove `code` markers
                clean_line = re.sub(r'`([^`]+)`', r'code \1', clean_line)
                # Remove links but keep text
                clean_line = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', clean_line)
                
                if clean_line.strip():
                    speech_lines.append(clean_line)
        
        # Add natural pauses
        result = []
        for i, line in enumerate(speech_lines):
            result.append(line)
            # Add pause after headings and list items
            if (line.startswith(('Main topic:', 'Section:', 'Subsection:')) or 
                line.startswith('• ')):
                result.append('')  # Pause
        
        return '\n'.join(result).strip()
    


    def markdown_to_speech(self, text):
        """
        Convert markdown to natural speech while preserving semantic meaning.

        The input is processed line by line; blank lines are dropped. Each
        remaining line is mapped to one spoken line:

        - ``#`` .. ``######`` headings -> "Title:", "Chapter:", "Topic:",
          "Section:", "Minor section:", "Subsection:" followed by the text
        - task items (``[ ] x``, ``- [x] x``) -> "To do: x" / "Completed: x"
        - bullets (``-``, ``*``, ``+``) -> "• x"
        - numbered items (``3. x``) -> "Point 3: x"
        - blockquotes (``> x``) -> "Quote: x"
        - horizontal rules -> "---" (pause marker)
        - table rows -> "Table row: a; b"; separator rows are skipped
        - anything else -> the line with inline formatting removed

        Args:
            text: Markdown source; converted with ``str()`` if not a string.

        Returns:
            The spoken lines joined by newlines and passed through
            ``self._add_speech_pauses``; "" when ``text`` is falsy.
        """
        if not text:
            return ""

        # Spoken label for each heading level (count of leading '#').
        heading_labels = {
            1: "Title",
            2: "Chapter",
            3: "Topic",
            4: "Section",
            5: "Minor section",
            6: "Subsection",
        }
        result_lines = []

        for line in str(text).split('\n'):
            line = line.strip()
            if not line:
                continue

            # Headings: 1-6 hashes followed by a single space.
            heading = re.match(r'^(#{1,6}) ', line)
            if heading:
                label = heading_labels[len(heading.group(1))]
                result_lines.append(f"{label}: {line[heading.end():].strip()}")
                continue

            # Task-list items. Checked BEFORE plain bullets because the usual
            # markdown form is "- [x] item", which the bullet rule would
            # otherwise claim and read the "[x]" aloud. The state comes from
            # the marker itself, not from "[x]" appearing later in the line.
            task = re.match(r'^(?:[-*+]\s+)?\[([ xX])\]\s+', line)
            if task:
                content = self._remove_inline_formatting(line[task.end():])
                state = "To do" if task.group(1) == ' ' else "Completed"
                result_lines.append(f"{state}: {content}")
                continue

            # Bullet points (-, *, +)
            bullet = re.match(r'^[-*+]\s+', line)
            if bullet:
                content = self._remove_inline_formatting(line[bullet.end():])
                result_lines.append(f"• {content}")
                continue

            # Numbered lists (1., 2., etc.) keep their number in speech.
            numbered = re.match(r'^(\d+)\.\s+(.*)', line)
            if numbered:
                content = self._remove_inline_formatting(numbered.group(2))
                result_lines.append(f"Point {numbered.group(1)}: {content}")
                continue

            # Blockquotes
            if line.startswith('> '):
                content = self._remove_inline_formatting(line[1:].strip())
                result_lines.append(f"Quote: {content}")
                continue

            # Horizontal rule
            if re.match(r'^[-*_]{3,}$', line):
                result_lines.append("---")  # Pause marker
                continue

            if '|' in line:
                # Table separator rows (e.g. "|---|:--:|") carry no content.
                # This must be tested on its own: such rows contain no word
                # characters, so they never satisfy the table-row test below
                # and would otherwise be emitted as regular text.
                if re.match(r'^\|?[-:| ]+\|?$', line):
                    continue
                # Table data rows: word characters on both sides of a pipe.
                if re.search(r'\w.*\|.*\w', line):
                    cells = [cell.strip() for cell in line.split('|') if cell.strip()]
                    if cells:
                        result_lines.append(f"Table row: {'; '.join(cells)}")
                    continue

            # Regular text with inline formatting
            content = self._remove_inline_formatting(line)
            if content:
                result_lines.append(content)

        # Join with appropriate pauses
        return self._add_speech_pauses('\n'.join(result_lines))

    def _remove_inline_formatting(self, text):
        """
        Remove inline markdown formatting while preserving semantic meaning.
        """
        if not text:
            return ""
        
        # Save original for reference
        original = text
        
        # Handle bold with emphasis cue
        def replace_bold(match):
            content = match.group(1)
            return f"{content}"  # In speech, we might say "emphasized" or use tone
        
        # Handle italic with slight emphasis cue
        def replace_italic(match):
            content = match.group(1)
            return f"{content}"  # Subtle emphasis in speech
        
        # Handle inline code
        def replace_inline_code(match):
            content = match.group(1)
            return f"code: {content}"
        
        # Handle strikethrough
        def replace_strikethrough(match):
            content = match.group(1)
            return f"strikethrough: {content}"
        
        # Process in order
        text = re.sub(r'\*\*(.*?)\*\*', replace_bold, text)
        text = re.sub(r'\*(.*?)\*', replace_italic, text)
        text = re.sub(r'__(.*?)__', replace_bold, text)
        text = re.sub(r'_(.*?)_', replace_italic, text)
        text = re.sub(r'`(.*?)`', replace_inline_code, text)
        text = re.sub(r'~~(.*?)~~', replace_strikethrough, text)
        
        # Handle links: [text](url) -> "text (see link)"
        text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
        
        # Handle images: ![alt](url) -> "Image: alt"
        text = re.sub(r'!\[([^\]]*)\]\([^)]+\)', r'Image: \1', text)
        
        # Handle HTML entities
        text = text.replace('&nbsp;', ' ')
        text = text.replace('&amp;', 'and')
        text = text.replace('&lt;', 'less than')
        text = text.replace('&gt;', 'greater than')
        
        return text.strip()

    def _add_speech_pauses(self, text):
        """
        Add natural pauses for speech based on content structure.
        """
        lines = text.split('\n')
        result = []
        
        for i, line in enumerate(lines):
            line = line.strip()
            if not line:
                continue
            
            # Add longer pause after headings
            if line.startswith(('Title:', 'Chapter:', 'Topic:', 'Section:')):
                result.append(line)
                result.append("")  # Extra pause
            
            # Add pause after bullet points and numbered items
            elif line.startswith(('• ', 'Point ', 'Completed:', 'To do:')):
                result.append(line)
                if i < len(lines) - 1 and not lines[i+1].startswith(('• ', 'Point ')):
                    result.append("")  # Pause after list ends
            
            # Add slight pause after regular lines
            else:
                result.append(line)
                # Don't add pause if next line is continuation
                if i < len(lines) - 1 and lines[i+1].strip() and not lines[i+1].startswith(('• ', 'Point ', 'Title:', 'Chapter:')):
                    result.append("")  # Small pause
        
        return '\n'.join(result)

    def text_to_wav(self, text, output_file):
        """
        Convert text to speech with gTTS and save it as an MP3 file.

        Despite the name, no WAV file is written: when ``output_file`` ends
        in ".wav" the audio is saved next to it with a ".mp3" extension,
        otherwise ``output_file`` is used as the MP3 path unchanged.

        The text is cleaned with ``self.intelligent_text_cleaner`` first.
        (``self.optimal_markdown_to_speech`` and ``self.markdown_to_speech``
        are alternative cleaners that are not used here.)

        Args:
            text: Text to speak; converted with ``str()``.
            output_file: Requested output path (str or path-like).

        Returns:
            The path of the MP3 file that was written, or None when the
            cleaned text is empty or the conversion failed. On failure the
            error and the first 500 characters of the text are written to
            ``<output_file>.error.txt`` on a best-effort basis.
        """
        print(f"Converting text to audio: {output_file}")

        import traceback
        import uuid

        temp_mp3 = None
        try:
            # Clean text
            text = str(text).strip()
            text = self.intelligent_text_cleaner(text)
            if not text:
                print("Warning: Empty text, skipping audio creation")
                return None

            # Swap only a trailing ".wav"; a blanket str.replace would also
            # rewrite ".wav" appearing in directory names or mid-filename.
            requested = str(output_file)
            if requested.endswith('.wav'):
                mp3_output = requested[:-len('.wav')] + '.mp3'
            else:
                mp3_output = requested

            # Unique temp file in the destination directory: os.replace can
            # fail when source and target are on different drives/devices,
            # which a temp file in the working directory may be.
            target_dir = os.path.dirname(mp3_output) or '.'
            temp_mp3 = os.path.join(
                target_dir, f"temp_audio_{uuid.uuid4().hex[:8]}.mp3"
            )

            # Convert text to mp3 using gTTS
            print(f"Creating MP3 with gTTS: {temp_mp3}")
            tts = gTTS(text=text, lang='en', slow=False)
            tts.save(temp_mp3)
            print("✓ gTTS conversion complete")

            # Move the temp file to final location
            if os.path.exists(temp_mp3):
                os.replace(temp_mp3, mp3_output)
                print(f"✓ Audio saved as {mp3_output}")
                return mp3_output

            print(f"Warning: gTTS did not produce {temp_mp3}")

        except Exception as e:
            print(f"❌ Error in text_to_wav: {e}")
            traceback.print_exc()

            # Last resort: save text to file. Best effort only - a failure
            # here must not escape the handler and mask the original error.
            error_file = str(output_file) + ".error.txt"
            try:
                with open(error_file, 'w', encoding='utf-8') as f:
                    f.write(f"Error: {e}\n\nText: {str(text)[:500]}")
                print(f"Saved error details to: {error_file}")
            except OSError as write_error:
                print(f"Could not save error details to {error_file}: {write_error}")

        finally:
            # Do not leave a partial temp file behind when a step failed.
            if temp_mp3 and os.path.exists(temp_mp3):
                try:
                    os.remove(temp_mp3)
                except OSError as cleanup_error:
                    print(f"Warning: could not remove {temp_mp3}: {cleanup_error}")

        return None
    
    



    def wav_to_text_whisper(self, wav_file):
        """
        Transcribe an audio file with Whisper's "base" model.

        The model is loaded on every call. ``wav_file`` is passed to Whisper
        as ``str(wav_file)``; the transcribed text is printed and returned.
        """
        print("inside wave to text conversion")
        transcriber = whisper.load_model("base")
        transcript = transcriber.transcribe(str(wav_file))["text"]
        print("\U0001F3A4 Transcription:", transcript)
        return transcript