o
    ufi                     @   s   d dl Z d dlmZ d dlZddlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d d	lmZ d d
lmZ e	 Ze Ze Ze Zd dlmZ d dlZd dlZd dlmZ d dlmZ d dl Z d dlZdd Ze Z G dd dZ!dS )    N)HTTPException   )HashGenerator)UserJourney)MicrolearningDurationEstimator)CourseOutlineParser)ContentReviewer)Synthesizer)LiveWhisperSTTTool)Path)gTTS)AudioSegmentc                     s~   t jt jt  fdd} |  }|r9|t_t jt j|d}t j|r-|t_	n|t_	t
d|  dS t
d dS )z!Configure pydub with ffmpeg pathsc                     sv   t j d} t j| r| S tddD ]$}t jj gdg| R  }t jt j|d} t j| r8|   S qd S )Nz
ffmpeg.exe      z..)ospathjoinexistsrangeabspath)ffmpeg_pathi
parent_dircurrent_dir /home/azureuser/microlearn/backend/user_journey_with_openai/agentic_workflow/src/user_journey_service/services/content_creation_service.pyfind_ffmpeg   s   z$configure_pydub.<locals>.find_ffmpegzffprobe.exeu   ✓ Pydub configured with: Tu=   ⚠ Warning: ffmpeg.exe not found. Audio conversion may fail.F)r   r   dirnamer   __file__r   	converterr   r   ffprobeprint)r   r   ffprobe_pathr   r   r   configure_pydub   s   r$   c                   @   s   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3S )4ContentCreationServicec                 C   sn   t  | _|| _t|| _d| j d| _d| j d| _d| j d| _d| j d| _	d| j d| _
d S )Nz	research/.mdzoutput/zoutput1/zoutput2/zparsed_course_content/z.json)r   crew_instance
input_datar   generate_input_hash
input_hashresearch_fileoutput_fileoutput_file_1output_file_2json_output_path)selfr(   r   r   r   __init__F   s   zContentCreationService.__init__c                 C   s  zNt j| jsdddW S t j| jrMtd t| j}| }t jt j| j	dd |
| j	| td|d  d	 |d }t|d
 ddD ]\}}|d }|d }|d }|d }	td| d|  tdd| }
t|d ddD ]\}}|d }|d }|d }|d }td|}|r|d}n| d| }tdd| }td|}|rt| }|d }|d }nd}d}td | d|  td!|  td"|  td#|  |  r'| ||r
| ||rtd$ q|td% | || q|td& | j||
|||	||||||||d' | || q|td& | j||
|||	||||||||d' | || q|qMd(d)| jd*W S W d.S  tye } z	td+d,| d-d.}~ww )/zCCreates content for each module and lesson and runs user evaluationfailureUser journey is not created.statusmessageThe user journey is createdTexist_okzCourse Title: course_title
modulesr   startmodule_titlefocusoutcomedurationzProcessing Module : z^Module \d+:\s* lessonslesson_titletopicszLesson (\d+\.\d+).z^Lesson \d+\.\d+:\s*(\d+)x      i`	  i(
  z  Processing Lesson z    Duration: z    Outcome: z    Topics: 3Audio files for content and questions are availableAudio files are not availablez'Content and questions are not available)
module_idxmodule_namemodule_focusmodule_outcomemodule_durationlesson_numberlesson_namelesson_durationlesson_outcomelesson_topicsr:   word_count_lowerword_count_uppersuccesszContents are created.r5   r6   hashid  Error parsing result: status_codedetailN)r   r   r   r,   r"   r   parse_contentmakedirsr   r/   export_to_json	enumerateresubstripsearchgroupint_is_first_iteration_has_updated_content_questions _has_audio_content_and_questions_create_audio_files_create_new_lesson!_create_audio_files_without_checkr*   	Exceptionr   )r0   parserparsed_resultr:   rN   moduler?   rP   rQ   rR   rO   
lesson_idxlessonlesson_title_fullrU   rV   rW   lesson_number_matchrS   rT   duration_matchminuteslower_rangeupper_rangeer   r   r   run_content_creationP   s   



Mez+ContentCreationService.run_content_creationc              
   C   sz  zt j| jsdddW S t j| jrtd t| j}| }t| t jt j| j	dd |
| j	| td|d  d	 |d }t| t|d
 ddD ]I\}}td|  |  r| |r{| |rqtd qStd | | qStd | ||| | | qStd | ||| | | qSdd| jdW S W dS  ty } z	tdd| dd}~ww )z6Creates content for each stage and run user evaluationr2   r3   r4   r7   Tr8   zMain Heading: main_headingr;   stagesr   r=   zThe iteration : rL   rM   z%content and question is not availablerZ   zcontents are created.r[   r]   r^   r_   N)r   r   r   r,   r"   r   rb   rc   r   r/   rd   re   rl   rm   rn   ro   _create_new_stagerq   r*   rr   r   )r0   rs   rt   r   idxstager~   r   r   r   run_content_creation_04_01_2026   sB   



z6ContentCreationService.run_content_creation_04_01_2026c                 C   s&   t d| jj  | jjdkrdS dS )NzInside feedback test : zFirst iterationTF)r"   r(   feedback)r0   r   r   r   rl      s   z*ContentCreationService._is_first_iterationc                 C   sX   | dd}td| j d| d| d}td| j d| d| d}| o+| S )z9Check if content and questions already exist for a lessonrH   _updated_content//module_lessonr&   	question/)replacer   r*   r   )r0   rN   rS   lesson_safeupdated_content_filequestion_filer   r   r   rm      s   z5ContentCreationService._has_updated_content_questionsc           	      C   s   t d |dd}td| j d| d| d}td| j d| d| d	}td| j d| d| d
}td| j d| d| d}| oU| oU| oU| }t d|  |S )z'Check if audio files exist for a lesson.Inside audio files availability check functionrH   r   audio/lessons/r   r   .wavaudio/questions//1.wav/2.wav/3.wavThe output is : )r"   r   r   r*   r   )	r0   rN   rS   r   audio_contentaudio_question1audio_question2audio_question3resultr   r   r   rn      s   z7ContentCreationService._has_audio_content_and_questionsc                 C   s@   t d| j d| d}t d| j d| d}| o| S )Nr   /stager&   r   )r   r*   r   )r0   r   r   r   r   r   r   )_has_updated_content_questions_04_01_2026	  s   z@ContentCreationService._has_updated_content_questions_04_01_2026c                 C   s   t d td| j d| d}td| j d| d}td| j d| d}td| j d| d}| oC| oC| oC| }t d	|  |S )
Nr   audio/stage/r   r   r   r   r   r   r   )r"   r   r*   r   )r0   r   r   r   r   r   r   r   r   r   +_has_audio_content_and_questions_04_01_2026  s    zBContentCreationService._has_audio_content_and_questions_04_01_2026c                 C   s   i }t d| j d| d}td|  t d| j d| d}t|d}|D ]-}t| | |}td td	 ttd
}||d}	|||< | j	 }
|
j
|	d q*d|i}	| j }
|
j
|	d d S )Nr   r   r   z$The audio content is available at : r   /*.wavu/   🎤 Press Enter when you're ready to answer...u    📢 Listening to your answer...zEnter your answer: )questionanswerinputsusers_response)r   r*   r"   sortedglobwav_to_text_whisperinputstrr'   evaluator_crewkickoffassessment_crew)r0   r   all_qns_answerr   audio_question	wav_fileswav_filer   r   r   crewr   r   r   _run_evaluation  s$   



z&ContentCreationService._run_evaluationc           
      C   s  | dd}td| j d| d| d}| r!td|  n@tjtjd| j dd	d
 td| j d| d| d}t	|ddd}|
 }W d   n1 sVw   Y  | || td| j d| d| d}t|d}	|	stj|d	d
 td | || dS dS )zCreate audio files for a lessonrH   r   r   r   r   r   z/Audio for the content is already available at: r   Tr8   r   r&   rutf-8encodingNr   r   Question audio is not available)r   r   r*   r   r"   r   rc   r   r   openreadtext_to_wavr   r   _create_audio_question)
r0   rN   rS   r   r   content_fileftext_contentaudio_question_dirr   r   r   r   ro   -  s"    
z*ContentCreationService._create_audio_filesc           	      C   s   | dd}td| j d| d| d}tjtjd| j ddd	 td
| j d| d| d}t|ddd}| }W d   n1 sJw   Y  | 	|| td| j d| d| d}tj|dd	 t
d | || dS )z>Create audio files for a lesson without checking if they existrH   r   r   r   r   r   r   Tr8   r   r&   r   r   r   Nr   r   )r   r   r*   r   rc   r   r   r   r   r   r"   r   )	r0   rN   rS   r   r   r   r   r   r   r   r   r   rq   E  s    
z8ContentCreationService._create_audio_files_without_checkc                 C   s  t d| j d| d}| rtd|  n=tjtjd| j ddd t d| j d| d	}t|d
dd}|	 }W d    n1 sJw   Y  | 
|| t d| j d| d}t|d}|stjtjd| j d| ddd td | | d S d S )Nr   r   r   z/Audio for the content is already available at :r   Tr8   r   r&   r   r   r   r   r   question audio is not available)r   r*   r   r"   r   rc   r   r   r   r   r   r   r   r   )r0   r   r   r   r   r   r   r   r   r   r   _create_audio_files_04_01_2026X  s     
&z5ContentCreationService._create_audio_files_04_01_2026c                 C   s   t d| j d| d}tjtjd| j ddd t d| j d| d}t|d	d
d}| }W d    n1 s>w   Y  | || tjtjd| j d| ddd t	d | 
| d S )Nr   r   r   r   Tr8   r   r&   r   r   r   r   r   )r   r*   r   rc   r   r   r   r   r   r"   r   )r0   r   r   r   r   r   r   r   r   ,_create_audio_files_without_check_04_01_2026j  s    
&zCContentCreationService._create_audio_files_without_check_04_01_2026c                 C   s  | dd}td| j d| d| d}| rt|ddd	}| }W d
   n1 s/w   Y  | |}tdt| d| d|  t	|ddD ]1\}}	td| d| d| d|	d
d  d	 td| j d| d| d| d	}
| 
|	|
 qNd
S td|  d
S )z6Create audio files for questions for a specific lessonrH   r   r   r   r   r&   r   r   r   NFound z* questions to convert to audio for Module z	, Lesson r   r=      
🧠 Asking Question z for Module rC   d   ...
r   r   r   zQuestion file not found: )r   r   r*   r   r   r   parse_questionsr"   lenre   r   )r0   rN   rS   r   r   r   	questionsparsed_questionsindexr   r   r   r   r   r   v  s   

*$z-ContentCreationService._create_audio_questionc           	   	   C   s   t d| j d| d}t|ddd}| }W d    n1 s"w   Y  | |}tdt| d t|d	d
D ](\}}td| d|d d  d t d| j d| d| d}| || q<d S )Nr   r   r&   r   r   r   r   z questions to convert to audior   r=   r   rC   r   r   r   r   r   )	r   r*   r   r   r   r"   r   re   r   )	r0   r   r   r   r   r   r   r   r   r   r   r   !_create_audio_question_04_01_2026  s   

z8ContentCreationService._create_audio_question_04_01_2026c                 C   s   | d}g }g }|D ]8}| }|ds|dr7|r)|d|  g }tdd|}tdd|}|rC|dsC|| q|rP|d|  dd	 |D }|smd
}t||tjtj	B }dd	 |D }|dd S )z"Parse questions from markdown textr;   z### Q z^### Q\d+:\s*rD   z^### Q\d+:.*?\s*z###c                 S   s    g | ]}|rt |d kr|qS )
   )r   .0qr   r   r   
<listcomp>  s     z:ContentCreationService.parse_questions.<locals>.<listcomp>z#### Q\d+:?\s*(.*?)(?=\n### Q\d+:|$)c                 S      g | ]
}|  r|  qS r   rh   r   r   r   r   r         N	   )
splitrh   
startswithappendr   rf   rg   findallDOTALL
IGNORECASE)r0   md_textlinesr   current_questionlinepatternr   r   r   r     s,   

z&ContentCreationService.parse_questionsc                 C   s4  | j  }td|}|rt| }nd}d| j d| d|dd d}d	| j d| d|dd d}||||||d
||||	|
dd}td|  td| d|  td| j j	 d| j j
  ||t||||||||	|
|| d| d t| | jj|d}|j|d t|ddd}| }W d   n1 sw   Y  ddd |
D }td|  t||||| d| }| jj|d}||d< |j|d d| j d| d|dd d}tjtj|dd  t|d!dd}|| W d   d"S 1 sw   Y  d"S )#zCreate content for a new lessonrI      content/r   r   rH   r   r&   r   )numbernamer@   rA   rB   )r   titlerB   
objectivesrG   )CourseModuleLessonz*The user journey for content creation is: z(The word count for content creation is:  to z.The level and motive for content creation is:  and )r:   module_numberrO   rP   rQ   rS   rF   rU   lesson_objectivesrW   user_journey
word_countr,   r   r   r   r   N, and c                 S      g | ]}d | d qS 'r   r   topicr   r   r   r         z=ContentCreationService._create_new_lesson.<locals>.<listcomp>The topic sections are: contentr   Tr8   wz+Content and questions created successfully.)r(   dictrf   ri   rk   rj   r*   r   r"   Levelmotiveupdater   r'   second_stage_crewr   r   r   r   content_reviewerreview_and_enrich_contentqa_gen_crewr   rc   r   r   write)r0   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   r:   rX   rY   r   duration_minutes_matchr{   r   r   complete_datar   r   texttopic_sectionsupdated_dataupdated_content_pathout_filer   r   r   rp     sx   
""
"
z)ContentCreationService._create_new_lessonc              	   C   s  | j  }td| d|d   |d }td|  td|d   |d }td|d   |d }td	|d
   |d
 }td|d   |d }	td|	  d| d| ||||	d}
td|
  d| j d| d}d| j d| d}td|}|rt| }|d }|d }td| d|  td| j j d| j j	  ||d< ||d< |	|d< |
|d< | d| |d< | j
j|d }|j|d! t|d"d#d$}| }W d    n1 sw   Y  d%d&d' |	D }td(|  t|||
|| d| }| j
j|d }||d)< |j|d! d*| j d| d}tjtj|d+d, t|d-d#d$}|| W d    d.S 1 sMw   Y  d.S )/NzStage rC   stage_titlez*The stage value for content creation is : z	  Focus: r@   z  Outcome: rA   z  Duration: rB   z  Topics Covered: topics_coveredz%The topics for content creation is : )StageFocusOutcomeDurationzTopics Coveredz+The user journey for content creation is : r   r   r&   r   z\d+rJ   rK   z)The word count for content creation is : r   z/the level and motive for content creation is : r   r   r   rG   r   r   r   r   r   r   r   r   c                 S   r   r   r   r   r   r   r   r   C  r   zGContentCreationService._create_new_stage_04_01_2026.<locals>.<listcomp>r   r   r   Tr8   r   z,content and questions created successfully..)r(   r   r"   r*   rf   ri   rk   rj   r   r  r'   r  r   r   r   r   r  r  r  r   rc   r   r   r  )r0   r   r   r   r   stage_nor@   rA   rB   rG   r	  r   r   matchr{   r|   r}   r   r   r
  r  r  r  r  r   r   r   _create_new_stage_04_01_2026  sn   


z3ContentCreationService._create_new_stage_04_01_2026c                 C   s  |sdS t |}g d}|D ]}tj|d|tjtjB d}qdddddddd}| D ]\}}tj|||tjtjB d}q,tdd|}g }|dD ]'}| }|rs|rn|d	 	 rnt
|d
krn|d	  |d
d  }|| qLd|}tdd| }|S )zz
        Intelligently clean text for audio conversion.
        Handles various markdown and formatting patterns.
        rD   )z
^#{1,6}\s+z\*\*|\*|__|_z	```.*?```z`[^`]+`z!?\[.*?\]\(.*?\)z&[a-z]+;z^\s*[-*+]\s+z^\s*\d+\.\s+z^>\s+z\|-+\|z	^\|.*?\|$)flagsz\1. )z^#{1,3}\s+(.*?)\s*$z^\s*[-*+]\s+(.*?)\s*$z^\s*\d+\.\s+(.*?)\s*$zA^(Introduction|Conclusion|Summary|Note|Tip|Warning|Important):\s*z"^(Step \d+|Phase \d+|Part \d+):\s*z^(Q\d+|Question \d+)[:.]?\s*z'^(Easy|Moderate|Difficult|Hard)[-:]?\s*z	[.!?]{2,}rH   r;   r   r   Nr   z\s+)r   rf   rg   	MULTILINEr   itemsr   r   rh   islowerr   upperr   r   )r0   r
  patterns_to_remover   replacementsreplacementr   r   r   r   r   intelligent_text_cleanerV  s8   

z/ContentCreationService.intelligent_text_cleanerc           	      C   s  |sdS | d}g }|D ]}| }|sq|dr*|d|dd    q|dr>|d|d	d    q|d
rR|d|dd    q|drd||dd    q|dss|dss|dr|dd  }tdd|}tdd|}|d|  qtd|rtdd|}tdd|}|d|  q|}tdd|}tdd|}tdd|}tdd|}| r|| qg }t|D ]\}}|| |ds|dr|d qd| S )z
        Optimal markdown to speech converter - simpler but effective.
        Preserves semantic meaning without over-engineering.
        rD   r;   # zMain topic: r   N## 	Section:    ### Subsection: r   ####    z- z* z+ z[\*_]{1,2}(.*?)[\*_]{1,2}\1z	`([^`]+)`zcode \1   • 	^\d+\.\s+\*\*(.*?)\*\*	\*(.*?)\*\[([^\]]+)\]\([^)]+\))zMain topic:Section:zSubsection:)	r   rh   r   r   rf   rg   r  re   r   )	r0   r
  r   speech_linesr   item
clean_liner   r   r   r   r   optimal_markdown_to_speech  sT   








z1ContentCreationService.optimal_markdown_to_speechc           	      C   s  |sdS t |}|d}g }|D ]d}| }|sq|dr1|dd  }|d|  q|drG|dd  }|d	|  q|d
r]|dd  }|d|  q|drs|dd  }|d|  q|dr|dd  }|d|  q|dr|dd  }|d|  qtd|rtdd|}| |}|d|  qtd|rtd|}|r|	d}|	d}| |}|d| d|  qtd|rtdd|}| |}d|
 v sd|v r|d|  q|d |  q|d!r0|dd  }| |}|d"|  qtd#|r=|d$ qd%|v ritd&|ritd'|shd(d) |d%D }|rh|d*d+|  q| |}|rv|| q| d|S ),z
        Convert markdown to natural speech while preserving semantic meaning.
        Handles: # Headings, **bold**, *italic*, - bullets, 1. numbered lists, `code`, [links](url)
        rD   r;   z######    Nr&  z##### r(  zMinor section: r'  r   r#  r%  r$  zTopic: r"  r   z	Chapter: r!  r   zTitle: z	^[-*+]\s+r*  r+  z^(\d+)\.\s+(.*)Point rC   z^\[[ xX]\]\s+z[x]z[X]zCompleted: zTo do: z> zQuote: z^[-*_]{3,}$z---|z
\w.*\|.*\wz^\|?[-:| ]+\|?$c                 S   r   r   r   )r   cellr   r   r   r   :  r   z=ContentCreationService.markdown_to_speech.<locals>.<listcomp>zTable row: z; )r   r   rh   r   r   rf   r  rg   _remove_inline_formattingrj   lowerri   r   _add_speech_pauses)	r0   r
  r   result_linesr   r   r  r   cellsr   r   r   markdown_to_speech  s|   















z)ContentCreationService.markdown_to_speechc                 C   s   |sdS |}dd }dd }dd }dd	 }t d
||}t d||}t d||}t d||}t d||}t d||}t dd|}t dd|}|dd}|dd}|dd}|dd}| S )zV
        Remove inline markdown formatting while preserving semantic meaning.
        rD   c                 S      |  d}| S Nr   rj   r  r   r   r   r   replace_boldR     
zFContentCreationService._remove_inline_formatting.<locals>.replace_boldc                 S   r>  r?  r@  rA  r   r   r   replace_italicW  rC  zHContentCreationService._remove_inline_formatting.<locals>.replace_italicc                 S      |  d}d| S )Nr   zcode: r@  rA  r   r   r   replace_inline_code\     

zMContentCreationService._remove_inline_formatting.<locals>.replace_inline_codec                 S   rE  )Nr   zstrikethrough: r@  rA  r   r   r   replace_strikethrougha  rG  zOContentCreationService._remove_inline_formatting.<locals>.replace_strikethroughr,  r-  z	__(.*?)__z_(.*?)_z`(.*?)`z	~~(.*?)~~r.  r)  z!\[([^\]]*)\]\([^)]+\)z	Image: \1z&nbsp;r   z&amp;andz&lt;z	less thanz&gt;zgreater than)rf   rg   r   rh   )r0   r
  originalrB  rD  rF  rH  r   r   r   r8  G  s(   z0ContentCreationService._remove_inline_formattingc                 C   s   | d}g }t|D ]_\}}| }|sq|dr&|| |d q|drG|| |t|d k rF||d  dsF|d q|| |t|d k rj||d   rj||d  dsj|d qd|S )zK
        Add natural pauses for speech based on content structure.
        r;   )Title:Chapter:zTopic:r/  rD   )r*  r5  z
Completed:zTo do:r   )r*  r5  )r*  r5  rK  rL  )r   re   rh   r   r   r   r   )r0   r
  r   r   r   r   r   r   r   r:  {  s&   




"

2

z)ContentCreationService._add_speech_pausesc                 C   st  t d|  z]t| }| |}|st d W dS ddl}d| jdd  d}t d|  t|d	d
d}|| t d t|	dd}t
j|rbt
	|| t d|  |W S W dS  ty } zIt d|  ddl}|  t|d }	t|	ddd}
|
d| d|dd   W d   n1 sw   Y  t d|	  W Y d}~dS d}~ww )z3Convert text to WAV audio file - SIMPLIFIED VERSIONzConverting text to audio: z,Warning: Empty text, skipping audio creationNr   temp_audio_   z.mp3zCreating MP3 with gTTS: enF)r
  langslowu   ✓ gTTS conversion completer   u   ✓ Audio saved as u   ❌ Error in text_to_wav: z
.error.txtr   r   r   zError: z

Text: r]   zSaved error details to: )r"   r   rh   r   uuiduuid4hexr   saver   r   r   r   rr   	traceback	print_excr   r  )r0   r
  r,   rR  temp_mp3tts
mp3_outputr~   rV  
error_filer   r   r   r   r     s@   

 z"ContentCreationService.text_to_wavc                 C   s6   t d td}|t|}t d|d  |d S )Nzinside wave to text conversionbaseu   🎤 Transcription:r
  )r"   whisper
load_model
transcriber   )r0   r   modelr   r   r   r   r     s
   
z*ContentCreationService.wav_to_text_whisperN)__name__
__module____qualname__r1   r   r   rl   rm   rn   r   r   r   ro   rq   r   r   r   r   r   rp   r  r   r3  r=  r8  r:  r   r   r   r   r   r   r%   E   s4    
n'

#WBICe4 9r%   )"sysfastapir   r   utils.hashingr   user_journey_service.crewr   2user_journey_service.processors.duration_estimatorr   5user_journey_service.processors.StagewiseCourseParserr   0user_journey_service.processors.content_reviewerr   8user_journey_service.processors.user_journey_synthesizerr	   *user_journey_service.tools.custom_stt_toolr
   duration_estimatorr  synthesizerstt_toolpathlibr   rf   r]  gttsr   pydubr   
subprocessr$   pydub_configuredr%   r   r   r   r   <module>   s0    )