o
    }i&                    @   s   d dl Z d dlmZ d dlZddlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d d	lmZ d d
lmZ e	 Ze Ze Ze Zd dlmZ d dlZd dlZd dlmZ d dlmZ d dl Z d dlZd dlZd dl Z dd Z!e! Z"G dd dZ#dS )    N)HTTPException   )HashGenerator)UserJourney)MicrolearningDurationEstimator)CourseOutlineParser)ContentReviewer)Synthesizer)LiveWhisperSTTTool)PathgTTS)AudioSegmentc                     s~   t jt jt  fdd} |  }|r9|t_t jt j|d}t j|r-|t_	n|t_	t
d|  dS t
d dS )z!Configure pydub with ffmpeg pathsc                     sv   t j d} t j| r| S tddD ]$}t jj gdg| R  }t jt j|d} t j| r8|   S qd S )Nz
ffmpeg.exe      z..)ospathjoinexistsrangeabspath)ffmpeg_pathi
parent_dircurrent_dir /home/azureuser/microlearn/backend/user_journey_with_openai/agentic_workflow/src/user_journey_service/services/content_creation_service.pyfind_ffmpeg"   s   z$configure_pydub.<locals>.find_ffmpegzffprobe.exeu   ✓ Pydub configured with: Tu=   ⚠ Warning: ffmpeg.exe not found. Audio conversion may fail.F)r   r   dirnamer   __file__r   	converterr   r   ffprobeprint)r   r   ffprobe_pathr   r   r   configure_pydub   s   r%   c                   @   s  e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zd5d6 Zd7d4 Zd8d9 Zd:d; Zd<d= Z d>d? Z!d@dA Z"dBS )CContentCreationServicec                 C   sn   t  | _|| _t|| _d| j d| _d| j d| _d| j d| _d| j d| _	d| j d| _
d S )Nz	research/.mdzoutput/zoutput1/zoutput2/zparsed_course_content/z.json)r   crew_instance
input_datar   generate_input_hash
input_hashresearch_fileoutput_fileoutput_file_1output_file_2json_output_path)selfr)   r   r   r   __init__I   s   zContentCreationService.__init__c                 C   sX   | dd}td| j d| d| d}td| j d| d| d}| o+| S )z9Check if content and questions already exist for a lesson._updated_content//module_lessonr'   	question/)replacer   r+   r   )r1   
module_idxlesson_numberlesson_safeupdated_content_filequestion_filer   r   r   _has_updated_content_questionsT   s   z5ContentCreationService._has_updated_content_questionsc           	      C   s   t d |dd}td| j d| d| d}td| j d| d| d	}td| j d| d| d
}td| j d| d| d}| oU| oU| oU| }t d|  |S )z'Check if audio files exist for a lessonz.Inside audio files availability check functionr3   r4   audio/lessons/r6   r7   .wavaudio/questions/z/1.wavz/2.wavz/3.wavzThe output is : )r#   r9   r   r+   r   )	r1   r:   r;   r<   audio_contentaudio_question1audio_question2audio_question3resultr   r   r    _has_audio_content_and_questions^   s   z7ContentCreationService._has_audio_content_and_questionsc                 C   s   i }t d| j d| d}td|  t d| j d| d}t|d}|D ]-}t| | |}td td	 ttd
}||d}	|||< | j	 }
|
j
|	d q*d|i}	| j }
|
j
|	d d S )Naudio/stage//stagerA   z$The audio content is available at : rB   /*.wavu/   🎤 Press Enter when you're ready to answer...u    📢 Listening to your answer...zEnter your answer: )questionanswerinputsusers_response)r   r+   r#   sortedglobwav_to_text_whisperinputstrr(   evaluator_crewkickoffassessment_crew)r1   idxall_qns_answerrC   audio_question	wav_fileswav_filerM   rN   rP   crewr   r   r   _run_evaluations   s$   



z&ContentCreationService._run_evaluationc           
      C   s  | dd}td| j d| d| d}| r!td|  n@tjtjd| j dd	d
 td| j d| d| d}t	|ddd}|
 }W d   n1 sVw   Y  | || td| j d| d| d}t|d}	|	stj|d	d
 td | || dS dS )zCreate audio files for a lessonr3   r4   r@   r6   r7   .mp3z/Audio for the content is already available at: rK   Texist_okr5   r'   rutf-8encodingNrB   *.mp3Question audio is not available)r9   r   r+   r   r#   r   makedirsr   r   openreadtext_to_wavrR   rS   _create_audio_question)
r1   r:   r;   r<   rC   content_fileftext_contentaudio_question_dirr]   r   r   r   _create_audio_files   s"    
z*ContentCreationService._create_audio_filesc           	      C   s   | dd}td| j d| d| d}tjtjd| j ddd	 td
| j d| d| d}t|ddd}| }W d   n1 sJw   Y  | 	|| td| j d| d| d}tj|dd	 t
d | || dS )z>Create audio files for a lesson without checking if they existr3   r4   r@   r6   r7   ra   rK   Trb   r5   r'   rd   re   rf   NrB   ri   )r9   r   r+   r   rj   r   r   rk   rl   rm   r#   rn   )	r1   r:   r;   r<   rC   ro   rp   rq   rr   r   r   r   !_create_audio_files_without_check   s    
z8ContentCreationService._create_audio_files_without_checkc                 C   s  t d| j d| d}| rtd|  n=tjtjd| j ddd t d| j d| d	}t|d
dd}|	 }W d    n1 sJw   Y  | 
|| t d| j d| d}t|d}|stjtjd| j d| ddd td | | d S d S )NrI   rJ   rA   z/Audio for the content is already available at :rK   Trb   r5   r'   rd   re   rf   rB   rL   zquestion audio is not available)r   r+   r   r#   r   rj   r   r   rk   rl   rm   rR   rS   rn   )r1   rZ   rC   ro   rp   rq   r\   r]   r   r   r   _create_audio_files_04_01_2026   s     
&z5ContentCreationService._create_audio_files_04_01_2026c                 C   s  | dd}td| j d| d| d}| rt|ddd	}| }W d
   n1 s/w   Y  | |}tdt| d| d|  t	|ddD ]1\}}	td| d| d| d|	d
d  d	 td| j d| d| d| d	}
| 
|	|
 qNd
S td|  d
S )z6Create audio files for questions for a specific lessonr3   r4   r8   r6   r7   r'   rd   re   rf   NzFound z* questions to convert to audio for Module z	, Lesson r   startu   
🧠 Asking Question z for Module : d   z...
rB   rK   rA   zQuestion file not found: )r9   r   r+   r   rk   rl   parse_questionsr#   len	enumeraterm   )r1   r:   r;   r<   r>   rp   	questionsparsed_questionsindexrM   r\   r   r   r   rn      s   

*$z-ContentCreationService._create_audio_questionc                 C   s   | d}g }g }|D ]8}| }|ds|dr7|r)|d|  g }tdd|}tdd|}|rC|dsC|| q|rP|d|  dd	 |D }|smd
}t||tjtj	B }dd	 |D }|dd S )z"Parse questions from markdown text
z### Q z^### Q\d+:\s* z^### Q\d+:.*?\s*z###c                 S   s    g | ]}|rt |d kr|qS )
   )r{   .0qr   r   r   
<listcomp>   s     z:ContentCreationService.parse_questions.<locals>.<listcomp>z#### Q\d+:?\s*(.*?)(?=\n### Q\d+:|$)c                 S      g | ]
}|  r|  qS r   stripr   r   r   r   r          N	   )
splitr   
startswithappendr   resubfindallDOTALL
IGNORECASE)r1   md_textlinesr}   current_questionlinepatternr   r   r   rz      s,   

z&ContentCreationService.parse_questionsc                 C   s  |sdS t |}g d}|D ]}tj|d|tjtjB d}qdddddddd}| D ]\}}tj|||tjtjB d}q,tdd|}g }|dD ]'}| }|rs|rn|d	 	 rnt
|d
krn|d	  |d
d  }|| qLd|}tdd| }|S )zz
        Intelligently clean text for audio conversion.
        Handles various markdown and formatting patterns.
        r   )z
^#{1,6}\s+z\*\*|\*|__|_z	```.*?```z`[^`]+`z!?\[.*?\]\(.*?\)z&[a-z]+;z^\s*[-*+]\s+z^\s*\d+\.\s+z^>\s+z\|-+\|z	^\|.*?\|$flagsz\1. )z^#{1,3}\s+(.*?)\s*$z^\s*[-*+]\s+(.*?)\s*$z^\s*\d+\.\s+(.*?)\s*$zA^(Introduction|Conclusion|Summary|Note|Tip|Warning|Important):\s*z"^(Step \d+|Phase \d+|Part \d+):\s*z^(Q\d+|Question \d+)[:.]?\s*z'^(Easy|Moderate|Difficult|Hard)[-:]?\s*z	[.!?]{2,}r3   r   r   r   Nr   z\s+)rV   r   r   	MULTILINEr   itemsr   r   r   islowerr{   upperr   r   )r1   textpatterns_to_remover   replacementsreplacementr   r   r   r   r   intelligent_text_cleaner  s8   

z/ContentCreationService.intelligent_text_cleanerc           	      C   s  |sdS | d}g }|D ]}| }|sq|dr*|d|dd    q|dr>|d|d	d    q|d
rR|d|dd    q|drd||dd    q|dss|dss|dr|dd  }tdd|}tdd|}|d|  qtd|rtdd|}tdd|}|d|  q|}tdd|}tdd|}tdd|}tdd|}| r|| qg }t|D ]\}}|| |ds|dr|d qd| S )z
        Optimal markdown to speech converter - simpler but effective.
        Preserves semantic meaning without over-engineering.
        r   r   # zMain topic: r   N## 	Section:    ### Subsection: r   ####    z- z* z+ z[\*_]{1,2}(.*?)[\*_]{1,2}\1z	`([^`]+)`zcode \1   • 	^\d+\.\s+\*\*(.*?)\*\*	\*(.*?)\*\[([^\]]+)\]\([^)]+\))zMain topic:Section:zSubsection:)	r   r   r   r   r   r   matchr|   r   )	r1   r   r   speech_linesr   item
clean_linerG   r   r   r   r   optimal_markdown_to_speechJ  sT   








z1ContentCreationService.optimal_markdown_to_speechc           	      C   s  |sdS t |}|d}g }|D ]d}| }|sq|dr1|dd  }|d|  q|drG|dd  }|d	|  q|d
r]|dd  }|d|  q|drs|dd  }|d|  q|dr|dd  }|d|  q|dr|dd  }|d|  qtd|rtdd|}| |}|d|  qtd|rtd|}|r|	d}|	d}| |}|d| d|  qtd|rtdd|}| |}d|
 v sd|v r|d|  q|d |  q|d!r0|dd  }| |}|d"|  qtd#|r=|d$ qd%|v ritd&|ritd'|shd(d) |d%D }|rh|d*d+|  q| |}|rv|| q| d|S ),z
        Convert markdown to natural speech while preserving semantic meaning.
        Handles: # Headings, **bold**, *italic*, - bullets, 1. numbered lists, `code`, [links](url)
        r   r   z######    Nr   z##### r   zMinor section: r   r   r   r   r   zTopic: r   r   z	Chapter: r   r   zTitle: z	^[-*+]\s+r   r   z^(\d+)\.\s+(.*)Point rx   z^\[[ xX]\]\s+z[x]z[X]zCompleted: zTo do: z> zQuote: z^[-*_]{3,}$z---|z
\w.*\|.*\wz^\|?[-:| ]+\|?$c                 S   r   r   r   )r   cellr   r   r   r     r   z=ContentCreationService.markdown_to_speech.<locals>.<listcomp>zTable row: z; )rV   r   r   r   r   r   r   r   _remove_inline_formattinggrouplowersearchr   _add_speech_pauses)	r1   r   r   result_linesr   contentr   numbercellsr   r   r   markdown_to_speech  s|   















z)ContentCreationService.markdown_to_speechc                 C   s   |sdS |}dd }dd }dd }dd	 }t d
||}t d||}t d||}t d||}t d||}t d||}t dd|}t dd|}|dd}|dd}|dd}|dd}| S )zV
        Remove inline markdown formatting while preserving semantic meaning.
        r   c                 S      |  d}| S Nr   r   r   r   r   r   r   replace_bold     
zFContentCreationService._remove_inline_formatting.<locals>.replace_boldc                 S   r   r   r   r   r   r   r   replace_italic  r   zHContentCreationService._remove_inline_formatting.<locals>.replace_italicc                 S      |  d}d| S )Nr   zcode: r   r   r   r   r   replace_inline_code     

zMContentCreationService._remove_inline_formatting.<locals>.replace_inline_codec                 S   r   )Nr   zstrikethrough: r   r   r   r   r   replace_strikethrough  r   zOContentCreationService._remove_inline_formatting.<locals>.replace_strikethroughr   r   z	__(.*?)__z_(.*?)_z`(.*?)`z	~~(.*?)~~r   r   z!\[([^\]]*)\]\([^)]+\)z	Image: \1z&nbsp;r   z&amp;andz&lt;z	less thanz&gt;zgreater than)r   r   r9   r   )r1   r   originalr   r   r   r   r   r   r   r     s(   z0ContentCreationService._remove_inline_formattingc                 C   s   | d}g }t|D ]_\}}| }|sq|dr&|| |d q|drG|| |t|d k rF||d  dsF|d q|| |t|d k rj||d   rj||d  dsj|d qd|S )zK
        Add natural pauses for speech based on content structure.
        r   )Title:Chapter:zTopic:r   r   )r   r   z
Completed:zTo do:r   )r   r   )r   r   r   r   )r   r|   r   r   r   r{   r   )r1   r   r   rG   r   r   r   r   r   r   &  s&   




"

2

z)ContentCreationService._add_speech_pausesc                 C   st  t d|  z]t| }| |}|st d W dS ddl}d| jdd  d}t d|  t|d	d
d}|| t d t|	dd}t
j|rbt
	|| t d|  |W S W dS  ty } zIt d|  ddl}|  t|d }	t|	ddd}
|
d| d|dd   W d   n1 sw   Y  t d|	  W Y d}~dS d}~ww )z3Convert text to WAV audio file - SIMPLIFIED VERSIONConverting text to audio: ,Warning: Empty text, skipping audio creationNr   temp_audio_   ra   Creating MP3 with gTTS: enFr   langslow   ✓ gTTS conversion completerA      ✓ Audio saved as    ❌ Error in text_to_wav: 
.error.txtwre   rf   Error: 

Text:   Saved error details to: )r#   rV   r   r   uuiduuid4hexr   saver9   r   r   r   	Exception	traceback	print_excrk   write)r1   r   r-   r   temp_mp3tts
mp3_outputer   
error_filerp   r   r   r   text_to_wav_oldF  s@   

 z&ContentCreationService.text_to_wav_oldc                    s"   t |d}||I dH  dS )z%Convert text to speech using Edge TTSen-US-JennyNeuralN)edge_ttsCommunicater   )r1   r   output_pathcommunicater   r   r   _text_to_speech_edge|  s   z+ContentCreationService._text_to_speech_edgec                    sX  t d|  zt| }| |}|st d W dS ddl}d| jdd  d}t d|  ddl}||d	}|	|I dH  t d
 t
|}|jjddd t d|j  tj|rtj|}|dkrt d z&t d ddlm} ||ddd}	|		| tj|}|dkrt d W n ty }
 zt d|
  W Y d}
~
nd}
~
ww |dkrddl}||t| t d| d| d t|W S t d|  t| W dS t d|  W dS  ty } zt d|  ddl}|  z<t d ddlm} ||ddd}	|		t| tj|r>tj|dkr>t d|  t|W W  Y d}~S W n tyY }
 zt d|
  W Y d}
~
nd}
~
ww t|d }z0t|d d!d"}|d#| d$|dd%   W d   n	1 sw   Y  t d&|  W n   t d' Y W Y d}~dS W Y d}~dS d}~ww )(.Convert text to WAV audio file - FIXED VERSIONr   r   Nr   r   r   ra   zCreating MP3 with Edge TTS: r   u    ✓ Edge TTS conversion completeTparentsrc       ✓ Directory created/verified: u2   ❌ Warning: Created audio file is empty (0 bytes)zTrying fallback with gTTS...r   r   Fr   u   ✓ gTTS fallback successfulu   ❌ gTTS fallback also failed: r   z (z bytes)u'   ❌ Audio file is empty, not saving to    ❌ Temp file not found: r   z(Attempting gTTS as emergency fallback...u(   ✓ Emergency gTTS fallback successful: u$   ❌ Emergency fallback also failed: r   r   re   rf   r   r   r   r   Could not save error file)r#   rV   r   r   r   r   r   r   r   r   r   parentmkdirr   r   r   getsizegttsr   r   shutilmoveremover   r   rk   r   )r1   r   r-   r   r   r   r   	file_sizer   tts_gttse2r  r   r   r   rp   r   r   r   rm     s   


  
z"ContentCreationService.text_to_wavc                    s  t d|  zzt| }| |}|st d W dS ddl}d| jdd  d}t d|  t|d	}|	|I dH  t d
 t
|}|jjddd t d|j  tj|ryddl}||t| t d|  t|W S t d|  W dS  ty } z[t d|  ddl}|  t|d }	z/t|	ddd}
|
d| d|dd   W d   n1 sw   Y  t d|	  W n   t d Y W Y d}~dS W Y d}~dS d}~ww )r   r   r   Nr   r   r   ra   r   r   r   Tr   r   r   r   r   r   r   re   rf   r   r   r   r   r  )r#   rV   r   r   r   r   r   r   r   r   r   r  r  r   r   r   r  r  r   r   r   rk   r   r1   r   r-   r   r   r   r  r   r   r   rp   r   r   r   text_to_wav_30  sR   

 
z%ContentCreationService.text_to_wav_30c                 C   s  t d|  zxt| }| |}|st d W dS ddl}d| jdd  d}t d|  t|d	d
d}|| t d t	|}|j
jddd t d|j
  tj|rvddl}||t| t d|  t|W S t d|  W dS  ty } z[t d|  ddl}|  t|d }	z/t|	ddd}
|
d| d|dd   W d   n1 sw   Y  t d|	  W n   t d Y W Y d}~dS W Y d}~dS d}~ww )r   r   r   Nr   r   r   ra   r   r   Fr   r   Tr   r   r   r   r   r   r   re   rf   r   r   r   r   r  )r#   rV   r   r   r   r   r   r   r   r   r  r  r   r   r   r  r  r   r   r   rk   r   r  r   r   r   text_to_wav_30012026  sP   


 
z+ContentCreationService.text_to_wav_30012026c                 C   s6   t d td}|t|}t d|d  |d S )Nzinside wave to text conversionbaseu   🎤 Transcription:r   )r#   whisper
load_model
transcriberV   )r1   r^   modelrG   r   r   r   rT   R  s
   
z*ContentCreationService.wav_to_text_whisperc                 C   s  zt j| jsdddW S td t| j}| }td|d  d |d }dddddd	}t|d
 ddD ]\}}|d }t	dd|
 }td| d|  t|d ddD ]f\}	}
|
d }td|}|rr|d}n| d|	 }t	dd|
 }|d  d7  < td| d|  | ||}|d  |d 7  < |d  |d 7  < |d  |d 7  < |d  |d  7  < q\q9d!d"| j|d#|d  d$|d  d%|d  d&d'W S  ty	 } zdd(l}| }td)|  td*|  td+d,| d-d(}~ww ).z6Creates audio files for existing content and questionsfailureUser journey is not created.statusmessagez"Starting audio creation process...Course Title: course_titler   r   )total_lessonscontent_audio_createdquestion_audio_createdskipped_contentskipped_questionsmodulesr   rv   module_title^Module \d+:\s*r   Processing Module rx   lessonslesson_titleLesson (\d+\.\d+)r3   ^Lesson \d+\.\d+:\s*r    Processing Lesson r  content_createdr  questions_createdr  content_skippedr  questions_skippedsuccesszAudio creation completed.zCreated z content audio files and z question audio files for z	 lessons.)r  r  hashidstatssummaryNzERROR in run_audio_creation: zTraceback: r   z$Unexpected error in audio creation: status_codedetail)r   r   r   r-   r#   r   parse_contentr|   r   r   r   r   r   _create_lesson_audio_filesr+   r   r   
format_excr   )r1   parserparsed_resultr  audio_statsr:   moduler!  module_name
lesson_idxlessonlesson_title_fulllesson_number_matchr;   lesson_namelesson_audio_statsr   r   error_detailsr   r   r   run_audio_creation[  s^   
	"z)ContentCreationService.run_audio_creationc           
   
   C   sv  ddddd}| dd}ztd| j d| d| d}td	| j d| d| d
}| r| std z4t|ddd}| }W d   n1 sOw   Y  t| 	|| |d  d7  < td W W |S  t
y }	 ztd|	  W Y d}	~	W |S d}	~	ww td |d  d7  < W |S td|  W |S  t
y }	 ztd|	  W Y d}	~	|S d}	~	ww )9Create audio files for a specific lesson and return statsr   r)  r*  r+  r,  r3   r4   r@   r6   r7   ra   r5   r'       Creating content audio...rd   re   rf   Nr)  r          ✓ Content audio created&       ✗ Error creating content audio:      Content audio already existsr+  %    Warning: Content file not found: u)       ✗ Error in audio creation process: )r9   r   r+   r   r#   rk   rl   asynciorunrm   r   )
r1   r:   r;   r/  r<   content_audio_pathro   rp   rq   r   r   r   r   r5    sF   

z1ContentCreationService._create_lesson_audio_filesc              
   C   s  t d| j d| d| d}t d| j d| d| d}| r| r-t|dng }| s:|jddd	 |rRtd
t| d |d  t|7  < dS td zUt|ddd}|	 }	W d   n1 smw   Y  | 
|	}
d}t|
ddD ]\}}|| d }| s| || |d7 }q|d  |7  < td| d W dS  ty } ztd|  W Y d}~dS d}~ww td|  dS )z'Create audio files for lesson questionsrB   r6   r7   rK   r8   r'   rh   Tr       # question audio files already existr,  $    Creating question audio files...rd   re   rf   Nr   r   rv   ra   r*         ✓ Created  question audio files'       ✗ Error creating question audio: &    Warning: Question file not found: )r   r+   r   rR   rS   r  r#   r{   rk   rl   rz   r|   rm   r   )r1   r:   r;   r<   r/  question_audio_dirr>   existing_audiorp   r}   r~   created_countr   rM   audio_question_pathr   r   r   r   _create_question_audio_files  s:   

z3ContentCreationService._create_question_audio_filesc              
   C   s  ddddd}| dd}td| j d| d| d}td	| j d| d| d
}| r| std z/t|ddd}| }W d   n1 sNw   Y  | || |d  d7  < td W n- ty }	 ztd|	  W Y d}	~	nd}	~	ww td |d  d7  < ntd|  td| j d| d| d}
td| j d| d| d
}| r_|
 rt	|

dng }|
 s|
jddd |rtdt| d |d  t|7  < |S td  zXt|ddd}| }W d   n	1 sw   Y  | |}d}t|dd!D ]\}}|
| d }| s/| || |d7 }q|d"  |7  < td#| d$ W |S  ty^ }	 ztd%|	  W Y d}	~	|S d}	~	ww td&|  |S )'rD  r   rE  r3   r4   r@   r6   r7   ra   r5   r'   rF  rd   re   rf   Nr)  r   rG  rH  rI  r+  rJ  rB   rK   r8   rh   Tr   rN  rO  r,  rP  rv   r*  rQ  rR  rS  rT  )r9   r   r+   r   r#   rk   rl   rm   r   rR   rS   r  r{   rz   r|   )r1   r:   r;   r/  r<   rM  ro   rp   rq   r   rU  r>   rV  r}   r~   rW  r   rM   rX  r   r   r   ,_create_lesson_audio_files_before_30_01_2026  st   




zCContentCreationService._create_lesson_audio_files_before_30_01_2026c                 C   s  zTt j| jsdddW S t j| jrStd t| j}| }t jt j| j	dd |
| j	| td|d  d	 |d }d
d
d
d
d
d}t|d ddD ]\}}|d }|d }|d }	|d }
td| d|  tdd| }t|d ddD ]\}}|d }|d }|d }|d }td|}|r|d}n| d| }tdd| }|d  d7  < td| d|  td|}|rt| }|d  }|d! }nd"}d#}d$| j d%| d&|dd' d(}d)| j d%| d&|dd' d(}| ||r3td* | j||||	|
||||||||d+}|d,  d7  < |d-  d7  < qtd. |d/  d7  < |d0  d7  < qqUd1d2| j|d3d4W S W d8S  tyk } z	td5d6| d7d8}~ww )9>Creates content for each module and lesson (NO audio creation)r  r  r  The user journey is createdTrb   r  r  r   r   )r  r)  content_updatedr*  questions_updatedr   r   rv   r!  focusoutcomedurationr#  rx   r"  r   r$  r%  topicsr&  r3   r'  r  r(  (\d+)x      `	  (
  content/r6   r7   r4   r'   r8   )    Creating new content and questions...r:   r;  module_focusmodule_outcomemodule_durationr;   r@  lesson_durationlesson_outcomelesson_topicsr  word_count_lowerword_count_upperr)  r*  '    Content and questions already existr]  r^  r-  AContent and questions created (audio will be created separately).-Call /run-audiocreation to create audio filesr  r  r.  r/  	next_stepr   Error parsing result: r1  N)r   r   r   r-   r#   r   r4  rj   r   r0   export_to_jsonr|   r   r   r   r   r   intr+   r9   _should_create_content_create_new_lessonr   r   )r1   r7  r8  r  content_statsr:   r:  r!  rk  rl  rm  r;  r<  r=  r>  rn  ro  rp  r?  r;   r@  duration_matchminuteslower_rangeupper_rangero   r>   rG   r   r   r   r   run_content_creation_23012026?  s   
	
"";`z4ContentCreationService.run_content_creation_23012026c              	   C   t   | dd}tjd| j d| d| d}tjd| j d| d| d}|  r2|r0|s2dS |  s8dS d	S 
z$Check if content needs to be createdr3   r4   rh  r6   r7   r'   r8   TFr9   r   r   r   r+   _is_first_iterationr1   r:   r;   r<   content_existsquestion_existsr   r   r   r{       ""z-ContentCreationService._should_create_contentc                 C   sp  zt j| jsdddW S t j| jrtd t| j}| }t jt j| j	dd |
| j	| td|d  d	 |d }d
d
d
d
d
d
d
d}t|d ddD ]<\}}|d }|d }|d }	|d }
td| d|  tdd| }t|d ddD ]\}}|d }|d }|d }|d }td|}|r|d}n| d| }tdd| }|d  d7  < td| d|  td|}|rt| }|d  }|d! }nd"}d#}|dd$}d%| j d&| d'| d(}d)| j d&| d'| d(}| ||rtd* | j||||	|
||||||||d+}|d,  d7  < |d-  d7  < t j|rY| |}|rY|d.  d7  < |d/rY|d/  d7  < t j|r~| |}|r~|d.  d7  < |d/r~|d/  d7  < qtd0 |d1  d7  < |d2  d7  < qqWd3d4| j|d5d6W S W d:S  ty } z	td7d8| d9d:}~ww );r[  r  r  r  r\  Trb   r  r  r   r   )r  r)  r]  r*  r^  word_count_removedspecial_chars_cleanedr   r   rv   r!  r_  r`  ra  r#  rx   r"  r   r$  r%  rb  r&  r3   r'  r  r(  rc  rd  re  rf  rg  r4   rh  r6   r7   r'   r8   ri  rj  r)  r*  r  r  rs  r]  r^  r-  rt  ru  rv  r   rx  r1  N)r   r   r   r-   r#   r   r4  rj   r   r0   ry  r|   r   r   r   r   r   rz  r9   r+   r{  r|  _clean_content_filegetr   r   )r1   r7  r8  r  r}  r:   r:  r!  rk  rl  rm  r;  r<  r=  r>  rn  ro  rp  r?  r;   r@  r~  r  r  r  r<   ro   r>   rG   cleanedr   r   r   r   run_content_creation  s   



Krz+ContentCreationService.run_content_creationc              	   C   r  r  r  r  r   r   r   r{  3  r  c                 C   s&   t d| jj  | jjdkrdS dS )NzInside feedback test : zFirst iterationTF)r#   r)   feedback)r1   r   r   r   r  B  s   z*ContentCreationService._is_first_iterationc              
   C   s(  zt |ddd}| }W d   n1 sw   Y  |}d}d}g d}|D ]}tj|d|tjtjB d}	|	|kr?d	}|	}q(d
dl}
t|
j}|	d g }|D ]%}|dks`|dks`|dkrf|
| qR||v rp|
| qR|
d d	}qRd|}tdd|}tdd|}tdd|}tdd|}tdd|}dd |dD }d|}|dd}| |}||krt |ddd}|| W d   n1 sw   Y  tdtj|  d	|dW S ddiW S  ty } ztd| d |  dt|d!W  Y d}~S d}~ww )"zAClean content file by removing word counts and special charactersrd   re   rf   NF)>\n*(?:Word\s*Count|word\s*count|WORD\s*COUNT)\s*:?\s*\d+\s*\n*/\n*CONTENT_(?:REJECTED|TOO_SHORT).*?\n*(?=\n|$)D\n*###?\s*(?:Validation|Word Count|Feedback).*?(?=\n###|\n##|\n#|\Z)z+\n*(?:Reading time|Estimated reading).*?\n*z'\n*(?:Retry|Attempt|Regeneration).*?\n*\n\s*Total\s*$\n\s*Total\s*\n*$r   r   Tr      —–•°´`‘’"“”	r   r   [ \t]+\n\s*\n\s*\n+

z\*\*\s+z**z\s+\*\*z#\s+r   c                 S   s   g | ]}|  qS r   )rstrip)r   r   r   r   r   r     s    z>ContentCreationService._clean_content_file.<locals>.<listcomp>   ﻿r   u       ✓ Cleaned: )r  r  r  u       ✗ Error cleaning rx   )r  error)rk   rl   r   r   r   r   stringset	printableupdater   r   r   r9   _remove_trailing_metadatar   r#   r   r   basenamer   rV   )r1   	file_pathrp   r   original_contentr  r  word_count_patternsr   new_contentr  allowed_charscleaned_contentcharr   r   r   r   r   r  H  sb   








z*ContentCreationService._clean_content_filec                 C   s^  | j  }td|}|rt| }nd}|dd}d| j d| d| d}d	| j d| d| d}||||||d
||||	|
dd}td|  td| d|  td| j j	 d| j j
  ||t||||||||	|
|| d| dd td|  | jj|d}|j|d | | t|ddd}| }W d   n1 sw   Y  ddd |
D }td|  t||||| d| }| |}| jj|d}||d< d |d!< |j|d | | d"| j d| d| d}tjtj|d#d$ t|d%dd}|| W d   n	1 s#w   Y  | | d&S )'z4Create content for a new lesson WITH ENHANCED PROMPTrc     r3   r4   rh  r6   r7   r'   r8   )r   namer_  r`  ra  )r   titlera  
objectivesrb  )CourseModuleLessonz*The user journey for content creation is: z(The word count for content creation is: z to z.The level and motive for content creation is: z and a  
            ## CRITICAL FORMATTING RULES - MUST FOLLOW:
            
            1. **NO WORD COUNT REFERENCES:**
            - DO NOT include "Word Count:", "word count:", "Total words:" or any word count information
            - DO NOT mention word count validation in the content
            - DO NOT add word count numbers anywhere
            - DO NOT add standalone word "Total" at the end of content
            
            2. **NO VALIDATION COMMENTS:**
            - DO NOT include "CONTENT_REJECTED", "CONTENT_TOO_SHORT", or validation feedback
            - DO NOT mention validation or approval processes
            - Only output the actual lesson content
            
            3. **CLEAN SPECIAL CHARACTERS:**
            - Use standard ASCII characters only
            - Avoid Unicode special characters that may cause display issues
            - Use standard punctuation: , . ! ? : ; " '
            
            4. **PROPER MARKDOWN FORMATTING:**
            - Use consistent heading levels: # for main title, ## for sections, ### for subsections
            - Use proper line breaks and spacing
            - Ensure all markdown syntax is correctly formatted
            
            5. **OUTPUT FORMAT:**
            - Only output the lesson content in clean markdown
            - No extra comments, notes, or explanations
            - No word count information
            - No validation messages
            )r  module_numberr;  rk  rl  r;   r%  rn  lesson_objectivesrp  user_journey
word_countstrict_formattingzCreating content file: )r-   rO   rd   re   rf   Nz, and c                 S   s   g | ]}d | d qS )'r   )r   topicr   r   r   r     s    z=ContentCreationService._create_new_lesson.<locals>.<listcomp>zThe topic sections are: r   a   
        ## IMPORTANT FORMATTING RULES:
        - DO NOT include word count or validation comments
        - Use clean, standard characters only
        - Format questions clearly with ### Q1:, ### Q2:, etc.
        - No extra text or explanations
        question_instructionsr5   Trb   r   z7Content and questions created and cleaned successfully.)r)   dictr   r   rz  r   r9   r+   r#   Levelmotiver  rV   r(   second_stage_crewrX   r  rk   rl   r   content_reviewerreview_and_enrich_content_clean_text_contentqa_gen_crewr   rj   r   r   r   )r1   r:   r;  rk  rl  rm  r;   r@  rn  ro  rp  r  rq  rr  rP   duration_minutes_matchr  r<   ro   r>   complete_datar_   rp   r   topic_sectionsupdated_dataupdated_content_pathout_filer   r   r   r|    s   
/




z)ContentCreationService._create_new_lessonc                 C   sd  |s|S |  |}g d}|D ]}tj|d|tjtjB d}qtjdd|tjd}tjdd|tjd}ddl}t|j}|d	 g }|D ]#}|dksS|d
ksS|dkrY|	| qE||v rc|	| qE|	d qEd
|}tdd|}tdd|}|dd}|d}g }	|D ]}
|
 }|r| dkr|		|
 q|r| dkrqqd
|	}| S )zTClean text content by removing word counts, special characters, and trailing 'Total')r  r  r  r   r   r  r   r  r   Nr  r  r  r   r  r  r  r  total)r  r   r   r   r   r  r  r  r  r   r   r9   r   r   r   )r1   r   r   r   r  allowedcleaned_charsr  r   cleaned_linesr   stripped_liner   r   r   r  (  sB   





z*ContentCreationService._clean_text_contentc                 C   s   |s|S |}g d}|D ]	}t |d|}q|d}g }h d}|D ]1}|  }	|	|v r0q#d}
|D ]}|	|d sH|	|d sH|	|krLd}
 nq4|
sT|| q#d|}t d	d|}| S )
zFRemove trailing metadata words and phrases from text - SIMPLER VERSION)z?(?:\n\s*)?[Tt][Oo][Tt][Aa][Ll]\s*(?:[Ww][Oo][Rr][Dd][Ss]?\s*)?$z:\n\s*[Tt][Oo][Tt][Aa][Ll]\s*\n\s*[Ww][Oo][Rr][Dd][Ss]?\s*$z\n\s*[Tt][Oo][Tt][Aa][Ll]\s*$z\n\s*[Ww][Oo][Rr][Dd][Ss]?\s*$z\n\s*\d+\s*(?:words?|total)\s*$r   r   >   endwordcountr  wordsr0  
conclusionF:r   Tz2\n.*[Tt][Oo][Tt][Aa][Ll].*[Ww][Oo][Rr][Dd][Ss]?.*$)r   r   r   r   r   r   r   r   )r1   r   original_textpatternsr   r   r  metadata_wordsr   stripped	skip_liner  r   r   r   r  k  s8   


z0ContentCreationService._remove_trailing_metadataN)#__name__
__module____qualname__r2   r?   rH   r`   rs   rt   ru   rn   rz   r   r   r   r   r   r   r   rm   r  r  rT   rC  r5  rY  rZ  r  r{  r  r  r  r|  r  r  r   r   r   r   r&   H   sF    
$ICe4 6]:9	G,(Ii{T Cr&   )$sysfastapir   r   utils.hashingr   user_journey_service.crewr   2user_journey_service.processors.duration_estimatorr   5user_journey_service.processors.StagewiseCourseParserr   0user_journey_service.processors.content_reviewerr   8user_journey_service.processors.user_journey_synthesizerr	   *user_journey_service.tools.custom_stt_toolr
   duration_estimatorr  synthesizerstt_toolpathlibr   r   r  r  r   pydubr   
subprocessrK  r   r%   pydub_configuredr&   r   r   r   r   <module>   s4    )