o
    4jsi                     @   s   d dl Z d dlmZ d dlZddlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d d	lmZ d d
lmZ e	 Ze Ze Ze Zd dlmZ d dlZd dlZd dlmZ d dlmZ d dl Z d dlZdd Ze Z G dd dZ!dS )    N)HTTPException   )HashGenerator)UserJourney)MicrolearningDurationEstimator)CourseOutlineParser)ContentReviewer)Synthesizer)LiveWhisperSTTTool)Path)gTTS)AudioSegmentc                     s~   t jt jt  fdd} |  }|r9|t_t jt j|d}t j|r-|t_	n|t_	t
d|  dS t
d dS )z!Configure pydub with ffmpeg pathsc                     sv   t j d} t j| r| S tddD ]$}t jj gdg| R  }t jt j|d} t j| r8|   S qd S )Nz
ffmpeg.exe      z..)ospathjoinexistsrangeabspath)ffmpeg_pathi
parent_dircurrent_dir /home/azureuser/microlearn/backend/user_journey_with_openai/agentic_workflow/src/user_journey_service/services/content_creation_service.pyfind_ffmpeg   s   z$configure_pydub.<locals>.find_ffmpegzffprobe.exeu   ✓ Pydub configured with: Tu=   ⚠ Warning: ffmpeg.exe not found. Audio conversion may fail.F)r   r   dirnamer   __file__r   	converterr   r   ffprobeprint)r   r   ffprobe_pathr   r   r   configure_pydub   s   r$   c                   @   s   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d* Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8S )9ContentCreationServicec                 C   sn   t  | _|| _t|| _d| j d| _d| j d| _d| j d| _d| j d| _	d| j d| _
d S )Nz	research/.mdzoutput/zoutput1/zoutput2/zparsed_course_content/z.json)r   crew_instance
input_datar   generate_input_hash
input_hashresearch_fileoutput_fileoutput_file_1output_file_2json_output_path)selfr(   r   r   r   __init__F   s   zContentCreationService.__init__c                 C   sX   | dd}td| j d| d| d}td| j d| d| d}| o+| S )z9Check if content and questions already exist for a lesson._updated_content//module_lessonr&   	question/)replacer   r*   r   )r0   
module_idxlesson_numberlesson_safeupdated_content_filequestion_filer   r   r   _has_updated_content_questionsQ   s   z5ContentCreationService._has_updated_content_questionsc           	      C   s   t d |dd}td| j d| d| d}td| j d| d| d	}td| j d| d| d
}td| j d| d| d}| oU| oU| oU| }t d|  |S )z'Check if audio files exist for a lessonz.Inside audio files availability check functionr2   r3   audio/lessons/r5   r6   .wavaudio/questions/z/1.wavz/2.wavz/3.wavzThe output is : )r"   r8   r   r*   r   )	r0   r9   r:   r;   audio_contentaudio_question1audio_question2audio_question3resultr   r   r    _has_audio_content_and_questions[   s   z7ContentCreationService._has_audio_content_and_questionsc                 C   s   i }t d| j d| d}td|  t d| j d| d}t|d}|D ]-}t| | |}td td	 ttd
}||d}	|||< | j	 }
|
j
|	d q*d|i}	| j }
|
j
|	d d S )Naudio/stage//stager@   z$The audio content is available at : rA   /*.wavu/   🎤 Press Enter when you're ready to answer...u    📢 Listening to your answer...zEnter your answer: )questionanswerinputsusers_response)r   r*   r"   sortedglobwav_to_text_whisperinputstrr'   evaluator_crewkickoffassessment_crew)r0   idxall_qns_answerrB   audio_question	wav_fileswav_filerL   rM   rO   crewr   r   r   _run_evaluationp   s$   



z&ContentCreationService._run_evaluationc           
      C   s  | dd}td| j d| d| d}| r!td|  n@tjtjd| j dd	d
 td| j d| d| d}t	|ddd}|
 }W d   n1 sVw   Y  | || td| j d| d| d}t|d}	|	stj|d	d
 td | || dS dS )zCreate audio files for a lessonr2   r3   r?   r5   r6   .mp3z/Audio for the content is already available at: rJ   Texist_okr4   r&   rutf-8encodingNrA   *.mp3Question audio is not available)r8   r   r*   r   r"   r   makedirsr   r   openreadtext_to_wavrQ   rR   _create_audio_question)
r0   r9   r:   r;   rB   content_fileftext_contentaudio_question_dirr\   r   r   r   _create_audio_files   s"    
z*ContentCreationService._create_audio_filesc           	      C   s   | dd}td| j d| d| d}tjtjd| j ddd	 td
| j d| d| d}t|ddd}| }W d   n1 sJw   Y  | 	|| td| j d| d| d}tj|dd	 t
d | || dS )z>Create audio files for a lesson without checking if they existr2   r3   r?   r5   r6   r`   rJ   Tra   r4   r&   rc   rd   re   NrA   rh   )r8   r   r*   r   ri   r   r   rj   rk   rl   r"   rm   )	r0   r9   r:   r;   rB   rn   ro   rp   rq   r   r   r   !_create_audio_files_without_check   s    
z8ContentCreationService._create_audio_files_without_checkc                 C   s  t d| j d| d}| rtd|  n=tjtjd| j ddd t d| j d| d	}t|d
dd}|	 }W d    n1 sJw   Y  | 
|| t d| j d| d}t|d}|stjtjd| j d| ddd td | | d S d S )NrH   rI   r@   z/Audio for the content is already available at :rJ   Tra   r4   r&   rc   rd   re   rA   rK   zquestion audio is not available)r   r*   r   r"   r   ri   r   r   rj   rk   rl   rQ   rR   rm   )r0   rY   rB   rn   ro   rp   r[   r\   r   r   r   _create_audio_files_04_01_2026   s     
&z5ContentCreationService._create_audio_files_04_01_2026c                 C   s  | dd}td| j d| d| d}| rt|ddd	}| }W d
   n1 s/w   Y  | |}tdt| d| d|  t	|ddD ]1\}}	td| d| d| d|	d
d  d	 td| j d| d| d| d	}
| 
|	|
 qNd
S td|  d
S )z6Create audio files for questions for a specific lessonr2   r3   r7   r5   r6   r&   rc   rd   re   NzFound z* questions to convert to audio for Module z	, Lesson r   startu   
🧠 Asking Question z for Module : d   z...
rA   rJ   r@   zQuestion file not found: )r8   r   r*   r   rj   rk   parse_questionsr"   len	enumeraterl   )r0   r9   r:   r;   r=   ro   	questionsparsed_questionsindexrL   r[   r   r   r   rm      s   

*$z-ContentCreationService._create_audio_questionc                 C   s   | d}g }g }|D ]8}| }|ds|dr7|r)|d|  g }tdd|}tdd|}|rC|dsC|| q|rP|d|  dd	 |D }|smd
}t||tjtj	B }dd	 |D }|dd S )z"Parse questions from markdown text
z### Q z^### Q\d+:\s* z^### Q\d+:.*?\s*z###c                 S   s    g | ]}|rt |d kr|qS )
   )rz   .0qr   r   r   
<listcomp>   s     z:ContentCreationService.parse_questions.<locals>.<listcomp>z#### Q\d+:?\s*(.*?)(?=\n### Q\d+:|$)c                 S      g | ]
}|  r|  qS r   stripr   r   r   r   r          N	   )
splitr   
startswithappendr   resubfindallDOTALL
IGNORECASE)r0   md_textlinesr|   current_questionlinepatternr   r   r   ry      s,   

z&ContentCreationService.parse_questionsc                 C   s  |sdS t |}g d}|D ]}tj|d|tjtjB d}qdddddddd}| D ]\}}tj|||tjtjB d}q,tdd|}g }|dD ]'}| }|rs|rn|d	 	 rnt
|d
krn|d	  |d
d  }|| qLd|}tdd| }|S )zz
        Intelligently clean text for audio conversion.
        Handles various markdown and formatting patterns.
        r   )z
^#{1,6}\s+z\*\*|\*|__|_z	```.*?```z`[^`]+`z!?\[.*?\]\(.*?\)z&[a-z]+;z^\s*[-*+]\s+z^\s*\d+\.\s+z^>\s+z\|-+\|z	^\|.*?\|$flagsz\1. )z^#{1,3}\s+(.*?)\s*$z^\s*[-*+]\s+(.*?)\s*$z^\s*\d+\.\s+(.*?)\s*$zA^(Introduction|Conclusion|Summary|Note|Tip|Warning|Important):\s*z"^(Step \d+|Phase \d+|Part \d+):\s*z^(Q\d+|Question \d+)[:.]?\s*z'^(Easy|Moderate|Difficult|Hard)[-:]?\s*z	[.!?]{2,}r2   r   r   r   Nr   z\s+)rU   r   r   	MULTILINEr   itemsr   r   r   islowerrz   upperr   r   )r0   textpatterns_to_remover   replacementsreplacementr   r   r   r   r   intelligent_text_cleaner   s8   

z/ContentCreationService.intelligent_text_cleanerc           	      C   s  |sdS | d}g }|D ]}| }|sq|dr*|d|dd    q|dr>|d|d	d    q|d
rR|d|dd    q|drd||dd    q|dss|dss|dr|dd  }tdd|}tdd|}|d|  qtd|rtdd|}tdd|}|d|  q|}tdd|}tdd|}tdd|}tdd|}| r|| qg }t|D ]\}}|| |ds|dr|d qd| S )z
        Optimal markdown to speech converter - simpler but effective.
        Preserves semantic meaning without over-engineering.
        r   r   # zMain topic: r   N## 	Section:    ### Subsection: r   ####    z- z* z+ z[\*_]{1,2}(.*?)[\*_]{1,2}\1z	`([^`]+)`zcode \1   • 	^\d+\.\s+\*\*(.*?)\*\*	\*(.*?)\*\[([^\]]+)\]\([^)]+\))zMain topic:Section:zSubsection:)	r   r   r   r   r   r   matchr{   r   )	r0   r   r   speech_linesr   item
clean_linerF   r   r   r   r   optimal_markdown_to_speechG  sT   








z1ContentCreationService.optimal_markdown_to_speechc           	      C   s  |sdS t |}|d}g }|D ]d}| }|sq|dr1|dd  }|d|  q|drG|dd  }|d	|  q|d
r]|dd  }|d|  q|drs|dd  }|d|  q|dr|dd  }|d|  q|dr|dd  }|d|  qtd|rtdd|}| |}|d|  qtd|rtd|}|r|	d}|	d}| |}|d| d|  qtd|rtdd|}| |}d|
 v sd|v r|d|  q|d |  q|d!r0|dd  }| |}|d"|  qtd#|r=|d$ qd%|v ritd&|ritd'|shd(d) |d%D }|rh|d*d+|  q| |}|rv|| q| d|S ),z
        Convert markdown to natural speech while preserving semantic meaning.
        Handles: # Headings, **bold**, *italic*, - bullets, 1. numbered lists, `code`, [links](url)
        r   r   z######    Nr   z##### r   zMinor section: r   r   r   r   r   zTopic: r   r   z	Chapter: r   r   zTitle: z	^[-*+]\s+r   r   z^(\d+)\.\s+(.*)Point rw   z^\[[ xX]\]\s+z[x]z[X]zCompleted: zTo do: z> zQuote: z^[-*_]{3,}$z---|z
\w.*\|.*\wz^\|?[-:| ]+\|?$c                 S   r   r   r   )r   cellr   r   r   r     r   z=ContentCreationService.markdown_to_speech.<locals>.<listcomp>zTable row: z; )rU   r   r   r   r   r   r   r   _remove_inline_formattinggrouplowersearchr   _add_speech_pauses)	r0   r   r   result_linesr   contentr   numbercellsr   r   r   markdown_to_speech  s|   















z)ContentCreationService.markdown_to_speechc                 C   s   |sdS |}dd }dd }dd }dd	 }t d
||}t d||}t d||}t d||}t d||}t d||}t dd|}t dd|}|dd}|dd}|dd}|dd}| S )zV
        Remove inline markdown formatting while preserving semantic meaning.
        r   c                 S      |  d}| S Nr   r   r   r   r   r   r   replace_bold     
zFContentCreationService._remove_inline_formatting.<locals>.replace_boldc                 S   r   r   r   r   r   r   r   replace_italic  r   zHContentCreationService._remove_inline_formatting.<locals>.replace_italicc                 S      |  d}d| S )Nr   zcode: r   r   r   r   r   replace_inline_code     

zMContentCreationService._remove_inline_formatting.<locals>.replace_inline_codec                 S   r   )Nr   zstrikethrough: r   r   r   r   r   replace_strikethrough	  r   zOContentCreationService._remove_inline_formatting.<locals>.replace_strikethroughr   r   z	__(.*?)__z_(.*?)_z`(.*?)`z	~~(.*?)~~r   r   z!\[([^\]]*)\]\([^)]+\)z	Image: \1z&nbsp;r   z&amp;andz&lt;z	less thanz&gt;zgreater than)r   r   r8   r   )r0   r   originalr   r   r   r   r   r   r   r     s(   z0ContentCreationService._remove_inline_formattingc                 C   s   | d}g }t|D ]_\}}| }|sq|dr&|| |d q|drG|| |t|d k rF||d  dsF|d q|| |t|d k rj||d   rj||d  dsj|d qd|S )zK
        Add natural pauses for speech based on content structure.
        r   )Title:Chapter:zTopic:r   r   )r   r   z
Completed:zTo do:r   )r   r   )r   r   r   r   )r   r{   r   r   r   rz   r   )r0   r   r   rF   r   r   r   r   r   r   #  s&   




"

2

z)ContentCreationService._add_speech_pausesc                 C   st  t d|  z]t| }| |}|st d W dS ddl}d| jdd  d}t d|  t|d	d
d}|| t d t|	dd}t
j|rbt
	|| t d|  |W S W dS  ty } zIt d|  ddl}|  t|d }	t|	ddd}
|
d| d|dd   W d   n1 sw   Y  t d|	  W Y d}~dS d}~ww )z3Convert text to WAV audio file - SIMPLIFIED VERSIONConverting text to audio: ,Warning: Empty text, skipping audio creationNr   temp_audio_   r`   Creating MP3 with gTTS: enFr   langslow   ✓ gTTS conversion completer@      ✓ Audio saved as    ❌ Error in text_to_wav: 
.error.txtwrd   re   Error: 

Text:   Saved error details to: )r"   rU   r   r   uuiduuid4hexr   saver8   r   r   r   	Exception	traceback	print_excrj   write)r0   r   r,   r   temp_mp3tts
mp3_outputer   
error_filero   r   r   r   text_to_wav_oldC  s@   

 z&ContentCreationService.text_to_wav_oldc                 C   s  t d|  zxt| }| |}|st d W dS ddl}d| jdd  d}t d|  t|d	d
d}|| t d t	|}|j
jddd t d|j
  tj|rvddl}||t| t d|  t|W S t d|  W dS  ty } z[t d|  ddl}|  t|d }	z/t|	ddd}
|
d| d|dd   W d   n1 sw   Y  t d|	  W n   t d Y W Y d}~dS W Y d}~dS d}~ww )z.Convert text to WAV audio file - FIXED VERSIONr   r   Nr   r   r   r`   r   r   Fr   r   Tparentsrb   u    ✓ Directory created/verified: r   u   ❌ Temp file not found: r   r   r   rd   re   r   r   r   r   zCould not save error file)r"   rU   r   r   r   r   r   r   r   r   parentmkdirr   r   r   shutilmover   r   r   rj   r   )r0   r   r,   r   r   r   r   r   r   r   ro   r   r   r   rl   z  sP   


 
z"ContentCreationService.text_to_wavc                 C   s6   t d td}|t|}t d|d  |d S )Nzinside wave to text conversionbaseu   🎤 Transcription:r   )r"   whisper
load_model
transcriberU   )r0   r]   modelrF   r   r   r   rS     s
   
z*ContentCreationService.wav_to_text_whisperc                 C   s  zt j| jsdddW S td t| j}| }td|d  d |d }dddddd	}t|d
 ddD ]\}}|d }t	dd|
 }td| d|  t|d ddD ]f\}	}
|
d }td|}|rr|d}n| d|	 }t	dd|
 }|d  d7  < td| d|  | ||}|d  |d 7  < |d  |d 7  < |d  |d 7  < |d  |d  7  < q\q9d!d"| j|d#|d  d$|d  d%|d  d&d'W S  ty	 } zdd(l}| }td)|  td*|  td+d,| d-d(}~ww ).z6Creates audio files for existing content and questionsfailureUser journey is not created.statusmessagez"Starting audio creation process...Course Title: course_titler   r   )total_lessonscontent_audio_createdquestion_audio_createdskipped_contentskipped_questionsmodulesr   ru   module_title^Module \d+:\s*r   Processing Module rw   lessonslesson_titleLesson (\d+\.\d+)r2   ^Lesson \d+\.\d+:\s*r    Processing Lesson r  content_createdr	  questions_createdr
  content_skippedr  questions_skippedsuccesszAudio creation completed.zCreated z content audio files and z question audio files for z	 lessons.)r  r  hashidstatssummaryNzERROR in run_audio_creation: zTraceback: r   z$Unexpected error in audio creation: status_codedetail)r   r   r   r,   r"   r   parse_contentr{   r   r   r   r   r   _create_lesson_audio_filesr*   r   r   
format_excr   )r0   parserparsed_resultr  audio_statsr9   moduler  module_name
lesson_idxlessonlesson_title_fulllesson_number_matchr:   lesson_namelesson_audio_statsr   r   error_detailsr   r   r   run_audio_creation  s^   
	"z)ContentCreationService.run_audio_creationc              
   C   s  ddddd}| dd}td| j d| d| d}td	| j d| d| d
}| r| std z/t|ddd}| }W d   n1 sNw   Y  | || |d  d7  < td W n- ty }	 ztd|	  W Y d}	~	nd}	~	ww td |d  d7  < ntd|  td| j d| d| d}
td| j d| d| d
}| r_|
 rt	|

dng }|
 s|
jddd |rtdt| d |d  t|7  < |S td  zXt|ddd}| }W d   n	1 sw   Y  | |}d}t|dd!D ]\}}|
| d }| s/| || |d7 }q|d"  |7  < td#| d$ W |S  ty^ }	 ztd%|	  W Y d}	~	|S d}	~	ww td&|  |S )'z9Create audio files for a specific lesson and return statsr   )r  r  r  r  r2   r3   r?   r5   r6   r`   r4   r&   z    Creating content audio...rc   rd   re   Nr  r   u       ✓ Content audio createdu&       ✗ Error creating content audio: z     Content audio already existsr  z%    Warning: Content file not found: rA   rJ   r7   rg   Tr   z    z# question audio files already existr  z$    Creating question audio files...ru   r  u       ✓ Created z question audio filesu'       ✗ Error creating question audio: z&    Warning: Question file not found: )r8   r   r*   r   r"   rj   rk   rl   r   rQ   rR   r   rz   ry   r{   )r0   r9   r:   r  r;   content_audio_pathrn   ro   rp   r   question_audio_dirr=   existing_audior|   r}   created_countr~   rL   audio_question_pathr   r   r   r!    st   




z1ContentCreationService._create_lesson_audio_filesc                 C   s  zTt j| jsdddW S t j| jrStd t| j}| }t jt j| j	dd |
| j	| td|d  d	 |d }d
d
d
d
d
d}t|d ddD ]\}}|d }|d }|d }	|d }
td| d|  tdd| }t|d ddD ]\}}|d }|d }|d }|d }td|}|r|d}n| d| }tdd| }|d  d7  < td| d|  td|}|rt| }|d  }|d! }nd"}d#}d$| j d%| d&|dd' d(}d)| j d%| d&|dd' d(}| ||r3td* | j||||	|
||||||||d+}|d,  d7  < |d-  d7  < qtd. |d/  d7  < |d0  d7  < qqUd1d2| j|d3d4W S W d8S  tyk } z	td5d6| d7d8}~ww )9>Creates content for each module and lesson (NO audio creation)r   r  r  The user journey is createdTra   r  r  r   r   )r  r  content_updatedr  questions_updatedr  r   ru   r  focusoutcomedurationr  rw   r  r   r  r  topicsr  r2   r  r  r  (\d+)x      `	  (
  content/r5   r6   r3   r&   r7   )    Creating new content and questions...r9   r'  module_focusmodule_outcomemodule_durationr:   r,  lesson_durationlesson_outcomelesson_topicsr  word_count_lowerword_count_upperr  r  '    Content and questions already existr7  r8  r  AContent and questions created (audio will be created separately).-Call /run-audiocreation to create audio filesr  r  r  r  	next_stepr   Error parsing result: r  N)r   r   r   r,   r"   r   r   ri   r   r/   export_to_jsonr{   r   r   r   r   r   intr*   r8   _should_create_content_create_new_lessonr   r   )r0   r#  r$  r  content_statsr9   r&  r  rE  rF  rG  r'  r(  r)  r*  rH  rI  rJ  r+  r:   r,  duration_matchminuteslower_rangeupper_rangern   r=   rF   r   r   r   r   run_content_creation_23012026L  s   
	
"";`z4ContentCreationService.run_content_creation_23012026c              	   C   t   | dd}tjd| j d| d| d}tjd| j d| d| d}|  r2|r0|s2dS |  s8dS d	S 
z$Check if content needs to be createdr2   r3   rB  r5   r6   r&   r7   TFr8   r   r   r   r*   _is_first_iterationr0   r9   r:   r;   content_existsquestion_existsr   r   r   rU       ""z-ContentCreationService._should_create_contentc                 C   sp  zt j| jsdddW S t j| jrtd t| j}| }t jt j| j	dd |
| j	| td|d  d	 |d }d
d
d
d
d
d
d
d}t|d ddD ]<\}}|d }|d }|d }	|d }
td| d|  tdd| }t|d ddD ]\}}|d }|d }|d }|d }td|}|r|d}n| d| }tdd| }|d  d7  < td| d|  td|}|rt| }|d  }|d! }nd"}d#}|dd$}d%| j d&| d'| d(}d)| j d&| d'| d(}| ||rtd* | j||||	|
||||||||d+}|d,  d7  < |d-  d7  < t j|rY| |}|rY|d.  d7  < |d/rY|d/  d7  < t j|r~| |}|r~|d.  d7  < |d/r~|d/  d7  < qtd0 |d1  d7  < |d2  d7  < qqWd3d4| j|d5d6W S W d:S  ty } z	td7d8| d9d:}~ww );r5  r   r  r  r6  Tra   r  r  r   r   )r  r  r7  r  r8  word_count_removedspecial_chars_cleanedr  r   ru   r  r9  r:  r;  r  rw   r  r   r  r  r<  r  r2   r  r  r  r=  r>  r?  r@  rA  r3   rB  r5   r6   r&   r7   rC  rD  r  r  re  rf  rM  r7  r8  r  rN  rO  rP  r   rR  r  N)r   r   r   r,   r"   r   r   ri   r   r/   rS  r{   r   r   r   r   r   rT  r8   r*   rU  rV  _clean_content_filegetr   r   )r0   r#  r$  r  rW  r9   r&  r  rE  rF  rG  r'  r(  r)  r*  rH  rI  rJ  r+  r:   r,  rX  rY  rZ  r[  r;   rn   r=   rF   cleanedr   r   r   r   run_content_creation  s   



Krz+ContentCreationService.run_content_creationc              	   C   r]  r^  r_  ra  r   r   r   rU  @  rd  c                 C   s&   t d| jj  | jjdkrdS dS )NzInside feedback test : zFirst iterationTF)r"   r(   feedback)r0   r   r   r   r`  O  s   z*ContentCreationService._is_first_iterationc              
   C   s(  zt |ddd}| }W d   n1 sw   Y  |}d}d}g d}|D ]}tj|d|tjtjB d}	|	|kr?d	}|	}q(d
dl}
t|
j}|	d g }|D ]%}|dks`|dks`|dkrf|
| qR||v rp|
| qR|
d d	}qRd|}tdd|}tdd|}tdd|}tdd|}tdd|}dd |dD }d|}|dd}| |}||krt |ddd}|| W d   n1 sw   Y  tdtj|  d	|dW S ddiW S  ty } ztd| d |  dt|d!W  Y d}~S d}~ww )"zAClean content file by removing word counts and special charactersrc   rd   re   NF)>\n*(?:Word\s*Count|word\s*count|WORD\s*COUNT)\s*:?\s*\d+\s*\n*/\n*CONTENT_(?:REJECTED|TOO_SHORT).*?\n*(?=\n|$)D\n*###?\s*(?:Validation|Word Count|Feedback).*?(?=\n###|\n##|\n#|\Z)z+\n*(?:Reading time|Estimated reading).*?\n*z'\n*(?:Retry|Attempt|Regeneration).*?\n*\n\s*Total\s*$\n\s*Total\s*\n*$r   r   Tr      —–•°´`‘’"“”	r   r   [ \t]+\n\s*\n\s*\n+

z\*\*\s+z**z\s+\*\*z#\s+r   c                 S   s   g | ]}|  qS r   )rstrip)r   r   r   r   r   r     s    z>ContentCreationService._clean_content_file.<locals>.<listcomp>   ﻿r   u       ✓ Cleaned: )ri  rf  ri  u       ✗ Error cleaning rw   )ri  error)rj   rk   r   r   r   r   stringset	printableupdater   r   r   r8   _remove_trailing_metadatar   r"   r   r   basenamer   rU   )r0   	file_pathro   r   original_contentri  rf  word_count_patternsr   new_contentrz  allowed_charscleaned_contentcharr   r   r   r   r   rg  U  sb   








z*ContentCreationService._clean_content_filec                 C   s^  | j  }td|}|rt| }nd}|dd}d| j d| d| d}d	| j d| d| d}||||||d
||||	|
dd}td|  td| d|  td| j j	 d| j j
  ||t||||||||	|
|| d| dd td|  | jj|d}|j|d | | t|ddd}| }W d   n1 sw   Y  ddd |
D }td|  t||||| d| }| |}| jj|d}||d< d |d!< |j|d | | d"| j d| d| d}tjtj|d#d$ t|d%dd}|| W d   n	1 s#w   Y  | | d&S )'z4Create content for a new lesson WITH ENHANCED PROMPTr=     r2   r3   rB  r5   r6   r&   r7   )r   namer9  r:  r;  )r   titler;  
objectivesr<  )CourseModuleLessonz*The user journey for content creation is: z(The word count for content creation is: z to z.The level and motive for content creation is: z and a  
            ## CRITICAL FORMATTING RULES - MUST FOLLOW:
            
            1. **NO WORD COUNT REFERENCES:**
            - DO NOT include "Word Count:", "word count:", "Total words:" or any word count information
            - DO NOT mention word count validation in the content
            - DO NOT add word count numbers anywhere
            - DO NOT add standalone word "Total" at the end of content
            
            2. **NO VALIDATION COMMENTS:**
            - DO NOT include "CONTENT_REJECTED", "CONTENT_TOO_SHORT", or validation feedback
            - DO NOT mention validation or approval processes
            - Only output the actual lesson content
            
            3. **CLEAN SPECIAL CHARACTERS:**
            - Use standard ASCII characters only
            - Avoid Unicode special characters that may cause display issues
            - Use standard punctuation: , . ! ? : ; " '
            
            4. **PROPER MARKDOWN FORMATTING:**
            - Use consistent heading levels: # for main title, ## for sections, ### for subsections
            - Use proper line breaks and spacing
            - Ensure all markdown syntax is correctly formatted
            
            5. **OUTPUT FORMAT:**
            - Only output the lesson content in clean markdown
            - No extra comments, notes, or explanations
            - No word count information
            - No validation messages
            )r  module_numberr'  rE  rF  r:   r  rH  lesson_objectivesrJ  user_journey
word_countstrict_formattingzCreating content file: )r,   rN   rc   rd   re   Nz, and c                 S   s   g | ]}d | d qS )'r   )r   topicr   r   r   r     s    z=ContentCreationService._create_new_lesson.<locals>.<listcomp>zThe topic sections are: r   a   
        ## IMPORTANT FORMATTING RULES:
        - DO NOT include word count or validation comments
        - Use clean, standard characters only
        - Format questions clearly with ### Q1:, ### Q2:, etc.
        - No extra text or explanations
        question_instructionsr4   Tra   r   z7Content and questions created and cleaned successfully.)r(   dictr   r   rT  r   r8   r*   r"   Levelmotiver}  rU   r'   second_stage_crewrW   rg  rj   rk   r   content_reviewerreview_and_enrich_content_clean_text_contentqa_gen_crewr   ri   r   r   r   )r0   r9   r'  rE  rF  rG  r:   r,  rH  rI  rJ  r  rK  rL  rO   duration_minutes_matchrY  r;   rn   r=   complete_datar^   ro   r   topic_sectionsupdated_dataupdated_content_pathout_filer   r   r   rV    s   
/




z)ContentCreationService._create_new_lessonc                 C   sd  |s|S |  |}g d}|D ]}tj|d|tjtjB d}qtjdd|tjd}tjdd|tjd}ddl}t|j}|d	 g }|D ]#}|dksS|d
ksS|dkrY|	| qE||v rc|	| qE|	d qEd
|}tdd|}tdd|}|dd}|d}g }	|D ]}
|
 }|r| dkr|		|
 q|r| dkrqqd
|	}| S )zTClean text content by removing word counts, special characters, and trailing 'Total')rl  rm  rn  r   r   ro  r   rp  r   Nrq  rr  rs  r   rt  ru  rv  rx  total)r~  r   r   r   r   rz  r{  r|  r}  r   r   r8   r   r   r   )r0   r   r   r   rz  allowedcleaned_charsr  r   cleaned_linesr   stripped_liner   r   r   r  5  sB   





z*ContentCreationService._clean_text_contentc                 C   s   |s|S |}g d}|D ]	}t |d|}q|d}g }h d}|D ]1}|  }	|	|v r0q#d}
|D ]}|	|d sH|	|d sH|	|krLd}
 nq4|
sT|| q#d|}t d	d|}| S )
zFRemove trailing metadata words and phrases from text - SIMPLER VERSION)z?(?:\n\s*)?[Tt][Oo][Tt][Aa][Ll]\s*(?:[Ww][Oo][Rr][Dd][Ss]?\s*)?$z:\n\s*[Tt][Oo][Tt][Aa][Ll]\s*\n\s*[Ww][Oo][Rr][Dd][Ss]?\s*$z\n\s*[Tt][Oo][Tt][Aa][Ll]\s*$z\n\s*[Ww][Oo][Rr][Dd][Ss]?\s*$z\n\s*\d+\s*(?:words?|total)\s*$r   r   >   endwordcountr  wordsr  
conclusionF:r   Tz2\n.*[Tt][Oo][Tt][Aa][Ll].*[Ww][Oo][Rr][Dd][Ss]?.*$)r   r   r   r   r   r   r   r   )r0   r   original_textpatternsr   r   r  metadata_wordsr   stripped	skip_liner  r   r   r   r~  x  s8   


z0ContentCreationService._remove_trailing_metadataN)__name__
__module____qualname__r1   r>   rG   r_   rr   rs   rt   rm   ry   r   r   r   r   r   r   rl   rS   r/  r!  r\  rU  rj  r`  rg  rV  r  r~  r   r   r   r   r%   E   s<    
$ICe4 79	GIi{T Cr%   )"sysfastapir   r   utils.hashingr   user_journey_service.crewr   2user_journey_service.processors.duration_estimatorr   5user_journey_service.processors.StagewiseCourseParserr   0user_journey_service.processors.content_reviewerr   8user_journey_service.processors.user_journey_synthesizerr	   *user_journey_service.tools.custom_stt_toolr
   duration_estimatorr  synthesizerstt_toolpathlibr   r   r   gttsr   pydubr   
subprocessr$   pydub_configuredr%   r   r   r   r   <module>   s0    )