o
    iY(                    @   s   d dl Z d dlmZ d dlZddlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d d	lmZ d d
lmZ e	 Ze Ze Ze Zd dlmZ d dlZd dlZd dlmZ d dlmZ d dl Z d dlZd dlZd dl Z dd Z!e! Z"G dd dZ#dS )    N)HTTPException   )HashGenerator)UserJourney)MicrolearningDurationEstimator)CourseOutlineParser)ContentReviewer)Synthesizer)LiveWhisperSTTTool)PathgTTS)AudioSegmentc                     s~   t jt jt  fdd} |  }|r9|t_t jt j|d}t j|r-|t_	n|t_	t
d|  dS t
d dS )z!Configure pydub with ffmpeg pathsc                     sv   t j d} t j| r| S tddD ]$}t jj gdg| R  }t jt j|d} t j| r8|   S qd S )Nz
ffmpeg.exe      z..)ospathjoinexistsrangeabspath)ffmpeg_pathi
parent_dircurrent_dir /home/azureuser/microlearn/backend/user_journey_with_openai/agentic_workflow/src/user_journey_service/services/content_creation_service.pyfind_ffmpeg"   s   z$configure_pydub.<locals>.find_ffmpegzffprobe.exeu   ✓ Pydub configured with: Tu=   ⚠ Warning: ffmpeg.exe not found. Audio conversion may fail.F)r   r   dirnamer   __file__r   	converterr   r   ffprobeprint)r   r   ffprobe_pathr   r   r   configure_pydub   s   r%   c                   @   s  e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zd5d6 Zd7d8 Zd9d6 Zd:d; Zd<d= Z d>d? Z!d@dA Z"dBdC Z#dDS )EContentCreationServicec                 C   sn   t  | _|| _t|| _d| j d| _d| j d| _d| j d| _d| j d| _	d| j d| _
d S )Nz	research/.mdzoutput/zoutput1/zoutput2/zparsed_course_content/z.json)r   crew_instance
input_datar   generate_input_hash
input_hashresearch_fileoutput_fileoutput_file_1output_file_2json_output_path)selfr)   r   r   r   __init__I   s   zContentCreationService.__init__c                 C   sX   | dd}td| j d| d| d}td| j d| d| d}| o+| S )z9Check if content and questions already exist for a lesson._updated_content//module_lessonr'   	question/)replacer   r+   r   )r1   
module_idxlesson_numberlesson_safeupdated_content_filequestion_filer   r   r   _has_updated_content_questionsT   s   z5ContentCreationService._has_updated_content_questionsc           	      C   s   t d |dd}td| j d| d| d}td| j d| d| d	}td| j d| d| d
}td| j d| d| d}| oU| oU| oU| }t d|  |S )z'Check if audio files exist for a lessonz.Inside audio files availability check functionr3   r4   audio/lessons/r6   r7   .wavaudio/questions/z/1.wavz/2.wavz/3.wavzThe output is : )r#   r9   r   r+   r   )	r1   r:   r;   r<   audio_contentaudio_question1audio_question2audio_question3resultr   r   r    _has_audio_content_and_questions^   s   z7ContentCreationService._has_audio_content_and_questionsc                 C   s   i }t d| j d| d}td|  t d| j d| d}t|d}|D ]-}t| | |}td td	 ttd
}||d}	|||< | j	 }
|
j
|	d q*d|i}	| j }
|
j
|	d d S )Naudio/stage//stagerA   z$The audio content is available at : rB   /*.wavu/   🎤 Press Enter when you're ready to answer...u    📢 Listening to your answer...zEnter your answer: )questionanswerinputsusers_response)r   r+   r#   sortedglobwav_to_text_whisperinputstrr(   evaluator_crewkickoffassessment_crew)r1   idxall_qns_answerrC   audio_question	wav_fileswav_filerM   rN   rP   crewr   r   r   _run_evaluations   s$   



z&ContentCreationService._run_evaluationc                    s  ddl }z\z2| } fdd}|| }ddl}tdD ]}| r.|   W W S |d q W W dS  ty`   |	 }|
| z| }	|	W |   Y W S |  w w  ty }
 ztd|
  ddl}|  W Y d}
~
dS d}
~
ww )zr
        Synchronous wrapper for text_to_wav async function
        Works in both sync and async contexts
        r   Nc                      s     I d H S )N)text_to_wavr   r-   r1   textr   r   wrapper   s   z8ContentCreationService.text_to_wav_sync.<locals>.wrapper
   g      ?u   ❌ Error in text_to_wav_sync: )asyncioget_running_loopcreate_tasktimer   donerG   sleepRuntimeErrornew_event_loopset_event_looprun_until_completera   close	Exceptionr#   	traceback	print_exc)r1   rc   r-   rf   looprd   taskri   r4   rG   err   r   rb   r   text_to_wav_sync   s6   
z'ContentCreationService.text_to_wav_syncc           
      C   s  | dd}td| j d| d| d}| r!td|  n@tjtjd| j dd	d
 td| j d| d| d}t	|ddd}|
 }W d   n1 sVw   Y  | || td| j d| d| d}t|d}	|	stj|d	d
 td | || dS dS )zCreate audio files for a lessonr3   r4   r@   r6   r7   .mp3z/Audio for the content is already available at: rK   Texist_okr5   r'   rutf-8encodingNrB   *.mp3Question audio is not available)r9   r   r+   r   r#   r   makedirsr   r   openreadra   rR   rS   _create_audio_question)
r1   r:   r;   r<   rC   content_fileftext_contentaudio_question_dirr]   r   r   r   _create_audio_files   s"    
z*ContentCreationService._create_audio_filesc           	      C   s   | dd}td| j d| d| d}tjtjd| j ddd	 td
| j d| d| d}t|ddd}| }W d   n1 sJw   Y  | 	|| td| j d| d| d}tj|dd	 t
d | || dS )z>Create audio files for a lesson without checking if they existr3   r4   r@   r6   r7   rx   rK   Try   r5   r'   r{   r|   r}   NrB   r   )r9   r   r+   r   r   r   r   r   r   ra   r#   r   )	r1   r:   r;   r<   rC   r   r   r   r   r   r   r   !_create_audio_files_without_check   s    
z8ContentCreationService._create_audio_files_without_checkc                 C   s  t d| j d| d}| rtd|  n=tjtjd| j ddd t d| j d| d	}t|d
dd}|	 }W d    n1 sJw   Y  | 
|| t d| j d| d}t|d}|stjtjd| j d| ddd td | | d S d S )NrI   rJ   rA   z/Audio for the content is already available at :rK   Try   r5   r'   r{   r|   r}   rB   rL   zquestion audio is not available)r   r+   r   r#   r   r   r   r   r   r   ra   rR   rS   r   )r1   rZ   rC   r   r   r   r\   r]   r   r   r   _create_audio_files_04_01_2026   s     
&z5ContentCreationService._create_audio_files_04_01_2026c                 C   s  | dd}td| j d| d| d}| rt|ddd	}| }W d
   n1 s/w   Y  | |}tdt| d| d|  t	|ddD ]1\}}	td| d| d| d|	d
d  d	 td| j d| d| d| d	}
| 
|	|
 qNd
S td|  d
S )z6Create audio files for questions for a specific lessonr3   r4   r8   r6   r7   r'   r{   r|   r}   NzFound z* questions to convert to audio for Module z	, Lesson r   startu   
🧠 Asking Question z for Module : d   z...
rB   rK   rA   zQuestion file not found: )r9   r   r+   r   r   r   parse_questionsr#   len	enumeratera   )r1   r:   r;   r<   r>   r   	questionsparsed_questionsindexrM   r\   r   r   r   r      s   

*$z-ContentCreationService._create_audio_questionc                 C   s   | d}g }g }|D ]8}| }|ds|dr7|r)|d|  g }tdd|}tdd|}|rC|dsC|| q|rP|d|  dd	 |D }|smd
}t||tjtj	B }dd	 |D }|dd S )z"Parse questions from markdown text
z### Q z^### Q\d+:\s* z^### Q\d+:.*?\s*z###c                 S   s    g | ]}|rt |d kr|qS )re   )r   .0qr   r   r   
<listcomp>!  s     z:ContentCreationService.parse_questions.<locals>.<listcomp>z#### Q\d+:?\s*(.*?)(?=\n### Q\d+:|$)c                 S      g | ]
}|  r|  qS r   stripr   r   r   r   r   '      N	   )
splitr   
startswithappendr   resubfindallDOTALL
IGNORECASE)r1   md_textlinesr   current_questionlinepatternr   r   r   r   	  s,   

z&ContentCreationService.parse_questionsc                 C   s  |sdS t |}g d}|D ]}tj|d|tjtjB d}qdddddddd}| D ]\}}tj|||tjtjB d}q,tdd|}g }|dD ]'}| }|rs|rn|d	 	 rnt
|d
krn|d	  |d
d  }|| qLd|}tdd| }|S )zz
        Intelligently clean text for audio conversion.
        Handles various markdown and formatting patterns.
        r   )z
^#{1,6}\s+z\*\*|\*|__|_z	```.*?```z`[^`]+`z!?\[.*?\]\(.*?\)z&[a-z]+;z^\s*[-*+]\s+z^\s*\d+\.\s+z^>\s+z\|-+\|z	^\|.*?\|$flagsz\1. )z^#{1,3}\s+(.*?)\s*$z^\s*[-*+]\s+(.*?)\s*$z^\s*\d+\.\s+(.*?)\s*$zA^(Introduction|Conclusion|Summary|Note|Tip|Warning|Important):\s*z"^(Step \d+|Phase \d+|Part \d+):\s*z^(Q\d+|Question \d+)[:.]?\s*z'^(Easy|Moderate|Difficult|Hard)[-:]?\s*z	[.!?]{2,}r3   r   r   r   Nr   z\s+)rV   r   r   	MULTILINEr   itemsr   r   r   islowerr   upperr   r   )r1   rc   patterns_to_remover   replacementsreplacementr   r   r   r   r   intelligent_text_cleaner-  s8   

z/ContentCreationService.intelligent_text_cleanerc           	      C   s  |sdS | d}g }|D ]}| }|sq|dr*|d|dd    q|dr>|d|d	d    q|d
rR|d|dd    q|drd||dd    q|dss|dss|dr|dd  }tdd|}tdd|}|d|  qtd|rtdd|}tdd|}|d|  q|}tdd|}tdd|}tdd|}tdd|}| r|| qg }t|D ]\}}|| |ds|dr|d qd| S )z
        Optimal markdown to speech converter - simpler but effective.
        Preserves semantic meaning without over-engineering.
        r   r   # zMain topic: r   N## 	Section:    ### Subsection: r   ####    z- z* z+ z[\*_]{1,2}(.*?)[\*_]{1,2}\1z	`([^`]+)`zcode \1   • 	^\d+\.\s+\*\*(.*?)\*\*	\*(.*?)\*\[([^\]]+)\]\([^)]+\))zMain topic:Section:zSubsection:)	r   r   r   r   r   r   matchr   r   )	r1   rc   r   speech_linesr   item
clean_linerG   r   r   r   r   optimal_markdown_to_speechv  sT   








z1ContentCreationService.optimal_markdown_to_speechc           	      C   s  |sdS t |}|d}g }|D ]d}| }|sq|dr1|dd  }|d|  q|drG|dd  }|d	|  q|d
r]|dd  }|d|  q|drs|dd  }|d|  q|dr|dd  }|d|  q|dr|dd  }|d|  qtd|rtdd|}| |}|d|  qtd|rtd|}|r|	d}|	d}| |}|d| d|  qtd|rtdd|}| |}d|
 v sd|v r|d|  q|d |  q|d!r0|dd  }| |}|d"|  qtd#|r=|d$ qd%|v ritd&|ritd'|shd(d) |d%D }|rh|d*d+|  q| |}|rv|| q| d|S ),z
        Convert markdown to natural speech while preserving semantic meaning.
        Handles: # Headings, **bold**, *italic*, - bullets, 1. numbered lists, `code`, [links](url)
        r   r   z######    Nr   z##### r   zMinor section: r   r   r   r   r   zTopic: r   r   z	Chapter: r   r   zTitle: z	^[-*+]\s+r   r   z^(\d+)\.\s+(.*)Point r   z^\[[ xX]\]\s+z[x]z[X]zCompleted: zTo do: z> zQuote: z^[-*_]{3,}$z---|z
\w.*\|.*\wz^\|?[-:| ]+\|?$c                 S   r   r   r   )r   cellr   r   r   r     r   z=ContentCreationService.markdown_to_speech.<locals>.<listcomp>zTable row: z; )rV   r   r   r   r   r   r   r   _remove_inline_formattinggrouplowersearchr   _add_speech_pauses)	r1   rc   r   result_linesr   contentr   numbercellsr   r   r   markdown_to_speech  s|   















z)ContentCreationService.markdown_to_speechc                 C   s   |sdS |}dd }dd }dd }dd	 }t d
||}t d||}t d||}t d||}t d||}t d||}t dd|}t dd|}|dd}|dd}|dd}|dd}| S )zV
        Remove inline markdown formatting while preserving semantic meaning.
        r   c                 S      |  d}| S Nr   r   r   r   r   r   r   replace_bold)     
zFContentCreationService._remove_inline_formatting.<locals>.replace_boldc                 S   r   r   r   r   r   r   r   replace_italic.  r   zHContentCreationService._remove_inline_formatting.<locals>.replace_italicc                 S      |  d}d| S )Nr   zcode: r   r   r   r   r   replace_inline_code3     

zMContentCreationService._remove_inline_formatting.<locals>.replace_inline_codec                 S   r   )Nr   zstrikethrough: r   r   r   r   r   replace_strikethrough8  r   zOContentCreationService._remove_inline_formatting.<locals>.replace_strikethroughr   r   z	__(.*?)__z_(.*?)_z`(.*?)`z	~~(.*?)~~r   r   z!\[([^\]]*)\]\([^)]+\)z	Image: \1z&nbsp;r   z&amp;andz&lt;z	less thanz&gt;zgreater than)r   r   r9   r   )r1   rc   originalr   r   r   r   r   r   r   r     s(   z0ContentCreationService._remove_inline_formattingc                 C   s   | d}g }t|D ]_\}}| }|sq|dr&|| |d q|drG|| |t|d k rF||d  dsF|d q|| |t|d k rj||d   rj||d  dsj|d qd|S )zK
        Add natural pauses for speech based on content structure.
        r   )Title:Chapter:zTopic:r   r   )r   r   z
Completed:zTo do:r   )r   r   )r   r   r   r   )r   r   r   r   r   r   r   )r1   rc   r   rG   r   r   r   r   r   r   R  s&   




"

2

z)ContentCreationService._add_speech_pausesc                 C   st  t d|  z]t| }| |}|st d W dS ddl}d| jdd  d}t d|  t|d	d
d}|| t d t|	dd}t
j|rbt
	|| t d|  |W S W dS  ty } zIt d|  ddl}|  t|d }	t|	ddd}
|
d| d|dd   W d   n1 sw   Y  t d|	  W Y d}~dS d}~ww )z3Convert text to WAV audio file - SIMPLIFIED VERSIONConverting text to audio: ,Warning: Empty text, skipping audio creationNr   temp_audio_   rx   Creating MP3 with gTTS: enFrc   langslow   ✓ gTTS conversion completerA      ✓ Audio saved as    ❌ Error in text_to_wav: 
.error.txtwr|   r}   Error: 

Text:   Saved error details to: )r#   rV   r   r   uuiduuid4hexr   saver9   r   r   r   rq   rr   rs   r   write)r1   rc   r-   r   temp_mp3tts
mp3_outputrv   rr   
error_filer   r   r   r   text_to_wav_oldr  s@   

 z&ContentCreationService.text_to_wav_oldc                    s"   t |d}||I dH  dS )z%Convert text to speech using Edge TTSen-US-JennyNeuralN)edge_ttsCommunicater   )r1   rc   output_pathcommunicater   r   r   _text_to_speech_edge  s   z+ContentCreationService._text_to_speech_edgec                    s6  t d|  zt| }| |}|st d W dS t|}|jjddd zeg d}|D ]X}z:t||}|	d}|
t|I dH  | rk| jdkrk|| t d	| d
|  t|W   W W S W q0 ty } zt d| d|  W Y d}~q0d}~ww t d W n ty } zt d|  W Y d}~nd}~ww z4ddlm}	 |	|ddd}
|

t| | r| jdkrt d|  t|W W S W W dS W W dS  ty } zt d|  W Y d}~W dS d}~ww  ty } zt d|  ddl}|  W Y d}~dS d}~ww )z2Convert text to MP3 audio file - OPTIMIZED VERSIONr   r   NTparentsrz   )zen-US-AriaNeuralr  zen-US-GuyNeuralz	.temp.mp3i   u   ✓ Audio created with r   zVoice z	 failed: z*All Edge TTS voices failed, trying gTTS...zEdge TTS failed: r   r   r   Fr   u   ✓ Audio created with gTTS: zgTTS failed: r   )r#   rV   r   r   r   parentmkdirr  r  with_suffixr   r   statst_sizerenamerq   gttsr   rr   rs   )r1   rc   r-   voicesvoicer
  	temp_filevoice_error
edge_errorr   r  
gtts_errorrv   rr   r   r   r   ra     sn   


z"ContentCreationService.text_to_wavc                    s  t d|  zzt| }| |}|st d W dS ddl}d| jdd  d}t d|  t|d	}|	|I dH  t d
 t
|}|jjddd t d|j  tj|ryddl}||t| t d|  t|W S t d|  W dS  ty } z[t d|  ddl}|  t|d }	z/t|	ddd}
|
d| d|dd   W d   n1 sw   Y  t d|	  W n   t d Y W Y d}~dS W Y d}~dS d}~ww ).Convert text to WAV audio file - FIXED VERSIONr   r   Nr   r   r   rx   r   r  r   Tr      ✓ Directory created/verified: r      ❌ Temp file not found: r   r   r   r|   r}   r   r   r   r   Could not save error file)r#   rV   r   r   r   r   r   r  r  r   r   r  r  r   r   r   shutilmoverq   rr   rs   r   r   r1   rc   r-   r   r  r  r  rv   rr   r  r   r   r   r   text_to_wav_30  sR   

 
z%ContentCreationService.text_to_wav_30c                 C   s  t d|  zxt| }| |}|st d W dS ddl}d| jdd  d}t d|  t|d	d
d}|| t d t	|}|j
jddd t d|j
  tj|rvddl}||t| t d|  t|W S t d|  W dS  ty } z[t d|  ddl}|  t|d }	z/t|	ddd}
|
d| d|dd   W d   n1 sw   Y  t d|	  W n   t d Y W Y d}~dS W Y d}~dS d}~ww )r  r   r   Nr   r   r   rx   r   r   Fr   r   Tr  r  r   r  r   r   r   r|   r}   r   r   r   r   r  )r#   rV   r   r   r   r   r   r   r   r   r  r  r   r   r   r  r   rq   rr   rs   r   r   r!  r   r   r   text_to_wav_30012026'  sP   


 
z+ContentCreationService.text_to_wav_30012026c                 C   s6   t d td}|t|}t d|d  |d S )Nzinside wave to text conversionbaseu   🎤 Transcription:rc   )r#   whisper
load_model
transcriberV   )r1   r^   modelrG   r   r   r   rT   `  s
   
z*ContentCreationService.wav_to_text_whisperc                 C   s  zt j| jsdddW S td t| j}| }td|d  d |d }dddddd	}t|d
 ddD ]\}}|d }t	dd|
 }td| d|  t|d ddD ]f\}	}
|
d }td|}|rr|d}n| d|	 }t	dd|
 }|d  d7  < td| d|  | ||}|d  |d 7  < |d  |d 7  < |d  |d 7  < |d  |d  7  < q\q9d!d"| j|d#|d  d$|d  d%|d  d&d'W S  ty	 } zdd(l}| }td)|  td*|  td+d,| d-d(}~ww ).z6Creates audio files for existing content and questionsfailureUser journey is not created.statusmessagez"Starting audio creation process...Course Title: course_titler   r   )total_lessonscontent_audio_createdquestion_audio_createdskipped_contentskipped_questionsmodulesr   r   module_title^Module \d+:\s*r   Processing Module r   lessonslesson_titleLesson (\d+\.\d+)r3   ^Lesson \d+\.\d+:\s*r0    Processing Lesson r1  content_createdr2  questions_createdr3  content_skippedr4  questions_skippedsuccesszAudio creation completed.zCreated z content audio files and z question audio files for z	 lessons.)r,  r-  hashidstatssummaryNzERROR in run_audio_creation: zTraceback: r   z$Unexpected error in audio creation: status_codedetail)r   r   r   r-   r#   r   parse_contentr   r   r   r   r   r   _create_lesson_audio_filesr+   rq   rr   
format_excr   )r1   parserparsed_resultr/  audio_statsr:   moduler6  module_name
lesson_idxlessonlesson_title_fulllesson_number_matchr;   lesson_namelesson_audio_statsrv   rr   error_detailsr   r   r   run_audio_creationl  s^   
	"z)ContentCreationService.run_audio_creationc              
   C   s  ddddd}| dd}ztd| j d| d| d}td	| j d| d| d
}| r| std z?t|ddd}| }W d   n1 sOw   Y  | ||}	|	rl|d  d7  < td|	  ntd W W |S W W |S  ty }
 ztd|
  ddl	}|
  W Y d}
~
W |S d}
~
ww td|  |d  d7  < W |S td|  W |S  ty }
 ztd|
  ddl	}|
  W Y d}
~
|S d}
~
ww )9Create audio files for a specific lesson and return statsr   r>  r?  r@  rA  r3   r4   r@   r6   r7   rx   r5   r'       Creating content audio...r{   r|   r}   Nr>  r   u"       ✓ Content audio created at: u&       ✗ Failed to create content audio&       ✗ Error creating content audio: z"    Content audio already exists: r@  %    Warning: Content file not found: u)       ✗ Error in audio creation process: )r9   r   r+   r   r#   r   r   rw   rq   rr   rs   )r1   r:   r;   rD  r<   content_audio_pathr   r   r   audio_resultrv   rr   r   r   r   rJ    sV   
	z1ContentCreationService._create_lesson_audio_filesc              
   C   s  t d| j d| d| d}t d| j d| d| d}| r| r-t|dng }| s:|jddd	 |rRtd
t| d |d  t|7  < dS td zUt|ddd}|	 }	W d   n1 smw   Y  | 
|	}
d}t|
ddD ]\}}|| d }| s| || |d7 }q|d  |7  < td| d W dS  ty } ztd|  W Y d}~dS d}~ww td|  dS )z'Create audio files for lesson questionsrB   r6   r7   rK   r8   r'   r   Tr      # question audio files already existrA  $    Creating question audio files...r{   r|   r}   Nr   r   r   rx   r?         ✓ Created  question audio files'       ✗ Error creating question audio: &    Warning: Question file not found: )r   r+   r   rR   rS   r  r#   r   r   r   r   r   ra   rq   )r1   r:   r;   r<   rD  question_audio_dirr>   existing_audior   r   r   created_countr   rM   audio_question_pathrv   r   r   r   _create_question_audio_files  s:   

z3ContentCreationService._create_question_audio_filesc              
   C   s  ddddd}| dd}td| j d| d| d}td	| j d| d| d
}| r| std z/t|ddd}| }W d   n1 sNw   Y  | || |d  d7  < td W n- ty }	 ztd|	  W Y d}	~	nd}	~	ww td |d  d7  < ntd|  td| j d| d| d}
td| j d| d| d
}| r_|
 rt	|

dng }|
 s|
jddd |rtdt| d |d  t|7  < |S td  zXt|ddd}| }W d   n	1 sw   Y  | |}d}t|dd!D ]\}}|
| d }| s/| || |d7 }q|d"  |7  < td#| d$ W |S  ty^ }	 ztd%|	  W Y d}	~	|S d}	~	ww td&|  |S )'rY  r   rZ  r3   r4   r@   r6   r7   rx   r5   r'   r[  r{   r|   r}   Nr>  r   u       ✓ Content audio createdr\  z     Content audio already existsr@  r]  rB   rK   r8   r   Tr  r`  ra  rA  rb  r   r?  rc  rd  re  rf  )r9   r   r+   r   r#   r   r   ra   rq   rR   rS   r  r   r   r   )r1   r:   r;   rD  r<   r^  r   r   r   rv   rg  r>   rh  r   r   ri  r   rM   rj  r   r   r   ,_create_lesson_audio_files_before_30_01_2026  st   




zCContentCreationService._create_lesson_audio_files_before_30_01_2026c                 C   s  zTt j| jsdddW S t j| jrStd t| j}| }t jt j| j	dd |
| j	| td|d  d	 |d }d
d
d
d
d
d}t|d ddD ]\}}|d }|d }|d }	|d }
td| d|  tdd| }t|d ddD ]\}}|d }|d }|d }|d }td|}|r|d}n| d| }tdd| }|d  d7  < td| d|  td|}|rt| }|d  }|d! }nd"}d#}d$| j d%| d&|dd' d(}d)| j d%| d&|dd' d(}| ||r3td* | j||||	|
||||||||d+}|d,  d7  < |d-  d7  < qtd. |d/  d7  < |d0  d7  < qqUd1d2| j|d3d4W S W d8S  tyk } z	td5d6| d7d8}~ww )9>Creates content for each module and lesson (NO audio creation)r)  r*  r+  The user journey is createdTry   r.  r/  r   r   )r0  r>  content_updatedr?  questions_updatedr5  r   r   r6  focusoutcomedurationr8  r   r7  r   r9  r:  topicsr;  r3   r<  r0  r=  (\d+)x      `	  (
  content/r6   r7   r4   r'   r8   )    Creating new content and questions...r:   rP  module_focusmodule_outcomemodule_durationr;   rU  lesson_durationlesson_outcomelesson_topicsr/  word_count_lowerword_count_upperr>  r?  '    Content and questions already existro  rp  rB  AContent and questions created (audio will be created separately).-Call /run-audiocreation to create audio filesr,  r-  rC  rD  	next_stepr   Error parsing result: rF  N)r   r   r   r-   r#   r   rI  r   r   r0   export_to_jsonr   r   r   r   r   r   intr+   r9   _should_create_content_create_new_lessonrq   r   )r1   rL  rM  r/  content_statsr:   rO  r6  r}  r~  r  rP  rQ  rR  rS  r  r  r  rT  r;   rU  duration_matchminuteslower_rangeupper_ranger   r>   rG   rv   r   r   r   run_content_creation_23012026V  s   
	
"";`z4ContentCreationService.run_content_creation_23012026c              	   C   t   | dd}tjd| j d| d| d}tjd| j d| d| d}|  r2|r0|s2dS |  s8dS d	S 
z$Check if content needs to be createdr3   r4   rz  r6   r7   r'   r8   TFr9   r   r   r   r+   _is_first_iterationr1   r:   r;   r<   content_existsquestion_existsr   r   r   r       ""z-ContentCreationService._should_create_contentc                 C   sp  zt j| jsdddW S t j| jrtd t| j}| }t jt j| j	dd |
| j	| td|d  d	 |d }d
d
d
d
d
d
d
d}t|d ddD ]<\}}|d }|d }|d }	|d }
td| d|  tdd| }t|d ddD ]\}}|d }|d }|d }|d }td|}|r|d}n| d| }tdd| }|d  d7  < td| d|  td|}|rt| }|d  }|d! }nd"}d#}|dd$}d%| j d&| d'| d(}d)| j d&| d'| d(}| ||rtd* | j||||	|
||||||||d+}|d,  d7  < |d-  d7  < t j|rY| |}|rY|d.  d7  < |d/rY|d/  d7  < t j|r~| |}|r~|d.  d7  < |d/r~|d/  d7  < qtd0 |d1  d7  < |d2  d7  < qqWd3d4| j|d5d6W S W d:S  ty } z	td7d8| d9d:}~ww );rm  r)  r*  r+  rn  Try   r.  r/  r   r   )r0  r>  ro  r?  rp  word_count_removedspecial_chars_cleanedr5  r   r   r6  rq  rr  rs  r8  r   r7  r   r9  r:  rt  r;  r3   r<  r0  r=  ru  rv  rw  rx  ry  r4   rz  r6   r7   r'   r8   r{  r|  r>  r?  r  r  r  ro  rp  rB  r  r  r  r   r  rF  N)r   r   r   r-   r#   r   rI  r   r   r0   r  r   r   r   r   r   r   r  r9   r+   r  r  _clean_content_filegetrq   r   )r1   rL  rM  r/  r  r:   rO  r6  r}  r~  r  rP  rQ  rR  rS  r  r  r  rT  r;   rU  r  r  r  r  r<   r   r>   rG   cleanedrv   r   r   r   run_content_creation  s   



Krz+ContentCreationService.run_content_creationc              	   C   r  r  r  r  r   r   r   r  J  r  c                 C   s&   t d| jj  | jjdkrdS dS )NzInside feedback test : zFirst iterationTF)r#   r)   feedback)r1   r   r   r   r  Y  s   z*ContentCreationService._is_first_iterationc              
   C   s(  zt |ddd}| }W d   n1 sw   Y  |}d}d}g d}|D ]}tj|d|tjtjB d}	|	|kr?d	}|	}q(d
dl}
t|
j}|	d g }|D ]%}|dks`|dks`|dkrf|
| qR||v rp|
| qR|
d d	}qRd|}tdd|}tdd|}tdd|}tdd|}tdd|}dd |dD }d|}|dd}| |}||krt |ddd}|| W d   n1 sw   Y  tdtj|  d	|dW S ddiW S  ty } ztd| d |  dt|d!W  Y d}~S d}~ww )"zAClean content file by removing word counts and special charactersr{   r|   r}   NF)>\n*(?:Word\s*Count|word\s*count|WORD\s*COUNT)\s*:?\s*\d+\s*\n*/\n*CONTENT_(?:REJECTED|TOO_SHORT).*?\n*(?=\n|$)D\n*###?\s*(?:Validation|Word Count|Feedback).*?(?=\n###|\n##|\n#|\Z)z+\n*(?:Reading time|Estimated reading).*?\n*z'\n*(?:Retry|Attempt|Regeneration).*?\n*\n\s*Total\s*$\n\s*Total\s*\n*$r   r   Tr      —–•°´`‘’"“”	r   r   [ \t]+\n\s*\n\s*\n+

z\*\*\s+z**z\s+\*\*z#\s+r   c                 S   s   g | ]}|  qS r   )rstrip)r   r   r   r   r   r     s    z>ContentCreationService._clean_content_file.<locals>.<listcomp>   ﻿r   u       ✓ Cleaned: )r  r  r  u       ✗ Error cleaning r   )r  error)r   r   r   r   r   r   stringset	printableupdater   r   r   r9   _remove_trailing_metadatar   r#   r   r   basenamerq   rV   )r1   	file_pathr   r   original_contentr  r  word_count_patternsr   new_contentr  allowed_charscleaned_contentcharr   rv   r   r   r   r  _  sb   








z*ContentCreationService._clean_content_filec                 C   s^  | j  }td|}|rt| }nd}|dd}d| j d| d| d}d	| j d| d| d}||||||d
||||	|
dd}td|  td| d|  td| j j	 d| j j
  ||t||||||||	|
|| d| dd td|  | jj|d}|j|d | | t|ddd}| }W d   n1 sw   Y  ddd |
D }td|  t||||| d| }| |}| jj|d}||d< d |d!< |j|d | | d"| j d| d| d}tjtj|d#d$ t|d%dd}|| W d   n	1 s#w   Y  | | d&S )'z4Create content for a new lesson WITH ENHANCED PROMPTru     r3   r4   rz  r6   r7   r'   r8   )r   namerq  rr  rs  )r   titlers  
objectivesrt  )CourseModuleLessonz*The user journey for content creation is: z(The word count for content creation is: z to z.The level and motive for content creation is: z and a  
            ## CRITICAL FORMATTING RULES - MUST FOLLOW:
            
            1. **NO WORD COUNT REFERENCES:**
            - DO NOT include "Word Count:", "word count:", "Total words:" or any word count information
            - DO NOT mention word count validation in the content
            - DO NOT add word count numbers anywhere
            - DO NOT add standalone word "Total" at the end of content
            
            2. **NO VALIDATION COMMENTS:**
            - DO NOT include "CONTENT_REJECTED", "CONTENT_TOO_SHORT", or validation feedback
            - DO NOT mention validation or approval processes
            - Only output the actual lesson content
            
            3. **CLEAN SPECIAL CHARACTERS:**
            - Use standard ASCII characters only
            - Avoid Unicode special characters that may cause display issues
            - Use standard punctuation: , . ! ? : ; " '
            
            4. **PROPER MARKDOWN FORMATTING:**
            - Use consistent heading levels: # for main title, ## for sections, ### for subsections
            - Use proper line breaks and spacing
            - Ensure all markdown syntax is correctly formatted
            
            5. **OUTPUT FORMAT:**
            - Only output the lesson content in clean markdown
            - No extra comments, notes, or explanations
            - No word count information
            - No validation messages
            )r/  module_numberrP  r}  r~  r;   r:  r  lesson_objectivesr  user_journey
word_countstrict_formattingzCreating content file: )r-   rO   r{   r|   r}   Nz, and c                 S   s   g | ]}d | d qS )'r   )r   topicr   r   r   r     s    z=ContentCreationService._create_new_lesson.<locals>.<listcomp>zThe topic sections are: r   a   
        ## IMPORTANT FORMATTING RULES:
        - DO NOT include word count or validation comments
        - Use clean, standard characters only
        - Format questions clearly with ### Q1:, ### Q2:, etc.
        - No extra text or explanations
        question_instructionsr5   Try   r   z7Content and questions created and cleaned successfully.)r)   dictr   r   r  r   r9   r+   r#   Levelmotiver  rV   r(   second_stage_crewrX   r  r   r   r   content_reviewerreview_and_enrich_content_clean_text_contentqa_gen_crewr   r   r   r   r   )r1   r:   rP  r}  r~  r  r;   rU  r  r  r  r/  r  r  rP   duration_minutes_matchr  r<   r   r>   complete_datar_   r   rc   topic_sectionsupdated_dataupdated_content_pathout_filer   r   r   r    s   
/




z)ContentCreationService._create_new_lessonc                 C   sd  |s|S |  |}g d}|D ]}tj|d|tjtjB d}qtjdd|tjd}tjdd|tjd}ddl}t|j}|d	 g }|D ]#}|dksS|d
ksS|dkrY|	| qE||v rc|	| qE|	d qEd
|}tdd|}tdd|}|dd}|d}g }	|D ]}
|
 }|r| dkr|		|
 q|r| dkrqqd
|	}| S )zTClean text content by removing word counts, special characters, and trailing 'Total')r  r  r  r   r   r  r   r  r   Nr  r  r  r   r  r  r  r  total)r  r   r   r   r   r  r  r  r  r   r   r9   r   r   r   )r1   rc   r   r   r  allowedcleaned_charsr  r   cleaned_linesr   stripped_liner   r   r   r  ?  sB   





z*ContentCreationService._clean_text_contentc                 C   s   |s|S |}g d}|D ]	}t |d|}q|d}g }h d}|D ]1}|  }	|	|v r0q#d}
|D ]}|	|d sH|	|d sH|	|krLd}
 nq4|
sT|| q#d|}t d	d|}| S )
zFRemove trailing metadata words and phrases from text - SIMPLER VERSION)z?(?:\n\s*)?[Tt][Oo][Tt][Aa][Ll]\s*(?:[Ww][Oo][Rr][Dd][Ss]?\s*)?$z:\n\s*[Tt][Oo][Tt][Aa][Ll]\s*\n\s*[Ww][Oo][Rr][Dd][Ss]?\s*$z\n\s*[Tt][Oo][Tt][Aa][Ll]\s*$z\n\s*[Ww][Oo][Rr][Dd][Ss]?\s*$z\n\s*\d+\s*(?:words?|total)\s*$r   r   >   endwordcountr  wordsrE  
conclusionF:r   Tz2\n.*[Tt][Oo][Tt][Aa][Ll].*[Ww][Oo][Rr][Dd][Ss]?.*$)r   r   r   r   r   r   r   r   )r1   rc   original_textpatternsr   r   r  metadata_wordsr   stripped	skip_liner  r   r   r   r    s8   


z0ContentCreationService._remove_trailing_metadataN)$__name__
__module____qualname__r2   r?   rH   r`   rw   r   r   r   r   r   r   r   r   r   r   r  r  ra   r"  r#  rT   rX  rJ  rk  rl  r  r  r  r  r  r  r  r  r   r   r   r   r&   H   sH    
+$ICe4 6?:9G2(Ii{T Cr&   )$sysfastapir   r   utils.hashingr   user_journey_service.crewr   2user_journey_service.processors.duration_estimatorr   5user_journey_service.processors.StagewiseCourseParserr   0user_journey_service.processors.content_reviewerr   8user_journey_service.processors.user_journey_synthesizerr	   *user_journey_service.tools.custom_stt_toolr
   duration_estimatorr  synthesizerstt_toolpathlibr   r   r%  r  r   pydubr   
subprocessrf   r  r%   pydub_configuredr&   r   r   r   r   <module>   s4    )