o
    4i<                     @   s@   d Z ddlZddlZddlmZ G dd dZe Zdd ZdS )zu
Course Statistics Utility
Analyzes the course outline file (output/{hashid}.md) to provide module and lesson counts
    N)Pathc                   @   sn   e Zd ZdddZdd Zdd Zdd	 Zd
d Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd ZdS )CourseStatsNc                 C   sH   |du rt jt jt}t j|| _td| j  dS || _dS )z!Initialize with project root pathNu!   📁 Auto-detected project root: )ospathdirnameabspath__file__project_rootprint)selfr	   current_dir r   u/home/azureuser/microlearn/backend/user_journey_with_openai/agentic_workflow/src/user_journey_service/course_stats.py__init__
   s
   
zCourseStats.__init__c           
   
      s  t j| jd| d}td|  tdt jt j|  tdt j|  t j|sAtd|  |dd|d	S ztd
 t|ddd}| }W d   n1 s\w   Y  | 	|| t
 ts|tdt   |dddW S g d} fdd|D }|rtd|  tdt    |D ]%}|dkrd d< q|dkrd d< q|dkrd d< q|dkrg  d< q W S  ty } z"tdt|  ddl}	|	  |ddt| dW  Y d}~S d}~ww ) z
        Analyze the course outline file and return statistics
        
        Args:
            hashid: The hashid of the course
            
        Returns:
            dict: Course statistics including module count, lesson counts, etc.
        output.mdu$   🔍 Looking for course outline at: u   📁 Output directory exists: u   📄 File exists: u   ❌ File does not exist at: Fz=Course outline file not found. Please run /run-journey first.)hashidexistserrorsearched_locationu   📖 Opening file...rzutf-8)encodingNu   ❌ Stats is not a dictionary: Tz&Parser returned invalid data structure)r   r   r   )course_titlemodule_counttotal_lessonsmodulesc                    s   g | ]}| vr|qS r   r   ).0keystatsr   r   
<listcomp>C   s    z0CourseStats.get_course_stats.<locals>.<listcomp>u$   ❌ Missing required keys in stats: u   📊 Stats keys found: r   Unknown Courser   r   r   r   u   ❌ Error parsing file: zError parsing course outline: )r   r   joinr	   r
   r   r   openread_parse_course_outline
isinstancedicttypelistkeys	Exceptionstr	traceback	print_exc)
r   r   outline_pathfcontentrequired_keysmissing_keysr   er-   r   r   r   get_course_stats   sb   




zCourseStats.get_course_statsc                 C   sR  t d t d t d td|tj}|r|dnd}t d|  d}t||tj}t|}t d| d	 g }d
}	t|ddD ]\}
\}}t|}|	 }t d| d|  | 
||}| |}| |}| |}d}t||tj}t|}|	|7 }	t d| d g }|D ]2\}}|	 }t d| d|  | ||}| |}| |}| |}||||||d q||||||||d qD|	}t d t d t d t d|  t d|  t d|	  t d|	  t d|  t d |d|||	||	||dd| dd| d d!| d d"| d d#d$S )%z6Parse the markdown content to extract course structurez2==================================================u   📝 Parsing course outline...z^#\s+\*\*(.+?)\*\*   r!   u   📚 Course Title: z'^##\s+\*\*Module\s+(\d+):\s*([^*]+)\*\*u   📊 Found z modulesr   )startu   
📦 Module z: z,###\s+\*\*Lesson\s+(\d+\.\d+):\s*([^*]+)\*\*u     📖 Found  lessonsu       • Lesson )lesson_numberlesson_titledurationtopicsoutcome)module_numbermodule_titlefocusr=   r;   lesson_countlessonsz3
==================================================u   📊 SUMMARYzCourse: zTotal Modules: zTotal Lessons: zEstimated Content Files: zEstimated Audio Files: T)content_filescontent_audio_filestotal_audio_filesz/output/r   z	/content//z/updated_content/z/audio/lessons/)course_outlinecontent_directoryupdated_content_directoryaudio_lessons_directory)r   r   r   r   r   r   estimated_output
file_paths)r
   research	MULTILINEgroupfindalllen	enumerateintstrip_extract_module_section_extract_module_focus_extract_module_outcome_extract_module_duration_extract_lesson_section_extract_lesson_duration_extract_lesson_topics_extract_lesson_outcomeappend)r   r1   r   course_title_matchr   module_patternr   r   modules_listr   
module_idx
module_numr?   module_sectionmodule_focusmodule_outcomemodule_durationlesson_patternrB   rA   lesson_details
lesson_numr:   lesson_sectionlesson_durationlesson_topicslesson_outcometotal_content_audior   r   r   r%   ^   s   











z!CourseStats._parse_course_outlinec           	      C   s   d| d}t ||t j}|sdS | }d|d  d}t |||d d t j}|r<|d |  }||| S ||d S )z=Extract the section of content belonging to a specific modulez^##\s+\*\*Module\s+: r6   N)rM   rN   rO   r7   )	r   r1   rc   start_patternstart_match	start_posnext_module_pattern
next_matchend_posr   r   r   rV      s   z#CourseStats._extract_module_sectionc                 C   &   t d|t j}|r|d S dS )z(Extract module focus from module sectionz.### \*\*Module Focus\*\*\s*\n(.*?)(?=\n###|\Z)r6   rq   rM   rN   DOTALLrP   rU   )r   sectionfocus_matchr   r   r   rW         z!CourseStats._extract_module_focusc                 C   rx   )z*Extract module outcome from module sectionz0### \*\*Module Outcome\*\*\s*\n(.*?)(?=\n###|\Z)r6   rq   ry   )r   r{   outcome_matchr   r   r   rX      r}   z#CourseStats._extract_module_outcomec                 C   rx   )z+Extract module duration from module sectionz7### \*\*Total Module Duration\*\*\s*\n(.*?)(?=\n###|\Z)r6   rq   ry   )r   r{   duration_matchr   r   r   rY      r}   z$CourseStats._extract_module_durationc                 C   s4   dt | d}t ||t j}|r|dS dS )z5Extract a specific lesson section from module sectionz### \*\*Lesson z$:.*?\*\*(.*?)(?=\n### \*\*Lesson|\Z)r6   rq   )rM   escaperN   rz   rP   )r   rd   rj   patternmatchr   r   r   rZ      s   z#CourseStats._extract_lesson_sectionc                 C   rx   )z+Extract lesson duration from lesson sectionz,#### \*\*Duration\*\*\s*\n(.*?)(?=\n####|\Z)r6   rq   ry   )r   rk   r   r   r   r   r[      r}   z$CourseStats._extract_lesson_durationc                 C   sR   t d|t j}|r'|d }t d|t j}|s%dd |dD }|S g S )z)Extract lesson topics from lesson sectionz2#### \*\*Topics Covered\*\*\s*\n(.*?)(?=\n####|\Z)r6   u   [-•]\s*(.*?)(?=\n[-•]|\Z)c                 S   s   g | ]
}|  r|  qS r   )rU   )r   tr   r   r   r      s    z6CourseStats._extract_lesson_topics.<locals>.<listcomp>
)rM   rN   rz   rP   rU   rQ   split)r   rk   topics_matchtopics_textr<   r   r   r   r\      s   z"CourseStats._extract_lesson_topicsc                 C   rx   )z*Extract lesson outcome from lesson sectionz+#### \*\*Outcome\*\*\s*\n(.*?)(?=\n####|\Z)r6   rq   ry   )r   rk   r~   r   r   r   r]     r}   z#CourseStats._extract_lesson_outcomec                 C   sh  |  |}|rt|tstd|  |dddddS |dr8td|d   |d|d d	d|d
ddS |ddsJ|dd	d|d
ddS zC|d|ddd|dd d|dd d|dd|dd|di dd|di ddddd |dg D dW S  ty } ztd |  |dd!t| d"d#dW  Y d$}~S d$}~ww )%zGGet course statistics in a nicely formatted dictionary for API responseu   ❌ Invalid stats returned: FzFailed to parse course outlinezXCourse outline not found or invalid format. Please ensure /run-journey was called first.z4Jobs are still queued but course outline is missing.)r   r   r   warningnoter   u   ❌ Error in stats: zFCourse outline not found. Please ensure /run-journey was called first.r   unknown)r   r   r   r   r   r   r   )r   r   r   r   r   Tr   r!   zCourse contains r   r   z modules with r   r8   rK   rC   rE   )rC   audio_filesc                 S   s,   g | ]}| d | d| dddqS )r>   r?   rA   r   )r>   titlerA   )get)r   mr   r   r   r    <  s    
z3CourseStats.get_formatted_stats.<locals>.<listcomp>r   )r   r   r   summaryr   r   estimated_filesr   u   ❌ Error formatting stats: zError formatting course stats: z7Course outline exists but could not be parsed properly.z8Jobs are still queued but course preview is unavailable.N)r5   r&   r'   r
   r   r+   r,   )r   r   r   r4   r   r   r   get_formatted_stats
  sb   

	


	
 


zCourseStats.get_formatted_stats)N)__name__
__module____qualname__r   r5   r%   rV   rW   rX   rY   rZ   r[   r\   r]   r   r   r   r   r   r   	   s    
Hkr   c                 C   s
   t | S )z(Convenience function to get course stats)course_statsr   )r   r   r   r   r5   T  s   
r5   )__doc__r   rM   pathlibr   r   r   r5   r   r   r   r   <module>   s      J