from typing import Type
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
import dspy
import os
from dotenv import load_dotenv

# === Load secrets ===
# The .env file must be loaded before the environment lookups below.
load_dotenv(dotenv_path="/home/azureuser/microlearn/backend/prompt_opt_dspy/.env")

# Azure OpenAI connection settings; each one is None when its variable is unset.
azure_api_key, azure_api_host, azure_api_version = map(
    os.getenv, ("AZURE_API_KEY", "AZURE_API_HOST", "AZURE_API_VERSION")
)

# === Configure Azure OpenAI LM ===
# A single LM instance is created at import time and registered as the
# default for every DSPy module defined in this file.
lm = dspy.LM(
    "azure/Csqr-gpt-4o-mini",
    temperature=0.0,
    api_version=azure_api_version,
    api_base=azure_api_host,
    api_key=azure_api_key,
)
dspy.configure(lm=lm)


# === DSPy Signature ===
# Declares the estimator's prompt contract: five string inputs describing the
# learner and the target topic, and one string output holding the recommended
# duration in minutes.
#
# NOTE(review): DSPy normally uses a Signature's docstring as the instructions
# sent to the LM, so the text below is presumably runtime prompt content rather
# than plain documentation — treat any edit to it as a behaviour change.
# NOTE(review): the "{Job_Title}", "{Experience}", "{Skills}", "{topic}" and
# "{Level}" placeholders in the "User Profile" section are never formatted in
# this file; they appear to reach the model as literal text — confirm whether
# that is intended.
class DurationEstimatorSignature(dspy.Signature):
    """
    You are a learning strategist designing microlearning journeys for non-technical professionals.

    I will provide you with:
    - The user’s job title
    - Their experience with the topic
    - Their current skills
    - A technical topic they need to learn
    - The desired proficiency level

    Instructions:
    1. Based on the user’s job title, current skills, and experience **with the topic**, estimate the gap between their current capabilities and the skills needed to reach the desired proficiency level for the given topic.
    2. Considering the user is non-technical and can dedicate a maximum of 360 minutes, suggest a suitable total time duration (in minutes, up to 360 minutes) that would be appropriate and realistic for a microlearning journey to bridge this gap.
    3. Only output the recommended total time duration in minutes — no explanation or other details.

    Follow this structured reasoning:
    1. **Topic Familiarity (Not Work Experience)**:
        Estimate the user's starting point based on their self-reported experience with the topic:
        - **Complete Beginner** → Starting from scratch
        - **Some Experience** → Aware of basic terms, limited application
        - **Intermediate** → Comfortable with fundamentals, can apply concepts
        - **Advanced** → Proficient, seeking mastery or advanced application

    2. **Job Context and Learning Bandwidth**:
        - **Executives / Senior Professionals**
        → Need only conceptual understanding + business value focus
        → Limited time, faster grasp of strategic value
        → Recommend **shorter durations**
        
        - **Mid-Level Professionals**
        → Require **understanding + application**
        → Moderate time, good motivation
        → Recommend **efficient durations**

        - **Junior Professionals**
        → May need more foundational context
        → More time available, learning is slower
        → Recommend **longer durations**

    3. **Proficiency Level Goals**:
        Match effort based on the target outcome:
        - **No-code** → Focus is on using tools or dashboards without scripting → typically fastest
        - **Low-code** → Involves configuration or automation with minimal scripting → moderate
        - **Hands-on Code** → Requires actual coding, debugging, and logic skills → longest duration
        - **Mixed Approach** → Combination of conceptual, tooling, and coding → variable but usually longer

    4. **Topic Complexity Factor**:
        Use judgment on how inherently complex the topic is. For example:
        - "APIs" is simpler than "Machine Learning" for most non-technical learners
        - "Data Visualization" (No-code) may be faster to teach than "Building ETL Pipelines" (Hands-on Code)

    5. **Make your estimate logical, realistic, and human-centered.**

    Output format:
    [Time Duration in minutes]

    User Profile:
    - Job Title: {Job_Title}
    - Experience with Topic: {Experience}
    - Current Skills: {Skills}
    - Target Topic: {topic}
    - Desired Proficiency Level: {Level}
    
    """

    # Input fields. The names (including their capitalisation) are part of the
    # interface: the few-shot examples, the module's forward() and the tool's
    # input schema in this file all pass values using these exact keywords.
    # NOTE(review): declaration order presumably determines the order in which
    # DSPy renders the fields in the prompt — confirm before reordering.
    Job_Title: str = dspy.InputField()
    Experience: str = dspy.InputField()
    Skills: str = dspy.InputField()
    topic: str = dspy.InputField()
    Level: str = dspy.InputField()
    # Single output field. It is typed as a string; the metric in this file
    # parses it with int() after stripping whitespace.
    duration_minutes: str = dspy.OutputField(
        desc="Recommended learning journey duration in minutes (only the number)"
    )


# === DSPy Module ===
class DurationEstimatorModule(dspy.Module):
    """Few-shot-optimised predictor for microlearning journey durations.

    On construction, a ``dspy.Predict`` over ``DurationEstimatorSignature`` is
    compiled with ``dspy.BootstrapFewShot`` against a small built-in training
    set. The compiled predictor is stored as ``optimized_predictor`` and used
    by :meth:`forward`.

    Note that the compile step runs on *every* instantiation, so callers that
    need many predictions should construct one instance and reuse it.
    """

    # Signature input-field names, shared by every training example.
    _INPUT_FIELDS = ("Job_Title", "Experience", "Skills", "topic", "Level")

    def __init__(self):
        super().__init__()
        predictor = dspy.Predict(DurationEstimatorSignature)

        # --- Few-shot examples ---
        trainset = [
            dspy.Example(
                Job_Title="Data Scientist",
                Experience="Advanced",
                Skills="Python, Statistics",
                topic="Machine Learning",
                Level="Hands-on coding",
                duration_minutes="240",
            ).with_inputs(*self._INPUT_FIELDS),
            dspy.Example(
                Job_Title="Marketing Manager",
                Experience="Advanced",
                Skills="Campaigns, Content Strategy",
                topic="Data Analysis",
                Level="Low-code approach",
                duration_minutes="180",
            ).with_inputs(*self._INPUT_FIELDS),
            dspy.Example(
                Job_Title="Product Manager",
                Experience="Some experience",
                Skills="Strategy, Cross-functional collaboration",
                topic="AI for Business",
                Level="Mixed approach",
                duration_minutes="180",
            ).with_inputs(*self._INPUT_FIELDS),
        ]

        optimizer = dspy.BootstrapFewShot(metric=self._duration_metric)
        self.optimized_predictor = optimizer.compile(
            student=predictor,
            trainset=trainset,
        )

    @staticmethod
    def _duration_metric(gold, pred, trace=None):
        """Score a prediction: 1 if it is an integer within 0-360, else 0.

        Args:
            gold: The reference example. Accepted because the optimizer passes
                it, but not consulted: only the output's format and range are
                checked, not agreement with the labelled duration.
            pred: The prediction; its ``duration_minutes`` is expected to be a
                string containing an integer.
            trace: Unused; accepted for the optimizer's calling convention.

        Returns:
            ``1`` when ``pred.duration_minutes`` parses as an integer in the
            inclusive range 0-360, otherwise ``0``.
        """
        try:
            minutes = int(pred.duration_minutes.strip())
        except (AttributeError, TypeError, ValueError):
            # Missing/None/non-string output or non-numeric text counts as a
            # failed prediction. Deliberately not a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            return 0
        return int(0 <= minutes <= 360)

    def forward(self, Job_Title, Experience, Skills, topic, Level):
        """Run the compiled predictor for a single learner profile.

        Args:
            Job_Title: The user's job title.
            Experience: The user's experience with the topic.
            Skills: The user's current skills.
            topic: The technical topic to learn.
            Level: The desired proficiency level.

        Returns:
            Whatever ``optimized_predictor`` returns; callers in this file
            read its ``duration_minutes`` attribute.
        """
        return self.optimized_predictor(
            Job_Title=Job_Title,
            Experience=Experience,
            Skills=Skills,
            topic=topic,
            Level=Level,
        )


# === Tool Input Schema ===
class DurationEstimatorInput(BaseModel):
    """Input schema for DurationEstimatorTool."""

    # Every field is a required string: no default is supplied, which pydantic
    # treats the same as an explicit ``...`` default. The field names mirror
    # the keyword arguments of DurationEstimatorTool._run.
    Job_Title: str = Field(description="User's job title")
    Experience: str = Field(description="User's experience level with the topic")
    Skills: str = Field(description="User's current skills")
    topic: str = Field(description="The technical topic to learn")
    Level: str = Field(description="Desired proficiency level")


# === Custom DSPy Tool ===
# Holds at most one compiled estimator for the lifetime of the process.
# DurationEstimatorModule.__init__ runs a BootstrapFewShot compile, so building
# a new instance on every tool call repeats that optimisation work each time.
_ESTIMATOR_CACHE: list = []


def _get_duration_estimator():
    """Return the shared compiled estimator, building it on first use.

    Construction is deferred until the first call so that importing this
    module does not trigger the compile step.
    """
    if not _ESTIMATOR_CACHE:
        _ESTIMATOR_CACHE.append(DurationEstimatorModule())
    return _ESTIMATOR_CACHE[0]


class DurationEstimatorTool(BaseTool):
    """CrewAI tool that exposes the DSPy duration estimator to agents."""

    name: str = "Duration Estimator Tool"
    description: str = (
        "Estimates a realistic total duration (in minutes, max 360) for a microlearning journey "
        "based on the user's job role, skills, topic familiarity, and target proficiency level."
    )
    args_schema: Type[BaseModel] = DurationEstimatorInput

    def _run(self, Job_Title: str, Experience: str, Skills: str, topic: str, Level: str) -> str:
        """Estimate the learning-journey duration for one user profile.

        Args:
            Job_Title: The user's job title.
            Experience: The user's experience level with the topic.
            Skills: The user's current skills.
            topic: The technical topic to learn.
            Level: The desired proficiency level.

        Returns:
            The ``duration_minutes`` value of the estimator's prediction,
            passed through without further validation.
        """
        # Reuse the already-compiled estimator instead of recompiling per call.
        estimator = _get_duration_estimator()
        result = estimator.forward(
            Job_Title=Job_Title,
            Experience=Experience,
            Skills=Skills,
            topic=topic,
            Level=Level,
        )
        return result.duration_minutes