import os
from functools import lru_cache
from typing import Type

import dspy
from crewai.tools import BaseTool
from dotenv import load_dotenv
from pydantic import BaseModel, Field

# === Load secrets ===
# NOTE(review): absolute, machine-specific path — consider making this
# configurable (env var) or relying on load_dotenv()'s default discovery.
load_dotenv(dotenv_path="/home/azureuser/microlearn/backend/prompt_opt_dspy/.env")

azure_api_key = os.getenv("AZURE_API_KEY")
azure_api_host = os.getenv("AZURE_API_HOST")
# NOTE(review): azure_deployment_id is read but never used below — the
# deployment name is hard-coded in dspy.LM(); confirm which is intended.
azure_deployment_id = os.getenv("AZURE_DEPLOYMENT_ID")
azure_api_version = os.getenv("AZURE_API_VERSION")

# === Configure Azure OpenAI LM ===
# Deterministic generation (temperature=0.0) against an Azure OpenAI
# gpt-4o-mini deployment. dspy.configure() makes this the global LM for
# every DSPy predictor in the process.
lm = dspy.LM(
    "azure/Csqr-gpt-4o-mini",
    api_key=azure_api_key,
    api_base=azure_api_host,
    api_version=azure_api_version,
    temperature=0.0,
)
dspy.configure(lm=lm)

# === DSPy Signature ===
# NOTE(review): in DSPy the class docstring below is sent to the LM as the
# task instructions — it is runtime behavior, not documentation. The
# {Company_Name}/{Industry}/{Level}/{content} placeholders are NOT
# interpolated by DSPy; they reach the model as literal braces, and the
# real values arrive through the input fields declared after the
# docstring. Confirm whether the braces are intentional prompt styling.
class QuestionGenerationSignature(dspy.Signature):
    """
    You are a Question Generator Specialist.
    Generate insightful questions from the given learning content.

    The user's company context is:
      Company Name : {Company_Name}
      Industry : {Industry}
      Company Size : {Company_Size}
      Business Model : {Business_Model}
      Top Use Cases : {Top_Use_Cases}
      Learning Focus : {Learning_Focus}
      Tech Stack : {Tech_stack}
      Compliance : {Compliance}

    From the content below, generate exactly three questions.
    - The user is allowed to pick either of the four approaches for the question.
      1. Low-code approach    2. Hands-on coding    3. Mixed approach   4. Any other
    - The users question preference is {Level}   

    Each question must:
        - Be answerable from the content.
        - Cover one easy, one moderate, and one difficult concept.
        - Be phrased clearly and concisely.
 
        ---CONTENT START---
        {content}
        ---CONTENT END---
    Expected Output :
        ### Q1:
        [Insert question]

        ### Q2:
        [Insert question]

        ### Q3:
        [Insert question]

    (Do NOT add headings, explanations, or anything else.)
    """

    # Free-text company profile (name, industry, size, stack, compliance, ...).
    company_context: str = dspy.InputField()
    # User's question-style preference (Low-code, Hands-on coding, Mixed, Other).
    Level: str = dspy.InputField()
    # Learning content from which the three questions must be answerable.
    content: str = dspy.InputField()
    questions: str = dspy.OutputField(
        desc="Exactly three questions in the required format"
    )

# === DSPy Module ===
class QuestionGeneratorModule(dspy.Module):
    """
    DSPy module that generates exactly three questions from learning content.

    On construction it compiles the base predictor with BootstrapFewShot
    against two worked examples, then serves inference through the compiled
    (few-shot-optimized) predictor.

    NOTE: compilation issues LM calls, so constructing this module is
    expensive — construct it once per process and reuse it where possible.
    """

    def __init__(self):
        super().__init__()
        # Base (un-optimized) predictor over the signature.
        self.predictor = dspy.Predict(QuestionGenerationSignature)

        # --- Compile with Bootstrap Few-Shot Optimizer ---
        optimizer = dspy.BootstrapFewShot(metric=self._question_metric)
        self.optimized_predictor = optimizer.compile(
            student=self.predictor,
            trainset=self._build_trainset(),
        )

    @staticmethod
    def _question_metric(gold, pred, trace=None):
        """
        Metric for BootstrapFewShot: return 1 if the generated output
        contains exactly the three required question headers
        (### Q1:, ### Q2:, ### Q3:) and no extras, else 0.
        """
        output = getattr(pred, "questions", "") or ""

        required_questions = ["### Q1:", "### Q2:", "### Q3:"]
        has_all = all(q in output for q in required_questions)

        # Ensure no extra beyond 3 (very strict check)
        count_qs = output.count("### Q")
        return int(has_all and count_qs == 3)

    @staticmethod
    def _build_trainset():
        """Return the two worked examples used as the few-shot trainset."""
        return [

            dspy.Example(
                company_context="""
                    Company Name: FinSight
                    Industry: Financial Services
                    Company Size: 250 employees
                    Business Model: B2B 
                    Top Use Cases: Financial modeling
                    Learning Focus: Data Analysis
                    Tech Stack:  Excel, Tableau, SQL Server
                    Compliance: SOX, PCI DSS

                    """,
                    Level="Low-code approach",
                    content="""
        
                # Integrated User Journey: Mastering Data Analysis for Business Analysts
        
                ## Stage 1 - Foundations of Data Analysis
        
                ### Introduction
        
                Welcome to Stage 1 of our course, &amp;quot;Integrated User Journey: Mastering Data Analysis for Business Analysts.&amp;quot; In this foundational stage, we will explore the essential concepts of data analysis, focusing on a low-code approach. This method allows you to harness the power of data without needing extensive programming skills, making it accessible and practical for business analysts like you.
        
                As we embark on this journey, remember that the goal is to fill any skill gaps you may have, empowering you to make data-driven decisions that enhance user experiences and drive business success. Let’s dive into the world of data analysis, where we will uncover the tools and techniques that will become your allies in understanding and interpreting data effectively.
        
                ### Main Content
        
                Data analysis is the process of inspecting, cleaning, transforming, and modeling data to discover useful information, inform conclusions, and support decision-making. In today’s data-driven landscape, the ability to analyze data is not just a technical skill; it’s a vital competency for business analysts.
        
                #### Understanding Data Types
        
                To begin, let’s discuss the different types of data you’ll encounter. Data can be broadly categorized into two types: qualitative and quantitative.
        
                - **Qualitative data** refers to non-numerical information that describes qualities or characteristics. For example, customer feedback collected through surveys can provide insights into user satisfaction and preferences.
        
                - **Quantitative data**, on the other hand, is numerical and can be measured. This includes metrics like sales figures, website traffic, or conversion rates. Understanding these distinctions is crucial, as they dictate the methods you’ll use for analysis.
        
                #### The Data Analysis Process
        
                Now, let’s break down the data analysis process into manageable steps.
        
                1. **Define Your Objectives**: Start by clearly defining what you want to achieve with your analysis. Are you looking to improve customer retention, increase sales, or enhance user experience? Having a clear objective will guide your analysis.
        
                2. **Collect Data**: Gather data from various sources. This could include internal databases, customer surveys, or third-party analytics tools. In a low-code environment, tools like Google Data Studio or Microsoft Power BI can help you visualize and collect data without extensive coding.
        
                3. **Clean and Prepare Data**: Data cleaning is a critical step. It involves removing duplicates, correcting errors, and ensuring consistency. Low-code platforms often provide built-in functionalities to help automate this process, making it easier to prepare your data for analysis.
        
                4. **Analyze Data**: Once your data is clean, it’s time to analyze it. You can use low-code tools to create visualizations, such as charts and graphs, that help you identify trends and patterns. For instance, if you’re analyzing customer purchase behavior, a bar chart can quickly show you which products are most popular.
        
                5. **Interpret Results**: After analyzing the data, interpret the results in the context of your objectives. What story does the data tell? Are there actionable insights that can inform your business strategy?
        
                6. **Communicate Findings**: Finally, it’s essential to communicate your findings effectively. Use visual aids and clear language to present your insights to stakeholders. Remember, the goal is to make your analysis understandable and actionable.
        
                #### Tools for Low-Code Data Analysis
        
                In this stage, we’ll also introduce you to some popular low-code tools that can enhance your data analysis capabilities:
        
                - **Tableau**: A powerful data visualization tool that allows you to create interactive dashboards without needing to write code. It’s user-friendly and ideal for presenting data insights.
        
                - **Microsoft Power BI**: This tool integrates seamlessly with other Microsoft products and offers robust data modeling and visualization capabilities. It’s perfect for business analysts looking to create reports and dashboards quickly.
        
                - **Google Data Studio**: A free tool that enables you to create customizable reports and dashboards. It’s particularly useful for visualizing data from Google Analytics and other Google services.
        
                By leveraging these tools, you can focus on the analysis itself rather than getting bogged down in coding complexities.
        
                ### Conclusion
        
                As we conclude this foundational stage, remember that mastering data analysis is a journey. By understanding data types, following a structured analysis process, and utilizing low-code tools, you are well on your way to becoming a proficient data analyst. This knowledge will not only fill your skill gaps but also empower you to make informed decisions that enhance user experiences and drive business success.
        
                ### Reflection Questions
        
                1. What specific objectives do you have for your data analysis efforts, and how can you ensure they align with your organization’s goals?
                2. Which low-code tools do you find most appealing for your data analysis needs, and how do you plan to incorporate them into your workflow?
                3. Reflect on a recent project where data analysis could have provided valuable insights. What steps would you take to implement the analysis process we discussed?
        
                Thank you for joining me in this foundational stage. I look forward to seeing you in the next stage, where we will build upon


                """,
                    questions="""
                ### Q1:
                What are the two broad categories of data that business analysts need to understand in data analysis?

                ### Q2:
                What steps should you take to ensure your data analysis objectives align with your organization’s goals?

                ### Q3:
                How can the use of low-code tools like Tableau, Microsoft Power BI, and Google Data Studio streamline the data cleaning and analysis process for business analysts?
                """
                ).with_inputs("company_context", "Level", "content"),

            dspy.Example(
                    company_context="""
                        Company Name: FinSight
                        Industry: Financial Services
                        Company Size: 250 employees
                        Business Model: B2B 
                        Top Use Cases: Financial modeling
                        Learning Focus: Data Analysis
                        Tech Stack:  Excel, Tableau, SQL Server
                        Compliance: SOX, PCI DSS

                    """,
                    Level="Low-code approach",
                    content="""
                ## Stage 3 - Data Analysis Techniques

                ### Introduction

                Welcome to Stage 3 of our course, &amp;amp;quot;Integrated User Journey: Mastering Data Analysis for Business Analysts.&amp;amp;quot; In this stage, we will delve into the essential data analysis techniques that empower you to transform raw data into actionable insights. Whether you choose a low-code approach, hands-on coding, or a mixed method, our goal is to equip you with the skills to analyze data effectively and communicate your findings persuasively.

                As business analysts, your ability to interpret data is crucial. It not only informs decision-making but also enhances the user experience by aligning business strategies with user needs. Today, we will explore SQL for data extraction, statistics for understanding data trends, and the art of communication and storytelling to present your findings compellingly.

                ### Main Content

                Let’s begin with SQL, the backbone of data retrieval. SQL, or Structured Query Language, allows you to interact with databases to extract the information you need. Imagine you’re working for an e-commerce company, and you want to analyze customer purchase behavior. You might write a SQL query to pull data on customer transactions over the last quarter. 

                For instance, a simple query like:

                ```sql
                SELECT customer_id, purchase_date, total_amount
                FROM transactions
                WHERE purchase_date BETWEEN &amp;amp;#39;2023-01-01&amp;amp;#39; AND &amp;amp;#39;2023-03-31&amp;amp;#39;;
                ```

                This query retrieves all transactions made by customers in the first quarter of 2023. By analyzing this data, you can identify trends, such as peak purchasing times or popular products. 

                Now, let’s transition to statistics. Statistics is the science of collecting, analyzing, interpreting, presenting, and organizing data. It helps you make sense of the numbers you extract. For example, you might calculate the average purchase amount to understand customer spending behavior. 

                If you find that the average purchase amount is significantly lower than expected, it could prompt further investigation. Perhaps there are barriers in the user journey that discourage larger purchases. By applying statistical techniques, such as regression analysis, you can identify factors that influence purchasing decisions, allowing you to make data-driven recommendations.

                Next, we’ll discuss the importance of communication. As a business analyst, your role is not just to analyze data but to convey your findings effectively. This is where the art of storytelling comes into play. 

                Imagine presenting your analysis to stakeholders. Instead of bombarding them with numbers, you could craft a narrative around your findings. For instance, you might say, “In the last quarter, we observed a 20% increase in purchases during weekends, particularly for our new product line. This suggests that our marketing efforts are resonating with customers during their leisure time.” 

                By framing your data within a story, you make it relatable and easier for your audience to grasp the implications of your analysis. 

                Finally, let’s touch on visualization. While we won’t focus heavily on this in this stage, it’s worth noting that visualizing your data can enhance understanding. Tools like Tableau or Power BI allow you to create compelling visuals that highlight key trends and insights. A well-designed chart can often communicate complex data more effectively than words alone.

                ### Conclusion

                In this stage, we’ve explored the fundamental data analysis techniques that every business analyst should master. From SQL for data extraction to statistics for analysis, and the importance of communication and storytelling, these skills are essential for transforming data into insights that drive business decisions. 

                As you continue your journey, remember that the ability to analyze and communicate data effectively is a powerful tool in your arsenal. Embrace these techniques, and you’ll be well on your way to becoming a master data analyst.

                ### Reflection Questions

                1. How can you apply SQL queries to extract meaningful insights from your current data sets?
                2. What statistical methods do you find most useful in your analysis, and how can you incorporate them into your workflow?
                3. Reflect on a recent analysis you conducted. How could you enhance your storytelling approach to better communicate your findings to stakeholders? 

                Thank you for joining me in this stage. I look forward to seeing how you apply these techniques in your work!

                """,
                    questions="""
                
                ### Q1:
                What is the purpose of SQL in the context of data analysis for business analysts?

                ### Q2:
                How can regression analysis be utilized to improve understanding of customer purchasing decisions?

                ### Q3:
                In what ways can the storytelling approach be tailored to effectively communicate complex data findings to stakeholders?
                
                """
                ).with_inputs("company_context", "Level", "content")
        ]

    def forward(self, company_context, Level, content):
        """Generate three questions for the given company context and content."""
        # Bug fix: route inference through the compiled predictor. The
        # original built `self.optimized_predictor` in __init__ but then
        # called the raw `self.predictor` here, silently discarding the
        # entire BootstrapFewShot optimization.
        return self.optimized_predictor(
            company_context=company_context,
            Level=Level,
            content=content,
        )

# === Tool Input Schema ===
class QuestionGeneratorInput(BaseModel):
    """Input schema for QuestionGenerationTool."""
    # The company-profile fields below are concatenated into a single
    # free-text `company_context` string by QuestionGenerationTool._run
    # before being handed to the DSPy module.
    Company_Name: str = Field(..., description="Name of the company")
    Industry: str = Field(..., description="Industry of the company")
    Company_Size: str = Field(..., description="Size of the company")
    Business_Model: str = Field(..., description="Company's business model")
    Top_Use_Cases: str = Field(..., description="Top use cases the company focuses on")
    Learning_Focus: str = Field(..., description="Learning focus for the user")
    Tech_stack: str = Field(..., description="Company's tech stack")
    Compliance: str = Field(..., description="Relevant compliance requirements")
    Level: str = Field(..., description="User's question preference (Low-code, Hands-on coding, Mixed, or Other)")
    content: str = Field(..., description="Learning content from which to generate questions")

# === Custom DSPy Tool ===
class QuestionGenerationTool(BaseTool):
    """CrewAI tool wrapping the DSPy question-generation module.

    Builds a free-text company-context block from the structured inputs and
    delegates to QuestionGeneratorModule, returning the three generated
    questions as a single string.
    """

    name: str = "Question Generation Tool"
    description: str = (
        "Generates exactly three insightful questions from course content, "
        "customized by company context and user preferences."
    )
    args_schema: Type[BaseModel] = QuestionGeneratorInput

    @staticmethod
    @lru_cache(maxsize=1)
    def _get_module():
        """Return a process-wide QuestionGeneratorModule singleton.

        Perf fix: constructing the module runs BootstrapFewShot compilation,
        which issues LM calls. The original code rebuilt (and recompiled)
        the module on every _run invocation; caching pays that cost once.
        """
        return QuestionGeneratorModule()

    def _run(
        self,
        Company_Name: str,
        Industry: str,
        Company_Size: str,
        Business_Model: str,
        Top_Use_Cases: str,
        Learning_Focus: str,
        Tech_stack: str,
        Compliance: str,
        Level: str,
        content: str
    ) -> str:
        """Generate and return exactly three questions for the given inputs."""

        # Build company context string (format kept identical to the
        # original so downstream prompts are unchanged).
        company_context = f"""
        Company Name: {Company_Name}
        Industry: {Industry}
        Company Size: {Company_Size}
        Business Model: {Business_Model}
        Top Use Cases: {Top_Use_Cases}
        Learning Focus: {Learning_Focus}
        Tech Stack: {Tech_stack}
        Compliance: {Compliance}
        """

        result = self._get_module().forward(
            company_context=company_context,
            Level=Level,
            content=content
        )

        return result.questions



# # === CLI Test Usage ===
# if __name__ == "__main__":
#     tool = QuestionGenerationTool()
#     output = tool._run(
#         Company_Name="DeepCore AI",
#         Industry="Technology",
#         Company_Size="500 employees",
#         Business_Model="SaaS",
#         Top_Use_Cases="Predictive analytics",
#         Learning_Focus="Machine Learning",
#         Tech_stack="Python, AWS, PostgreSQL",
#         Compliance="SOC 2",
#         Level="Low-code approach",
#         content="SQL helps business analysts extract structured data for insights, while regression analysis reveals customer behavior patterns, and storytelling enhances communication with stakeholders."
#     )
#     print("=== Generated Questions ===")
#     print(output)



###  Version1 Without Fewshot

# from typing import Type
# from crewai.tools import BaseTool
# from pydantic import BaseModel, Field
# import dspy
# import os
# from dotenv import load_dotenv

# # === Load secrets ===
# load_dotenv(dotenv_path="/home/azureuser/microlearn/backend/prompt_opt_dspy/.env")

# azure_api_key = os.getenv("AZURE_API_KEY")
# azure_api_host = os.getenv("AZURE_API_HOST")
# azure_deployment_id = os.getenv("AZURE_DEPLOYMENT_ID")
# azure_api_version = os.getenv("AZURE_API_VERSION")

# # === Configure Azure OpenAI LM ===
# lm = dspy.LM(
#     "azure/Csqr-gpt-4o-mini",
#     api_key=azure_api_key,
#     api_base=azure_api_host,
#     api_version=azure_api_version,
#     temperature=0.0,
# )
# dspy.configure(lm=lm)

# # === DSPy Signature ===
# class QuestionGenerationSignature(dspy.Signature):
#     """
#     You are a Question Generator Specialist.
#     Generate insightful questions from the given learning content.

#     The user's company context is:
#       Company Name : {Company_Name}
#       Industry : {Industry}
#       Company Size : {Company_Size}
#       Business Model : {Business_Model}
#       Top Use Cases : {Top_Use_Cases}
#       Learning Focus : {Learning_Focus}
#       Tech Stack : {Tech_stack}
#       Compliance : {Compliance}

#     Task:
#     - From the content, generate exactly three questions.
#     - User's question preference is {Level} (Low-code, Hands-on coding, Mixed, or Other).
#     - Each question must:
#         * Be answerable from the content.
#         * Cover one easy, one moderate, and one difficult concept.
#         * Be phrased clearly and concisely.

#     Expected Output:
#       ### Q1:
#       [Insert question]

#       ### Q2:
#       [Insert question]

#       ### Q3:
#       [Insert question]
#     """

#     company_context: str = dspy.InputField()
#     Level: str = dspy.InputField()
#     content: str = dspy.InputField()
#     questions: str = dspy.OutputField(
#         desc="Exactly three questions in the required format"
#     )

# # === DSPy Module ===
# class QuestionGeneratorModule(dspy.Module):
#     def __init__(self):
#         super().__init__()
#         self.predictor = dspy.Predict(QuestionGenerationSignature)

#     def forward(self, company_context, Level, content):
#         return self.predictor(
#             company_context=company_context,
#             Level=Level,
#             content=content
#         )

# # === Tool Input Schema ===
# class QuestionGeneratorInput(BaseModel):
#     """Input schema for QuestionGenerationTool."""
#     Company_Name: str = Field(..., description="Name of the company")
#     Industry: str = Field(..., description="Industry of the company")
#     Company_Size: str = Field(..., description="Size of the company")
#     Business_Model: str = Field(..., description="Company's business model")
#     Top_Use_Cases: str = Field(..., description="Top use cases the company focuses on")
#     Learning_Focus: str = Field(..., description="Learning focus for the user")
#     Tech_stack: str = Field(..., description="Company's tech stack")
#     Compliance: str = Field(..., description="Relevant compliance requirements")
#     Level: str = Field(..., description="User's question preference (Low-code, Hands-on coding, Mixed, or Other)")
#     content: str = Field(..., description="Learning content from which to generate questions")

# # === Custom DSPy Tool ===
# class QuestionGenerationTool(BaseTool):
#     name: str = "Question Generation Tool"
#     description: str = (
#         "Generates exactly three insightful questions from course content, "
#         "customized by company context and user preferences."
#     )
#     args_schema: Type[BaseModel] = QuestionGeneratorInput

#     def _run(
#         self,
#         Company_Name: str,
#         Industry: str,
#         Company_Size: str,
#         Business_Model: str,
#         Top_Use_Cases: str,
#         Learning_Focus: str,
#         Tech_stack: str,
#         Compliance: str,
#         Level: str,
#         content: str
#     ) -> str:

#         # Build company context string
#         company_context = f"""
#         Company Name: {Company_Name}
#         Industry: {Industry}
#         Company Size: {Company_Size}
#         Business Model: {Business_Model}
#         Top Use Cases: {Top_Use_Cases}
#         Learning Focus: {Learning_Focus}
#         Tech Stack: {Tech_stack}
#         Compliance: {Compliance}
#         """

#         module = QuestionGeneratorModule()
#         result = module.forward(
#             company_context=company_context,
#             Level=Level,
#             content=content
#         )

#         return result.questions


# # # === CLI Test Usage ===
# # if __name__ == "__main__":
# #     tool = QuestionGenerationTool()
# #     output = tool._run(
# #         Company_Name="DeepCore AI",
# #         Industry="Technology",
# #         Company_Size="500 employees",
# #         Business_Model="SaaS",
# #         Top_Use_Cases="Predictive analytics",
# #         Learning_Focus="Machine Learning",
# #         Tech_stack="Python, AWS, PostgreSQL",
# #         Compliance="SOC 2",
# #         Level="Low-code approach",
# #         content="SQL helps business analysts extract structured data for insights, while regression analysis reveals customer behavior patterns, and storytelling enhances communication with stakeholders."
# #     )
# #     print("=== Generated Questions ===")
# #     print(output)
