Spaces:

Senzen
/

Back-End

Sleeping

File size: 9,968 Bytes

c576592

import asyncio
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from typing import List
import asyncio


class TopicExtraction(BaseModel):
    """Extracts topics from news article."""

    # Structured-output schema used by ChatBot.topic_extraction.
    # ``default=...`` (Ellipsis) marks the field as required.
    topics: List[str] = Field(
        default=...,
        description="A list of topics covered in the news article.",
    )


class TopicOverlap(BaseModel):
    """Compares two news articles and separates their shared topics from the topics unique to each."""

    # NOTE: this model is passed to ``with_structured_output`` in
    # ChatBot.topic_overlap, so the docstring and the field descriptions are
    # part of what the LLM sees. The previous docstring was a copy of
    # TopicExtraction's and described the wrong task.

    # Topics that appear in both articles.
    common_topics: List[str] = Field(
        ..., description="A list of topics covered in both articles."
    )
    # Topics found only in the first article of the pair.
    unique_topics_1: List[str] = Field(
        ..., description="A list of topics unique to article 1."
    )
    # Topics found only in the second article of the pair.
    unique_topics_2: List[str] = Field(
        ..., description="A list of topics unique to article 2."
    )


class ComparativeAnalyzer(BaseModel):
    """Compares given pair of articles and extracts comparison and impact."""

    # Structured-output schema used by ChatBot.comparative_analysis.
    # Both fields are required (``default=...``).

    # One-sentence contrast between the two articles.
    comparison: str = Field(
        default=...,
        description="A sentence of comparative insights between articles.",
    )
    # One-sentence statement of what the compared articles may lead to.
    impact: str = Field(
        default=...,
        description="A sentence of potential impacts from the compared articles.",
    )


class FinalAnalysis(BaseModel):
    """Summarizes the Comparative analysis."""

    # Structured-output schema used by ChatBot.final_analysis: the same
    # summary is requested once per language. Both fields are required.
    english: str = Field(
        default=...,
        description="Summarizes the analysis in english.",
    )
    hindi: str = Field(
        default=...,
        description="Summarizes the analysis in hindi.",
    )


class ChatBot:
    def __init__(
        self, api_key: str, model: str, articles_dict: list, company_name: str
    ):
        """Create the chat model client and pre-render every article as text.

        Args:
            api_key: Passed to ``ChatOpenAI`` as ``api_key``.
            model: Passed to ``ChatOpenAI`` as ``model``.
            articles_dict: Iterable of mappings; each one is indexed with
                ``"title"``, ``"summary"`` and ``"sentiment"``.
            company_name: Stored on the instance; substituted into the
                topic-extraction prompt.
        """
        # Low temperature keeps the structured outputs close to deterministic.
        self.llm = ChatOpenAI(model=model, api_key=api_key, temperature=0.1)

        # One prompt-ready text block per article, in input order. The other
        # methods address articles by their position in this list.
        self.articles = [
            f"title {entry['title']} \n summary {entry['summary']} \n sentiment {entry['sentiment']}  \n\n"
            for entry in articles_dict
        ]
        self.company_name = company_name

    async def topic_extraction(self, article: str):
        """Ask the LLM for the key topics of a single article.

        Args:
            article: Article text to analyse; it is inserted into the human
                message as ``{articles}``.

        Returns:
            The ``topics`` list of the ``TopicExtraction`` structured response.
        """
        # NOTE: the worked example below used to list five topics, which
        # contradicted the "2 to 3 key topics" instruction in the same prompt.
        # It now shows three. The rest of the prompt text is intentionally
        # left untouched because it is runtime behaviour.
        system_message = """You are an expert in text analysis and topic extraction. Your task is to identify the main topics from a short news articleS.



        ### Instructions:

        - Extract **2 to 3 key topics** that summarize the core ideas of the article.

        - Use **concise, generalizable topics** (e.g., "Electric Vehicles" instead of "Tesla Model X").

        - Avoid generic words like "news" or "report".

        - If relevant, include categories such as **Technology, Finance, Politics, Business, or Science**.

        - Return the topics in **JSON format** as a list of strings.

        - Seperate the topics for each articles by line break.

        - Do not include just he company name {company_name}





        ### Example:



        #### Input Article:

        "Tesla has launched a new AI-powered self-driving feature that improves vehicle autonomy and enhances road safety. The update is expected to impact the automotive industry's shift toward electric and smart vehicles."



        #### Output:

        ["Artificial Intelligence", "Self-Driving Cars", "Electric Vehicles"]

        :



                        """

        prompt = ChatPromptTemplate.from_messages(
            [("system", system_message), ("human", "Input Article: \n {articles}")]
        )
        # Bind the Pydantic schema so the reply is parsed into TopicExtraction.
        structured_llm = self.llm.with_structured_output(TopicExtraction)
        chain = prompt | structured_llm
        response = await chain.ainvoke(
            {"company_name": self.company_name, "articles": article}
        )
        return response.topics

    async def topic_overlap(self, id1: int, id2: int):
        """Ask the LLM which topics two stored articles share and which are unique.

        Args:
            id1: Index into ``self.articles`` for the first article.
            id2: Index into ``self.articles`` for the second article.

        Returns:
            A dict with three entries: ``"Common Topics "`` (the key has a
            trailing space), ``"Unique Topics in Article {id1}"`` and
            ``"Unique Topics in Article {id2}"``, each holding a list of
            topic strings from the ``TopicOverlap`` structured response.
        """
        article_1, article_2 = self.articles[id1], self.articles[id2]

        # NOTE: the example's "unique_topics_1" previously listed
        # "Automotive Industry" twice; the duplicate entry was removed so the
        # few-shot example no longer demonstrates repeated topics.
        system_message = """You are an advanced AI specializing in text analysis and topic extraction. Your task is to compare two news articles and extract key topics.



        ### **Instructions:**

        - Identify **common topics** present in **both articles**.

        - Identify **topics unique to each article**.

        - Use **generalized topics** (e.g., "Electric Vehicles" instead of "Tesla Model X").

        - Ensure topics are **concise and meaningful**.

        ---

        ### **Example:**

        #### **Article 1:**

        "Tesla has launched a new AI-powered self-driving feature that enhances vehicle autonomy and road safety. The update is expected to impact the automotive industry."



        #### **Article 2:**

        "Regulators are reviewing Tesla’s self-driving technology due to safety concerns. Experts debate whether AI-based vehicle autonomy meets current legal standards."



        #### **Expected Output:**

        "common_topics": ["Self-Driving Cars", "Artificial Intelligence", "Safety"],

        "unique_topics_1": ["Automotive Industry"],

        "unique_topics_2": ["Regulations", "Legal Standards"]

                        """

        user_message = """

        Here are the news articles on the company.

        Article 1: 

        {article_1} 

        Article 2: 

        {article_2}

        """

        prompt = ChatPromptTemplate.from_messages(
            [
                ("system", system_message),
                ("human", user_message),
            ]
        )
        # Bind the Pydantic schema so the reply is parsed into TopicOverlap.
        structured_llm = self.llm.with_structured_output(TopicOverlap)
        chain = prompt | structured_llm
        response = await chain.ainvoke({"article_1": article_1, "article_2": article_2})
        # Key spelling (including the trailing space in "Common Topics ") is
        # kept as-is; code outside this block may look these keys up.
        return {
            "Common Topics ": response.common_topics,
            f"Unique Topics in Article {id1}": response.unique_topics_1,
            f"Unique Topics in Article {id2}": response.unique_topics_2,
        }

    async def comparative_analysis(self, id1: int, id2: int):
        """Ask the LLM for a short comparison and impact statement for two articles.

        Args:
            id1: Index into ``self.articles`` for the first article.
            id2: Index into ``self.articles`` for the second article.

        Returns:
            A two-entry dict: ``"comparison of {id1}, {id2}"`` holds the
            comparison sentence and ``"impact"`` holds the impact sentence,
            both taken from the ``ComparativeAnalyzer`` structured response.
        """
        first_text = self.articles[id1]
        second_text = self.articles[id2]

        system_prompt = """

        You are an AI assistant that performs Comparative Analysis on given articles.

        Analyze the following articles and provide a comparative analysis. Highlight their key themes, sentiment, and impact.

        Compare how each article portrays the companies and discuss potential implications for investors and the industry.

        Structure your response with 'Comparison' and 'Impact' sections. 

        The length of each comparison and impact should be less than 20 words

        Mention the articles ids.



        ### **Example:**

        #### **Article 1:**

        ""Tesla's New Model Breaks Sales Records.Tesla's latest EV sees record sales in Q3..."



        #### **Article 2:**

        "Regulatory Scrutiny on Tesla's Self-Driving Tech. Regulators have raised concerns over Tesla’s self-driving software..."



        #### **Expected Output:**

        "Comparison": "Article 1 highlights Tesla's strong sales, while Article 2 discusses regulatory issues.",

        "Impact": "The first article boosts confidence in Tesla's market growth, while the second raises concerns about future regulatory hurdles."

        """

        human_prompt = """

        Here are the news articles on the company.

        Article {id1}: 

        {article_1} 

        Article {id2}: 

        {article_2}

        """

        messages = [("system", system_prompt), ("human", human_prompt)]
        prompt = ChatPromptTemplate.from_messages(messages)
        # Bind the Pydantic schema so the reply is parsed into ComparativeAnalyzer.
        chain = prompt | self.llm.with_structured_output(ComparativeAnalyzer)

        # The ids are passed too so the human message can label each article.
        payload = {
            "article_1": first_text,
            "article_2": second_text,
            "id1": id1,
            "id2": id2,
        }
        analysis = await chain.ainvoke(payload)

        # Insertion order (comparison first, impact second) is relied upon by
        # final_analysis, which unpacks ``.values()`` positionally.
        return {
            f"comparison of {id1}, {id2}": analysis.comparison,
            "impact": analysis.impact,
        }

    async def main(self, similar_pairs: list):
        """Runs all OpenAI API calls in parallel."""

        topic_extraction_tasks = [
            self.topic_extraction(article) for article in self.articles
        ]

        topic_overlap_tasks = [
            self.topic_overlap(id1, id2) for id1, id2, _ in similar_pairs
        ]

        comparative_analysis_tasks = [
            self.comparative_analysis(id1, id2) for id1, id2, _ in similar_pairs
        ]

        (
            topic_extraction_results,
            topic_overlap_results,
            comparative_analysis_results,
        ) = await asyncio.gather(
            asyncio.gather(*topic_extraction_tasks),
            asyncio.gather(*topic_overlap_tasks),
            asyncio.gather(*comparative_analysis_tasks),
        )
        return {
            "topic_extraction_results": topic_extraction_results,
            "topic_overlap_results": topic_overlap_results,
            "comparative_analysis_results": comparative_analysis_results,
        }

    def final_analysis(self, comparative_analysis_articles):
        """Summarise a set of comparative analyses in English and Hindi.

        This call is synchronous (``chain.invoke``), unlike the other LLM
        methods on this class.

        Args:
            comparative_analysis_articles: Iterable of mappings with exactly
                two values each, read in order as (comparison, impact).

        Returns:
            A ``(english, hindi)`` tuple taken from the ``FinalAnalysis``
            structured response.
        """
        rendered_entries = []
        for entry in comparative_analysis_articles:
            # Positional unpacking: depends on each mapping's value order.
            comparison, impact = entry.values()
            rendered_entries.append(
                f"comparison: {comparison} \n impact: {impact} \n\n"
            )
        comparative_results = "Comparative Analysis: \n" + "".join(rendered_entries)

        template = """

        You are an AI assistant that reads a Comparative Analysis of Articles.

        And summarizes them to produce the final sentiment analysis.

        Make the final sentiment analysis less than 20 words

        Comprative Analysis:

        {comparative_results}

        """
        prompt = ChatPromptTemplate.from_template(template)
        # Bind the Pydantic schema so the reply is parsed into FinalAnalysis.
        chain = prompt | self.llm.with_structured_output(FinalAnalysis)
        summary = chain.invoke({"comparative_results": comparative_results})
        return summary.english, summary.hindi