File size: 9,968 Bytes
c576592
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
import asyncio
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from typing import List
import asyncio


# Structured-output schema: ChatBot.topic_extraction passes this class to
# `llm.with_structured_output(...)` and returns the parsed `.topics`.
# NOTE(review): the docstring and Field description below are presumably
# forwarded to the model as schema text by LangChain, so edits to them act
# as prompt changes rather than pure documentation -- confirm before rewording.
class TopicExtraction(BaseModel):
    """Extracts topics from news article."""

    # Required field (`...` means no default): topic labels for one article.
    topics: List[str] = Field(
        ..., description="A list of topics covered in the news article."
    )


# Structured-output schema: ChatBot.topic_overlap passes this class to
# `llm.with_structured_output(...)` and reads all three lists from the result.
# NOTE(review): the docstring and Field descriptions are presumably forwarded
# to the model as schema text, so they must describe THIS schema accurately.
class TopicOverlap(BaseModel):
    """Compares two news articles: topics they share and topics unique to each."""

    # Topics that appear in both articles.
    common_topics: List[str] = Field(
        ..., description="A list of topics covered in both articles."
    )
    # Topics that appear only in the first article of the pair.
    unique_topics_1: List[str] = Field(
        ..., description="A list of topics unique to article 1."
    )
    # Topics that appear only in the second article of the pair.
    unique_topics_2: List[str] = Field(
        ..., description="A list of topics unique to article 2."
    )


# Structured-output schema: ChatBot.comparative_analysis passes this class to
# `llm.with_structured_output(...)` and reads `.comparison` and `.impact`.
# NOTE(review): the docstring and Field descriptions are presumably forwarded
# to the model as schema text -- treat rewording as a prompt change.
class ComparativeAnalyzer(BaseModel):
    """Compares given pair of articles and extracts comparison and impact."""

    # Required: how the two articles differ or agree.
    comparison: str = Field(
        ..., description="A sentence of comparative insights between articles."
    )
    # Required: the consequences the model infers from the compared articles.
    impact: str = Field(
        ..., description="A sentence of potential impacts from the compared articles."
    )


# Structured-output schema: ChatBot.final_analysis passes this class to
# `llm.with_structured_output(...)` and returns `(english, hindi)`.
# NOTE(review): the docstring and Field descriptions are presumably forwarded
# to the model as schema text -- treat rewording as a prompt change.
class FinalAnalysis(BaseModel):
    """Summarizes the Comparative analysis."""

    # Required: the summary written in English.
    english: str = Field(..., description="Summarizes the analysis in english.")
    # Required: the same summary written in Hindi.
    hindi: str = Field(..., description="Summarizes the analysis in hindi.")


class ChatBot:
    """LLM-backed analysis of a set of news articles about one company.

    Wraps a ``ChatOpenAI`` model and exposes structured-output calls for
    per-article topic extraction, pairwise topic overlap, pairwise
    comparative analysis and a final English/Hindi summary.
    """

    def __init__(
        self, api_key: str, model: str, articles_dict: list, company_name: str
    ):
        """Create the LLM client and pre-render every article as prompt text.

        Args:
            api_key: API key handed to ``ChatOpenAI``.
            model: Model name handed to ``ChatOpenAI``.
            articles_dict: Iterable of mappings, each providing the keys
                ``"title"``, ``"summary"`` and ``"sentiment"``.
            company_name: Company name interpolated into the topic-extraction
                prompt.

        Raises:
            KeyError: If an article lacks one of the three required keys.
        """
        self.llm = ChatOpenAI(model=model, api_key=api_key, temperature=0.1)
        self.articles = [self._format_article(article) for article in articles_dict]
        self.company_name = company_name

    @staticmethod
    def _format_article(article: dict) -> str:
        """Render one article mapping as the text block used in prompts."""
        title = article["title"]
        summary = article["summary"]
        sentiment = article["sentiment"]
        return f"title {title} \n summary {summary} \n sentiment {sentiment}  \n\n"

    @staticmethod
    def _format_comparisons(comparative_analysis_articles) -> str:
        """Flatten comparison dicts into the text fed to the final summary.

        Each entry must hold exactly two values, in the order
        (comparison, impact); this is the shape ``comparative_analysis``
        returns. Values are taken by position, so the key names are ignored.
        """
        rendered = "".join(
            f"comparison: {comparison} \n impact: {impact} \n\n"
            for comparison, impact in (
                entry.values() for entry in comparative_analysis_articles
            )
        )
        return "Comparative Analysis: \n" + rendered

    def _structured_chain(self, prompt, schema: type):
        """Pipe *prompt* into the LLM constrained to return *schema*."""
        return prompt | self.llm.with_structured_output(schema)

    async def topic_extraction(self, article: str) -> List[str]:
        """Ask the LLM for the key topics of a single pre-rendered article.

        Args:
            article: Article text (as stored in ``self.articles``).

        Returns:
            The ``topics`` list of the parsed ``TopicExtraction`` response.
        """
        # The few-shot example lists three topics so that it agrees with the
        # "2 to 3 key topics" instruction; a longer example contradicts it.
        system_message = """You are an expert in text analysis and topic extraction. Your task is to identify the main topics from a short news article.



        ### Instructions:

        - Extract **2 to 3 key topics** that summarize the core ideas of the article.

        - Use **concise, generalizable topics** (e.g., "Electric Vehicles" instead of "Tesla Model X").

        - Avoid generic words like "news" or "report".

        - If relevant, include categories such as **Technology, Finance, Politics, Business, or Science**.

        - Return the topics in **JSON format** as a list of strings.

        - Separate the topics for each article by line break.

        - Do not include just the company name {company_name}





        ### Example:



        #### Input Article:

        "Tesla has launched a new AI-powered self-driving feature that improves vehicle autonomy and enhances road safety. The update is expected to impact the automotive industry's shift toward electric and smart vehicles."



        #### Output:

        ["Artificial Intelligence", "Self-Driving Cars", "Road Safety"]



                        """

        prompt = ChatPromptTemplate.from_messages(
            [("system", system_message), ("human", "Input Article: \n {articles}")]
        )
        chain = self._structured_chain(prompt, TopicExtraction)
        response = await chain.ainvoke(
            {"company_name": self.company_name, "articles": article}
        )
        return response.topics

    async def topic_overlap(self, id1: int, id2: int) -> dict:
        """Ask the LLM which topics two articles share and which are unique.

        Args:
            id1: Index into ``self.articles`` of the first article.
            id2: Index into ``self.articles`` of the second article.

        Returns:
            A dict with the keys ``"Common Topics "`` (trailing space is part
            of the key), ``"Unique Topics in Article {id1}"`` and
            ``"Unique Topics in Article {id2}"``.

        Raises:
            IndexError: If either id is out of range for ``self.articles``.
        """
        article_1, article_2 = self.articles[id1], self.articles[id2]

        system_message = """You are an advanced AI specializing in text analysis and topic extraction. Your task is to compare two news articles and extract key topics.



        ### **Instructions:**

        - Identify **common topics** present in **both articles**.

        - Identify **topics unique to each article**.

        - Use **generalized topics** (e.g., "Electric Vehicles" instead of "Tesla Model X").

        - Ensure topics are **concise and meaningful**.

        ---

        ### **Example:**

        #### **Article 1:**

        "Tesla has launched a new AI-powered self-driving feature that enhances vehicle autonomy and road safety. The update is expected to impact the automotive industry."



        #### **Article 2:**

        "Regulators are reviewing Tesla’s self-driving technology due to safety concerns. Experts debate whether AI-based vehicle autonomy meets current legal standards."



        #### **Expected Output:**

        "common_topics": ["Self-Driving Cars", "Artificial Intelligence", "Safety"],

        "unique_topics_1": ["Automotive Industry"],

        "unique_topics_2": ["Regulations", "Legal Standards"]

                        """

        user_message = """

        Here are the news articles on the company.

        Article 1: 

        {article_1} 

        Article 2: 

        {article_2}

        """

        prompt = ChatPromptTemplate.from_messages(
            [
                ("system", system_message),
                ("human", user_message),
            ]
        )
        chain = self._structured_chain(prompt, TopicOverlap)
        response = await chain.ainvoke({"article_1": article_1, "article_2": article_2})
        return {
            "Common Topics ": response.common_topics,
            f"Unique Topics in Article {id1}": response.unique_topics_1,
            f"Unique Topics in Article {id2}": response.unique_topics_2,
        }

    async def comparative_analysis(self, id1: int, id2: int) -> dict:
        """Ask the LLM for a short comparison and impact of two articles.

        Args:
            id1: Index into ``self.articles`` of the first article.
            id2: Index into ``self.articles`` of the second article.

        Returns:
            A two-entry dict, in this order: ``"comparison of {id1}, {id2}"``
            then ``"impact"``. ``final_analysis`` relies on that order.

        Raises:
            IndexError: If either id is out of range for ``self.articles``.
        """
        article_1, article_2 = self.articles[id1], self.articles[id2]

        system_message = """

        You are an AI assistant that performs Comparative Analysis on given articles.

        Analyze the following articles and provide a comparative analysis. Highlight their key themes, sentiment, and impact.

        Compare how each article portrays the companies and discuss potential implications for investors and the industry.

        Structure your response with 'Comparison' and 'Impact' sections. 

        The length of each comparison and impact should be less than 20 words

        Mention the articles ids.



        ### **Example:**

        #### **Article 1:**

        "Tesla's New Model Breaks Sales Records. Tesla's latest EV sees record sales in Q3..."



        #### **Article 2:**

        "Regulatory Scrutiny on Tesla's Self-Driving Tech. Regulators have raised concerns over Tesla’s self-driving software..."



        #### **Expected Output:**

        "Comparison": "Article 1 highlights Tesla's strong sales, while Article 2 discusses regulatory issues.",

        "Impact": "The first article boosts confidence in Tesla's market growth, while the second raises concerns about future regulatory hurdles."

        """

        user_message = """

        Here are the news articles on the company.

        Article {id1}: 

        {article_1} 

        Article {id2}: 

        {article_2}

        """

        prompt = ChatPromptTemplate.from_messages(
            [
                ("system", system_message),
                ("human", user_message),
            ]
        )
        chain = self._structured_chain(prompt, ComparativeAnalyzer)
        response = await chain.ainvoke(
            {"article_1": article_1, "article_2": article_2, "id1": id1, "id2": id2}
        )
        return {
            f"comparison of {id1}, {id2}": response.comparison,
            "impact": response.impact,
        }

    async def main(self, similar_pairs: list) -> dict:
        """Run every per-article and per-pair LLM call concurrently.

        Args:
            similar_pairs: Iterable of 3-item sequences ``(id1, id2, _)``.
                The ids index into ``self.articles``; the third item is
                ignored here.

        Returns:
            A dict with the keys ``"topic_extraction_results"`` (one entry
            per article), ``"topic_overlap_results"`` and
            ``"comparative_analysis_results"`` (one entry per pair), each in
            input order.
        """
        topic_extraction_tasks = [
            self.topic_extraction(article) for article in self.articles
        ]
        topic_overlap_tasks = [
            self.topic_overlap(id1, id2) for id1, id2, _ in similar_pairs
        ]
        comparative_analysis_tasks = [
            self.comparative_analysis(id1, id2) for id1, id2, _ in similar_pairs
        ]

        # One inner gather per category keeps the three result lists separate
        # while the outer gather still runs all of them at the same time.
        (
            topic_extraction_results,
            topic_overlap_results,
            comparative_analysis_results,
        ) = await asyncio.gather(
            asyncio.gather(*topic_extraction_tasks),
            asyncio.gather(*topic_overlap_tasks),
            asyncio.gather(*comparative_analysis_tasks),
        )
        return {
            "topic_extraction_results": topic_extraction_results,
            "topic_overlap_results": topic_overlap_results,
            "comparative_analysis_results": comparative_analysis_results,
        }

    def final_analysis(self, comparative_analysis_articles) -> tuple:
        """Summarize all pairwise comparisons into one short sentiment verdict.

        Unlike the other LLM calls on this class, this one is synchronous
        (``chain.invoke``).

        Args:
            comparative_analysis_articles: Iterable of two-value dicts ordered
                (comparison, impact), as returned by ``comparative_analysis``.

        Returns:
            ``(english, hindi)``: the summary in both languages.

        Raises:
            ValueError: If an entry does not hold exactly two values.
        """
        comparative_results = self._format_comparisons(comparative_analysis_articles)

        template = """

        You are an AI assistant that reads a Comparative Analysis of Articles.

        And summarizes them to produce the final sentiment analysis.

        Make the final sentiment analysis less than 20 words

        Comparative Analysis:

        {comparative_results}

        """
        prompt = ChatPromptTemplate.from_template(template)
        chain = self._structured_chain(prompt, FinalAnalysis)
        response = chain.invoke({"comparative_results": comparative_results})
        return response.english, response.hindi