File size: 9,968 Bytes
c576592 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 |
import asyncio
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from typing import List
import asyncio
class TopicExtraction(BaseModel):
    """Extracts topics from news article."""

    # Structured-output schema handed to ``with_structured_output`` in
    # ``ChatBot.topic_extraction``.
    # NOTE(review): the docstring and the field description are presumably
    # forwarded to the model as schema text, so they are kept verbatim --
    # confirm before rewording them.
    topics: List[str] = Field(
        default=...,  # Ellipsis marks the field as required.
        description="A list of topics covered in the news article.",
    )
class TopicOverlap(BaseModel):
    """Classifies the topics of two news articles as common or unique to each."""

    # Structured-output schema handed to ``with_structured_output`` in
    # ``ChatBot.topic_overlap``.
    # NOTE(review): the class docstring is presumably forwarded to the model
    # as the schema description; the previous text was copied from
    # ``TopicExtraction`` and described single-article extraction instead of
    # a two-article comparison. Field descriptions are left untouched.

    # Topics present in both articles.
    common_topics: List[str] = Field(
        ..., description="A list of topics covered in the both article."
    )
    # Topics found only in the first article of the pair.
    unique_topics_1: List[str] = Field(
        ..., description="A list of topics unique to article 1."
    )
    # Topics found only in the second article of the pair.
    unique_topics_2: List[str] = Field(
        ..., description="A list of topics unique to article 2."
    )
class ComparativeAnalyzer(BaseModel):
    """Compares given pair of articles and extracts comparison and impact."""

    # Structured-output schema handed to ``with_structured_output`` in
    # ``ChatBot.comparative_analysis``.
    # NOTE(review): the docstring and the field descriptions are presumably
    # forwarded to the model as schema text, so they are kept verbatim --
    # confirm before rewording them.

    # How the two articles relate to each other.
    comparison: str = Field(
        default=...,  # Ellipsis marks the field as required.
        description="A sentence of comparative insights between articles.",
    )
    # What the pair of articles may imply going forward.
    impact: str = Field(
        default=...,
        description="A sentence of potential impacts from the compared articles.",
    )
class FinalAnalysis(BaseModel):
    """Summarizes the Comparative analysis."""

    # Structured-output schema handed to ``with_structured_output`` in
    # ``ChatBot.final_analysis``; both fields are required.
    # NOTE(review): the docstring and the field descriptions are presumably
    # forwarded to the model as schema text, so they are kept verbatim --
    # confirm before rewording them.

    # The summary written in English.
    english: str = Field(
        default=...,
        description="Summarizes the analysis in english.",
    )
    # The same summary written in Hindi.
    hindi: str = Field(
        default=...,
        description="Summarizes the analysis in hindi.",
    )
class ChatBot:
    """Run LLM-based topic and comparative analysis over a set of news articles.

    Every article is flattened to a ``title / summary / sentiment`` text block
    when the instance is created. The analysis methods send those blocks to a
    ``ChatOpenAI`` model and read the reply through the structured-output
    schemas ``TopicExtraction``, ``TopicOverlap``, ``ComparativeAnalyzer`` and
    ``FinalAnalysis``.
    """

    # The prompt texts below are sent to the model verbatim, so their lines are
    # deliberately flush-left: leading whitespace would become part of the
    # prompt. Spelling inside them is left as found.

    # System prompt for ``topic_extraction``; ``{company_name}`` is filled in
    # when the chain is invoked.
    # NOTE(review): the example output lists five topics although the
    # instructions ask for 2 to 3 -- confirm which one is intended.
    _TOPIC_EXTRACTION_SYSTEM = """You are an expert in text analysis and topic extraction. Your task is to identify the main topics from a short news articleS.
### Instructions:
- Extract **2 to 3 key topics** that summarize the core ideas of the article.
- Use **concise, generalizable topics** (e.g., "Electric Vehicles" instead of "Tesla Model X").
- Avoid generic words like "news" or "report".
- If relevant, include categories such as **Technology, Finance, Politics, Business, or Science**.
- Return the topics in **JSON format** as a list of strings.
- Seperate the topics for each articles by line break.
- Do not include just he company name {company_name}
### Example:
#### Input Article:
"Tesla has launched a new AI-powered self-driving feature that improves vehicle autonomy and enhances road safety. The update is expected to impact the automotive industry's shift toward electric and smart vehicles."
#### Output:
["Artificial Intelligence", "Self-Driving Cars", "Automotive Industry", "Electric Vehicles", "Road Safety"]
:
"""

    # System prompt for ``topic_overlap``. The ``unique_topics_1`` example used
    # to list "Automotive Industry" twice; the duplicate is replaced so the
    # few-shot example no longer demonstrates repeated topics.
    _TOPIC_OVERLAP_SYSTEM = """You are an advanced AI specializing in text analysis and topic extraction. Your task is to compare two news articles and extract key topics.
### **Instructions:**
- Identify **common topics** present in **both articles**.
- Identify **topics unique to each article**.
- Use **generalized topics** (e.g., "Electric Vehicles" instead of "Tesla Model X").
- Ensure topics are **concise and meaningful**.
---
### **Example:**
#### **Article 1:**
"Tesla has launched a new AI-powered self-driving feature that enhances vehicle autonomy and road safety. The update is expected to impact the automotive industry."
#### **Article 2:**
"Regulators are reviewing Tesla’s self-driving technology due to safety concerns. Experts debate whether AI-based vehicle autonomy meets current legal standards."
#### **Expected Output:**
"common_topics": ["Self-Driving Cars", "Artificial Intelligence", "Safety"],
"unique_topics_1": ["Product Launch", "Automotive Industry"],
"unique_topics_2": ["Regulations", "Legal Standards"]
"""

    # Human message for ``topic_overlap``; the articles are always labelled
    # 1 and 2 here, independent of their indices.
    _TOPIC_OVERLAP_HUMAN = """
Here are the news articles on the company.
Article 1:
{article_1}
Article 2:
{article_2}
"""

    # System prompt for ``comparative_analysis``.
    _COMPARATIVE_SYSTEM = """
You are an AI assistant that performs Comparative Analysis on given articles.
Analyze the following articles and provide a comparative analysis. Highlight their key themes, sentiment, and impact.
Compare how each article portrays the companies and discuss potential implications for investors and the industry.
Structure your response with 'Comparison' and 'Impact' sections.
The length of each comparison and impact should be less than 20 words
Mention the articles ids.
### **Example:**
#### **Article 1:**
""Tesla's New Model Breaks Sales Records.Tesla's latest EV sees record sales in Q3..."
#### **Article 2:**
"Regulatory Scrutiny on Tesla's Self-Driving Tech. Regulators have raised concerns over Tesla’s self-driving software..."
#### **Expected Output:**
"Comparison": "Article 1 highlights Tesla's strong sales, while Article 2 discusses regulatory issues.",
"Impact": "The first article boosts confidence in Tesla's market growth, while the second raises concerns about future regulatory hurdles."
"""

    # Human message for ``comparative_analysis``; the articles are labelled
    # with the indices passed to the method.
    _COMPARATIVE_HUMAN = """
Here are the news articles on the company.
Article {id1}:
{article_1}
Article {id2}:
{article_2}
"""

    # Single-message template for ``final_analysis``.
    _FINAL_ANALYSIS_TEMPLATE = """
You are an AI assistant that reads a Comparative Analysis of Articles.
And summarizes them to produce the final sentiment analysis.
Make the final sentiment analysis less than 20 words
Comprative Analysis:
{comparative_results}
"""

    def __init__(
        self, api_key: str, model: str, articles_dict: list, company_name: str
    ):
        """Create the LLM client and pre-format the articles.

        Args:
            api_key: API key passed to ``ChatOpenAI``.
            model: Model name passed to ``ChatOpenAI``.
            articles_dict: Despite the name, an iterable of mappings, each
                providing ``"title"``, ``"summary"`` and ``"sentiment"``.
            company_name: Company name substituted into the topic-extraction
                prompt.

        Raises:
            KeyError: If an article lacks one of the three required keys.
        """
        self.llm = ChatOpenAI(model=model, api_key=api_key, temperature=0.1)
        self.articles = [self._format_article(article) for article in articles_dict]
        self.company_name = company_name

    @staticmethod
    def _format_article(article: dict) -> str:
        """Render one article mapping as the text block sent to the model."""
        title = article["title"]
        summary = article["summary"]
        sentiment = article["sentiment"]
        return f"title {title} \n summary {summary} \n sentiment {sentiment} \n\n"

    @staticmethod
    def _format_comparisons(comparative_analysis_articles) -> str:
        """Join comparison/impact pairs into the text for the final prompt."""
        sections = ["Comparative Analysis: \n"]
        for entry in comparative_analysis_articles:
            # Positional unpack: each entry must hold exactly two values, the
            # comparison first and the impact second, which is the shape that
            # ``comparative_analysis`` returns.
            comparison, impact = entry.values()
            sections.append(f"comparison: {comparison} \n impact: {impact} \n\n")
        return "".join(sections)

    def _structured_chain(self, prompt, schema):
        """Pipe *prompt* into the LLM wrapped to produce *schema* output."""
        return prompt | self.llm.with_structured_output(schema)

    async def topic_extraction(self, article: str) -> List[str]:
        """Ask the model for the topics of one formatted article.

        Args:
            article: Article text, normally one element of ``self.articles``.

        Returns:
            The ``topics`` list of the parsed ``TopicExtraction`` response.
        """
        prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self._TOPIC_EXTRACTION_SYSTEM),
                ("human", "Input Article: \n {articles}"),
            ]
        )
        chain = self._structured_chain(prompt, TopicExtraction)
        response = await chain.ainvoke(
            {"company_name": self.company_name, "articles": article}
        )
        return response.topics

    async def topic_overlap(self, id1: int, id2: int) -> dict:
        """Ask the model which topics two articles share and which are unique.

        Args:
            id1: Index into ``self.articles`` of the first article.
            id2: Index into ``self.articles`` of the second article.

        Returns:
            A dict with the key ``"Common Topics "`` and one
            ``"Unique Topics in Article <id>"`` key per index. When both
            indices are equal the two unique-topic keys coincide and only the
            second article's list is kept.

        Raises:
            IndexError: If an index is outside ``self.articles``.
        """
        # Resolve both articles first so a bad index fails before any LLM call.
        article_1, article_2 = self.articles[id1], self.articles[id2]
        prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self._TOPIC_OVERLAP_SYSTEM),
                ("human", self._TOPIC_OVERLAP_HUMAN),
            ]
        )
        chain = self._structured_chain(prompt, TopicOverlap)
        response = await chain.ainvoke({"article_1": article_1, "article_2": article_2})
        return {
            # NOTE(review): the trailing space in this key is kept because
            # callers may already look it up verbatim.
            "Common Topics ": response.common_topics,
            f"Unique Topics in Article {id1}": response.unique_topics_1,
            f"Unique Topics in Article {id2}": response.unique_topics_2,
        }

    async def comparative_analysis(self, id1: int, id2: int) -> dict:
        """Ask the model for a one-sentence comparison and impact of two articles.

        Args:
            id1: Index into ``self.articles`` of the first article.
            id2: Index into ``self.articles`` of the second article.

        Returns:
            A two-entry dict: ``"comparison of <id1>, <id2>"`` followed by
            ``"impact"``. ``final_analysis`` relies on that order.

        Raises:
            IndexError: If an index is outside ``self.articles``.
        """
        # Resolve both articles first so a bad index fails before any LLM call.
        article_1, article_2 = self.articles[id1], self.articles[id2]
        prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self._COMPARATIVE_SYSTEM),
                ("human", self._COMPARATIVE_HUMAN),
            ]
        )
        chain = self._structured_chain(prompt, ComparativeAnalyzer)
        response = await chain.ainvoke(
            {"article_1": article_1, "article_2": article_2, "id1": id1, "id2": id2}
        )
        return {
            f"comparison of {id1}, {id2}": response.comparison,
            "impact": response.impact,
        }

    async def main(self, similar_pairs: list) -> dict:
        """Run every per-article and per-pair analysis concurrently.

        Args:
            similar_pairs: Iterable of 3-item entries ``(id1, id2, extra)``;
                only the two indices are used.

        Returns:
            A dict with ``"topic_extraction_results"`` (one entry per article),
            ``"topic_overlap_results"`` and ``"comparative_analysis_results"``
            (one entry per pair), each in input order.
        """
        # Materialise the pairs once so a one-shot iterable feeds both per-pair
        # analyses, and so a malformed entry fails before coroutines exist.
        pairs = [(id1, id2) for id1, id2, _ in similar_pairs]

        extraction_jobs = [self.topic_extraction(text) for text in self.articles]
        overlap_jobs = [self.topic_overlap(id1, id2) for id1, id2 in pairs]
        comparison_jobs = [self.comparative_analysis(id1, id2) for id1, id2 in pairs]

        # The nested gathers keep the three result groups separate while all
        # calls are awaited together.
        extraction_results, overlap_results, comparison_results = await asyncio.gather(
            asyncio.gather(*extraction_jobs),
            asyncio.gather(*overlap_jobs),
            asyncio.gather(*comparison_jobs),
        )
        return {
            "topic_extraction_results": extraction_results,
            "topic_overlap_results": overlap_results,
            "comparative_analysis_results": comparison_results,
        }

    def final_analysis(self, comparative_analysis_articles) -> tuple:
        """Summarise the pairwise comparisons in English and Hindi.

        Unlike the other analysis methods this one is synchronous.

        Args:
            comparative_analysis_articles: Iterable of two-value mappings as
                returned by ``comparative_analysis`` (comparison, then impact).

        Returns:
            An ``(english, hindi)`` tuple taken from the parsed
            ``FinalAnalysis`` response.

        Raises:
            ValueError: If an entry does not hold exactly two values.
        """
        comparative_results = self._format_comparisons(comparative_analysis_articles)
        prompt = ChatPromptTemplate.from_template(self._FINAL_ANALYSIS_TEMPLATE)
        chain = self._structured_chain(prompt, FinalAnalysis)
        response = chain.invoke({"comparative_results": comparative_results})
        return response.english, response.hindi
|