BSJ2004 committed on
Commit
e4da793
·
verified ·
1 Parent(s): ad587fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +388 -496
app.py CHANGED
@@ -1,496 +1,388 @@
1
- import streamlit as st
2
- import requests
3
- import pandas as pd
4
- import json
5
- import os
6
- import matplotlib.pyplot as plt
7
- import seaborn as sns
8
- import base64
9
- from io import BytesIO
10
- from PIL import Image, ImageEnhance
11
- import time
12
- from typing import Dict, Any, List
13
-
14
- # API Base URL - Change this to match your deployment
15
- API_BASE_URL = "http://localhost:8000"
16
-
17
- # New function to generate the example output format
18
def generate_example_output(company_name: str, timeout: float = 120.0) -> str:
    """Fetch the complete analysis for a company and render it as JSON.

    Posts ``company_name`` to the ``/api/complete_analysis`` endpoint under
    ``API_BASE_URL`` and reshapes the response into the example output format.

    Args:
        company_name: Company whose news analysis is requested.
        timeout: Seconds to wait for the backend before giving up.

    Returns:
        A JSON string indented by two spaces. On success it contains the
        company, articles, comparative sentiment score, final sentiment
        analysis and an audio placeholder. On any failure it contains
        ``"error"`` and ``"message"`` keys instead.
    """
    try:
        url = f"{API_BASE_URL}/api/complete_analysis"
        # requests waits indefinitely unless a timeout is given explicitly.
        response = requests.post(
            url,
            json={"company_name": company_name},
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()

        comparative = data["Comparative Sentiment Score"]
        formatted_output = {
            "Company": data["Company"],
            "Articles": data["Articles"],
            "Comparative Sentiment Score": {
                "Sentiment Distribution": comparative["Sentiment Distribution"],
                "Coverage Differences": comparative["Coverage Differences"],
                "Topic Overlap": comparative["Topic Overlap"],
            },
            "Final Sentiment Analysis": data["Final Sentiment Analysis"],
            "Audio": "[Play Hindi Speech]" if data.get("Audio") else "No audio available",
        }
        return json.dumps(formatted_output, indent=2)
    except Exception as e:
        # Boundary handler: every failure (network, HTTP status, malformed
        # payload) is reported to the caller as a JSON error document.
        return json.dumps(
            {
                "error": str(e),
                "message": "Failed to generate example output",
            },
            indent=2,
        )
51
-
52
- # Function to run in terminal mode
53
def run_terminal_mode():
    """Prompt for a company name on stdin and print its analysis as JSON.

    Surrounding whitespace is stripped from the entered name. If nothing
    remains, a notice is printed and no analysis is requested.
    """
    print("News Analysis Terminal Mode")
    company_name = input("Enter company name: ").strip()
    if not company_name:
        # Avoid sending a request for an empty company name.
        print("No company name entered; nothing to analyze.")
        return
    print(f"Analyzing {company_name}...")
    print(generate_example_output(company_name))
60
-
61
- # Check if run directly or imported
62
- if __name__ == "__main__":
63
- # Check if terminal mode is requested via command line args
64
- import sys
65
- if len(sys.argv) > 1 and sys.argv[1] == "--terminal":
66
- run_terminal_mode()
67
- else:
68
- # Continue with the Streamlit app
69
-
70
- # App title and description
71
- st.set_page_config(
72
- page_title="News Summarization & TTS",
73
- page_icon="📰",
74
- layout="wide",
75
- initial_sidebar_state="expanded"
76
- )
77
-
78
- # Custom CSS for better UI
79
- st.markdown("""
80
- <style>
81
- .main-header {
82
- font-size: 2.5rem;
83
- font-weight: 700;
84
- color: #1E3A8A;
85
- margin-bottom: 1rem;
86
- }
87
- .sub-header {
88
- font-size: 1.5rem;
89
- font-weight: 600;
90
- color: #2563EB;
91
- margin-top: 1rem;
92
- margin-bottom: 0.5rem;
93
- }
94
- .card {
95
- padding: 1.5rem;
96
- border-radius: 0.5rem;
97
- background-color: #F8FAFC;
98
- border: 1px solid #E2E8F0;
99
- margin-bottom: 1rem;
100
- }
101
- .positive {
102
- color: #059669;
103
- font-weight: 600;
104
- }
105
- .negative {
106
- color: #DC2626;
107
- font-weight: 600;
108
- }
109
- .neutral {
110
- color: #6B7280;
111
- font-weight: 600;
112
- }
113
- .topic-tag {
114
- display: inline-block;
115
- padding: 0.25rem 0.5rem;
116
- border-radius: 2rem;
117
- background-color: #E5E7EB;
118
- color: #1F2937;
119
- font-size: 0.75rem;
120
- margin-right: 0.5rem;
121
- margin-bottom: 0.5rem;
122
- }
123
- .audio-container {
124
- width: 100%;
125
- padding: 1rem;
126
- background-color: #F3F4F6;
127
- border-radius: 0.5rem;
128
- margin-top: 1rem;
129
- }
130
- .info-text {
131
- font-size: 0.9rem;
132
- color: #4B5563;
133
- }
134
- .article-title {
135
- font-size: 1.2rem;
136
- font-weight: 600;
137
- color: #111827;
138
- margin-bottom: 0.5rem;
139
- margin-top: 0.5rem;
140
- }
141
- .article-summary {
142
- font-size: 0.9rem;
143
- color: #374151;
144
- margin-bottom: 0.5rem;
145
- }
146
- .article-meta {
147
- font-size: 0.8rem;
148
- color: #6B7280;
149
- margin-bottom: 0.5rem;
150
- }
151
- .section-divider {
152
- height: 1px;
153
- background-color: #E5E7EB;
154
- margin: 1.5rem 0;
155
- }
156
- .chart-container {
157
- background-color: white;
158
- padding: 1rem;
159
- border-radius: 0.5rem;
160
- border: 1px solid #E2E8F0;
161
- }
162
- </style>
163
- """, unsafe_allow_html=True)
164
-
165
- # Function to make API requests
166
def make_api_request(
    endpoint: str,
    data: Dict[str, Any] = None,
    method: str = "POST",
    timeout: float = 120.0,
) -> Dict[str, Any]:
    """Call a backend endpoint and return its decoded JSON body.

    Failures are reported to the user through ``st.error`` rather than raised.

    Args:
        endpoint: Path appended to ``API_BASE_URL``.
        data: JSON body for non-GET requests; ignored for ``"GET"``.
        method: ``"GET"`` issues a GET request; any other value issues a POST.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON response, or an empty dict if the request failed.
    """
    url = f"{API_BASE_URL}{endpoint}"

    try:
        if method == "GET":
            response = requests.get(url, timeout=timeout)
        else:
            response = requests.post(url, json=data, timeout=timeout)

        response.raise_for_status()
        return response.json()
    # Timeout is checked before ConnectionError because a connect timeout is
    # an instance of both and should be reported as a timeout.
    except requests.exceptions.Timeout:
        st.error("⚠️ Timeout Error: The request took too long to complete. Please try again with a different company name.")
    except requests.exceptions.ConnectionError:
        st.error("⚠️ Connection Error: Cannot connect to the API server. Please ensure the API server is running at " + API_BASE_URL)
    except requests.exceptions.HTTPError as e:
        status_code = e.response.status_code
        if status_code == 404:
            st.error("⚠️ No articles found for this company. Please try another company name.")
        elif status_code == 500:
            # Prefer the server's own error detail when the body is JSON.
            try:
                error_detail = e.response.json().get("detail", "Unknown server error")
            except (ValueError, AttributeError):
                # Body was not JSON, or not a JSON object.
                st.error("⚠️ Internal Server Error: Something went wrong on the server. Please try again later.")
            else:
                st.error(f"⚠️ Server Error: {error_detail}")
        else:
            st.error(f"⚠️ HTTP Error: {str(e)}")
    except Exception as e:
        st.error(f"⚠️ Error: {str(e)}")
    return {}
200
-
201
- # Function to create sentiment color
202
def get_sentiment_color(sentiment: str) -> str:
    """Map a sentiment label to the CSS class used to color it.

    ``"Positive"`` and ``"Negative"`` map to ``"positive"`` and ``"negative"``;
    every other value maps to ``"neutral"``.
    """
    known_classes = (("Positive", "positive"), ("Negative", "negative"))
    for label, css_class in known_classes:
        if sentiment == label:
            return css_class
    return "neutral"
210
-
211
- # Function to create visualization for sentiment distribution
212
def plot_sentiment_distribution(sentiment_data: Dict[str, int]):
    """Build a bar chart of article counts per sentiment label.

    Args:
        sentiment_data: Mapping from sentiment label to a count. Only the
            ``"Positive"``, ``"Neutral"`` and ``"Negative"`` keys are read;
            a missing key is drawn as 0 instead of raising ``KeyError``.

    Returns:
        The Matplotlib figure holding the chart.
    """
    # A bare Figure is not registered with pyplot's global figure manager, so
    # it can be garbage-collected after rendering instead of piling up on
    # every rerun of the app.
    from matplotlib.figure import Figure

    labels = ["Positive", "Neutral", "Negative"]
    values = [sentiment_data.get(label, 0) for label in labels]
    colors = ["#059669", "#6B7280", "#DC2626"]

    fig = Figure(figsize=(10, 6))
    ax = fig.subplots()
    ax.bar(labels, values, color=colors)
    ax.set_title("Sentiment Distribution", fontsize=16, fontweight='bold')
    ax.set_ylabel("Number of Articles", fontsize=12)
    ax.grid(axis='y', linestyle='--', alpha=0.7)

    # Write each count just above its bar.
    for i, v in enumerate(values):
        ax.text(i, v + 0.1, str(v), ha='center', fontweight='bold')

    return fig
229
-
230
- # Function to display article information
231
def display_article(article: Dict[str, Any], index: int):
    """Render one article (title, sentiment, summary, topics) in the page.

    Args:
        article: Article data. The ``"Title"``, ``"Sentiment"``,
            ``"Summary"`` and ``"Topics"`` keys are read; each is optional.
        index: Zero-based position of the article, shown one-based.
    """
    # Article text comes from external news sources and is emitted with
    # unsafe_allow_html=True, so it must be escaped before interpolation.
    import html

    st.markdown("<div class='card'>", unsafe_allow_html=True)

    # Article title and sentiment
    sentiment = article.get("Sentiment", "Neutral")
    sentiment_class = get_sentiment_color(sentiment)
    title = html.escape(str(article.get("Title", "Untitled")))

    st.markdown(f"<h3 class='article-title'>{index+1}. {title}</h3>", unsafe_allow_html=True)
    st.markdown(f"<span class='{sentiment_class}'>{html.escape(str(sentiment))}</span>", unsafe_allow_html=True)

    # Article summary
    summary = html.escape(str(article.get("Summary", "No summary available.")))
    st.markdown("<div class='article-summary'>", unsafe_allow_html=True)
    st.markdown(summary, unsafe_allow_html=True)
    st.markdown("</div>", unsafe_allow_html=True)

    # Topics
    topics = article.get("Topics")
    if topics:
        st.markdown("<div>", unsafe_allow_html=True)
        for topic in topics:
            st.markdown(f"<span class='topic-tag'>{html.escape(str(topic))}</span>", unsafe_allow_html=True)
        st.markdown("</div>", unsafe_allow_html=True)

    st.markdown("</div>", unsafe_allow_html=True)
255
-
256
- # App layout
257
- st.markdown("<h1 class='main-header'>📰 News Summarization & Text-to-Speech</h1>", unsafe_allow_html=True)
258
- st.markdown("""
259
- <p class='info-text'>
260
- This application extracts news articles about a company, performs sentiment analysis, conducts comparative analysis,
261
- and generates a text-to-speech output in Hindi. Enter a company name to get started.
262
- </p>
263
- """, unsafe_allow_html=True)
264
-
265
- # Sidebar
266
- st.sidebar.image("https://cdn-icons-png.flaticon.com/512/2593/2593073.png", width=100)
267
- st.sidebar.title("News Analysis Settings")
268
-
269
- # Company selection
270
- company_input_method = st.sidebar.radio(
271
- "Select company input method:",
272
- options=["Text Input", "Choose from List"]
273
- )
274
-
275
- if company_input_method == "Text Input":
276
- company_name = st.sidebar.text_input("Enter Company Name:", placeholder="e.g., Tesla")
277
- else:
278
- companies = ["Apple", "Google", "Microsoft", "Amazon", "Tesla", "Meta", "Netflix", "Uber", "Airbnb", "Twitter"]
279
- company_name = st.sidebar.selectbox("Select Company:", companies)
280
-
281
- # Analysis settings
282
- max_articles = st.sidebar.slider("Maximum Articles to Analyze:", min_value=5, max_value=20, value=10)
283
- st.sidebar.markdown("---")
284
-
285
- # Analysis button
286
- analyze_button = st.sidebar.button("Analyze Company News", type="primary")
287
-
288
- # Audio playback settings
289
- st.sidebar.markdown("## Audio Settings")
290
- audio_speed = st.sidebar.select_slider("TTS Speech Speed:", options=["Slow", "Normal", "Fast"], value="Normal")
291
- st.sidebar.markdown("---")
292
-
293
- # Add option to see JSON in example format
294
- st.sidebar.markdown("## Developer Options")
295
- show_json = st.sidebar.checkbox("Show JSON output in example format")
296
- st.sidebar.markdown("---")
297
-
298
- # About section
299
- with st.sidebar.expander("About This App"):
300
- st.markdown("""
301
- This application performs:
302
- - News extraction from multiple sources
303
- - Sentiment analysis of the content
304
- - Topic identification and comparative analysis
305
- - Text-to-speech conversion to Hindi
306
-
307
- Built with Streamlit, FastAPI, and various NLP tools.
308
- """)
309
-
310
- # Main content area
311
- if analyze_button and company_name:
312
- with st.spinner(f"Analyzing news for {company_name}... This may take a minute"):
313
- # Perform complete analysis
314
- response = make_api_request(
315
- "/api/complete_analysis",
316
- {"company_name": company_name}
317
- )
318
-
319
- if not response:
320
- st.error("Failed to retrieve data. Please try again.")
321
- elif "detail" in response:
322
- st.error(response["detail"])
323
- else:
324
- # Display company header
325
- st.markdown(f"<h2 class='sub-header'>Analysis Results for {response['Company']}</h2>", unsafe_allow_html=True)
326
-
327
- # Display sentiment summary
328
- col1, col2 = st.columns([2, 1])
329
-
330
- with col1:
331
- st.markdown("<div class='card'>", unsafe_allow_html=True)
332
- st.markdown("<h3 class='sub-header'>Sentiment Overview</h3>", unsafe_allow_html=True)
333
- st.markdown(f"{response['Final Sentiment Analysis']}")
334
- st.markdown("</div>", unsafe_allow_html=True)
335
-
336
- with col2:
337
- sentiment_data = response["Comparative Sentiment Score"]["Sentiment Distribution"]
338
- fig = plot_sentiment_distribution(sentiment_data)
339
- st.pyplot(fig)
340
-
341
- st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)
342
-
343
- # Display Hindi TTS audio
344
- if "Audio" in response and response["Audio"]:
345
- st.markdown("<h3 class='sub-header'>Hindi Audio Summary</h3>", unsafe_allow_html=True)
346
-
347
- audio_message = response["Audio"]
348
-
349
- if audio_message == "Failed to generate audio":
350
- st.warning("Hindi audio could not be generated. However, you can still read the Hindi text below.")
351
- else:
352
- try:
353
- # Check if the response contains the actual audio file path
354
- audio_file_path = response.get("_audio_file_path")
355
-
356
- if audio_file_path:
357
- # Extract the filename
358
- audio_filename = os.path.basename(audio_file_path)
359
- audio_url = f"{API_BASE_URL}/api/audio/{audio_filename}"
360
- else:
361
- # If no path is provided, just display a message
362
- st.info("Audio is available but the path was not provided.")
363
- audio_url = None
364
-
365
- if audio_url:
366
- # Attempt to download the audio file
367
- audio_response = requests.get(audio_url)
368
- if audio_response.status_code == 200:
369
- # Save temporarily
370
- temp_audio_path = f"temp_audio_{os.path.basename(audio_url)}"
371
- with open(temp_audio_path, "wb") as f:
372
- f.write(audio_response.content)
373
-
374
- # Play from local file
375
- st.markdown("<div class='audio-container'>", unsafe_allow_html=True)
376
- st.audio(temp_audio_path, format="audio/mp3")
377
-
378
- # Display audio download link
379
- st.markdown(f"<a href='{audio_url}' download='hindi_summary.mp3'>Download Hindi Audio</a>", unsafe_allow_html=True)
380
-
381
- # Clean up temp file (optional)
382
- # os.remove(temp_audio_path) # Uncomment to delete after use
383
- else:
384
- st.warning(f"Unable to load audio file (HTTP {audio_response.status_code}). You can still read the Hindi text below.")
385
- else:
386
- st.info("Hindi audio summary would be available here.")
387
- except Exception as e:
388
- st.warning(f"Error playing audio: {str(e)}. You can still read the Hindi text below.")
389
-
390
- # Display the Hindi text with better formatting
391
- with st.expander("Show Hindi Text"):
392
- hindi_text = response.get("Hindi Summary", "Hindi text not available.")
393
-
394
- # Format the text for better readability
395
- paragraphs = hindi_text.split("। ")
396
-
397
- for paragraph in paragraphs:
398
- if paragraph.strip():
399
- # Add a period if it doesn't end with one
400
- if not paragraph.strip().endswith("।"):
401
- paragraph += "।"
402
- st.markdown(f"<p style='font-size: 16px; margin-bottom: 10px;'>{paragraph}</p>", unsafe_allow_html=True)
403
-
404
- st.markdown("</div>", unsafe_allow_html=True)
405
-
406
- st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)
407
-
408
- # Display articles
409
- st.markdown("<h3 class='sub-header'>News Articles</h3>", unsafe_allow_html=True)
410
- articles = response.get("Articles", [])
411
-
412
- if not articles:
413
- st.info("No articles found for this company.")
414
- else:
415
- for i, article in enumerate(articles):
416
- display_article(article, i)
417
-
418
- st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)
419
-
420
- # Display comparative analysis
421
- st.markdown("<h3 class='sub-header'>Comparative Analysis</h3>", unsafe_allow_html=True)
422
-
423
- # Display topic overlap
424
- topic_data = response["Comparative Sentiment Score"]["Topic Overlap"]
425
-
426
- col1, col2 = st.columns(2)
427
-
428
- with col1:
429
- st.markdown("<div class='card'>", unsafe_allow_html=True)
430
- st.markdown("<h4>Common Topics</h4>", unsafe_allow_html=True)
431
-
432
- common_topics = topic_data.get("Common Topics Across All", [])
433
- if common_topics:
434
- for topic in common_topics:
435
- st.markdown(f"<span class='topic-tag'>{topic}</span>", unsafe_allow_html=True)
436
- else:
437
- st.info("No common topics found across articles.")
438
-
439
- st.markdown("</div>", unsafe_allow_html=True)
440
-
441
- with col2:
442
- st.markdown("<div class='card'>", unsafe_allow_html=True)
443
- st.markdown("<h4>Coverage Comparison</h4>", unsafe_allow_html=True)
444
-
445
- comparisons = response["Comparative Sentiment Score"].get("Coverage Differences", [])
446
- if comparisons:
447
- for i, comparison in enumerate(comparisons[:3]): # Show only top 3 comparisons
448
- st.markdown(f"<p><strong>{i+1}.</strong> {comparison.get('Comparison', '')}</p>", unsafe_allow_html=True)
449
- st.markdown(f"<p class='info-text'>{comparison.get('Impact', '')}</p>", unsafe_allow_html=True)
450
- else:
451
- st.info("No comparative insights available.")
452
-
453
- st.markdown("</div>", unsafe_allow_html=True)
454
-
455
- # Display full comparison in expander
456
- with st.expander("View All Comparisons"):
457
- comparisons = response["Comparative Sentiment Score"].get("Coverage Differences", [])
458
- for i, comparison in enumerate(comparisons):
459
- st.markdown(f"<p><strong>{i+1}.</strong> {comparison.get('Comparison', '')}</p>", unsafe_allow_html=True)
460
- st.markdown(f"<p class='info-text'>{comparison.get('Impact', '')}</p>", unsafe_allow_html=True)
461
- st.markdown("<hr>", unsafe_allow_html=True)
462
-
463
- # Show JSON in example format if requested
464
- if show_json:
465
- st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)
466
- st.markdown("<h3 class='sub-header'>Example JSON Format</h3>", unsafe_allow_html=True)
467
-
468
- # Get the formatted JSON
469
- json_output = generate_example_output(company_name)
470
-
471
- # Display the JSON in a code block
472
- st.code(json_output, language="json")
473
- else:
474
- # Display placeholder
475
- st.markdown("<div class='card'>", unsafe_allow_html=True)
476
- st.markdown("<h3 class='sub-header'>Enter a Company Name to Begin Analysis</h3>", unsafe_allow_html=True)
477
- st.markdown("""
478
- <p class='info-text'>
479
- This application will:
480
- </p>
481
- <ul class='info-text'>
482
- <li>Extract news articles from multiple sources</li>
483
- <li>Analyze sentiment (positive, negative, neutral)</li>
484
- <li>Identify key topics in each article</li>
485
- <li>Perform comparative analysis across articles</li>
486
- <li>Generate Hindi speech output summarizing the findings</li>
487
- </ul>
488
- """, unsafe_allow_html=True)
489
- st.markdown("</div>", unsafe_allow_html=True)
490
-
491
- # Sample output image
492
- st.image("https://miro.medium.com/max/1400/1*Ger-949PgQnaje2oa9XMdw.png", caption="Sample sentiment analysis visualization")
493
-
494
- # Footer
495
- st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)
496
- st.markdown("<p class='info-text' style='text-align: center;'>News Summarization & Text-to-Speech Application | Developed with Streamlit and FastAPI</p>", unsafe_allow_html=True)
 
1
+ import streamlit as st
2
+ import requests
3
+ import pandas as pd
4
+ import json
5
+ import os
6
+ import matplotlib.pyplot as plt
7
+ import seaborn as sns
8
+ import base64
9
+ from io import BytesIO
10
+ from PIL import Image, ImageEnhance
11
+ import time
12
+ from typing import Dict, Any, List, Optional
13
+ import uuid
14
+ import asyncio
15
+ from pydantic import BaseModel
16
+ import traceback
17
+
18
+ # Import backend utility functions (assuming these are in a separate utils.py file)
19
+ # For Hugging Face Spaces, you'll need to include these functions in the same file or a utils.py alongside app.py
20
+ from utils import (search_news, analyze_article_sentiment, perform_comparative_analysis,
21
+ translate_to_hindi, text_to_speech, prepare_final_report, NewsArticle)
22
+
23
+ # For this example, I'll assume utils.py is available. If not, you'd need to paste those function definitions here.
24
+
25
+ # API Base URL - Not needed since we're integrating directly, but kept for reference
26
+ API_BASE_URL = "http://localhost:8000"
27
+
28
+ # Define request/response models (from api.py)
29
class CompanyRequest(BaseModel):
    """Request payload that names the company to look up."""

    company_name: str


class TextToSpeechRequest(BaseModel):
    """Request payload for speech generation.

    ``output_filename`` is optional and defaults to ``None``.
    """

    text: str
    output_filename: Optional[str] = None


class SentimentAnalysisRequest(BaseModel):
    """Request payload carrying a list of article dictionaries."""

    articles: List[Dict[str, Any]]
38
+
39
+ # Backend functions adapted from api.py
40
async def get_news(company_name: str, num_articles: int = 5) -> Dict[str, Any]:
    """Search for news about a company and return the articles as dicts.

    Args:
        company_name: Company to search for. Must contain a non-blank name.
        num_articles: Number of articles requested from ``search_news``.

    Returns:
        ``{"articles": [...]}`` with one ``to_dict()`` result per article, or
        ``{"error": message}`` if the name is blank, nothing was found, or
        the search raised.
    """
    if not company_name or not company_name.strip():
        # Reject blank input up front instead of issuing a pointless search.
        return {"error": "Company name must not be empty"}

    try:
        articles = search_news(company_name, num_articles=num_articles)
        if not articles:
            return {"error": f"No news articles found for {company_name}"}
        return {"articles": [article.to_dict() for article in articles]}
    except Exception as e:
        # Boundary handler: callers receive failures as an error dict.
        return {"error": str(e)}
49
+
50
async def analyze_sentiment(articles: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Run per-article and comparative sentiment analysis on article dicts.

    Args:
        articles: Article dictionaries. Each must contain ``"title"``,
            ``"url"`` and ``"content"``; ``"summary"``, ``"source"``,
            ``"date"``, ``"sentiment"`` and ``"topics"`` are optional.

    Returns:
        ``{"sentiment_analysis": {"detailed_sentiment": [...],
        "comparative_analysis": ...}}`` on success, or ``{"error": message}``
        if an article lacks a required field or the analysis raised.
    """
    required_fields = ("title", "url", "content")
    try:
        news_articles = []
        for position, article_dict in enumerate(articles):
            # Name the missing fields explicitly; a bare KeyError would only
            # surface as an opaque message such as "'title'".
            missing = [field for field in required_fields if field not in article_dict]
            if missing:
                return {
                    "error": (
                        f"Article {position} is missing required field(s): "
                        f"{', '.join(missing)}"
                    )
                }
            news_articles.append(
                NewsArticle(
                    title=article_dict["title"],
                    url=article_dict["url"],
                    content=article_dict["content"],
                    summary=article_dict.get("summary", ""),
                    source=article_dict.get("source", ""),
                    date=article_dict.get("date", ""),
                    sentiment=article_dict.get("sentiment", ""),
                    topics=article_dict.get("topics", []),
                )
            )

        detailed_sentiment = [analyze_article_sentiment(article) for article in news_articles]
        comparative_analysis = perform_comparative_analysis(news_articles)
        return {
            "sentiment_analysis": {
                "detailed_sentiment": detailed_sentiment,
                "comparative_analysis": comparative_analysis,
            }
        }
    except Exception as e:
        # Boundary handler: callers receive failures as an error dict.
        return {"error": str(e)}
76
+
77
async def generate_speech(text: str, output_filename: Optional[str] = None) -> Dict[str, Any]:
    """Translate text to Hindi and synthesize it into an audio file.

    Args:
        text: Text to translate and speak. Must not be blank.
        output_filename: Target path. When omitted, a random
            ``audio_files/<hex>.mp3`` name is generated. A name that does not
            start with ``audio_files/`` is placed under that directory.

    Returns:
        ``{"audio_file": path, "text": hindi_text}`` on success, or
        ``{"error": message}`` if the input is rejected or generation fails.
    """
    try:
        if not text or not text.strip():
            return {"error": "No text provided for speech generation"}

        if not output_filename:
            unique_id = uuid.uuid4().hex
            output_filename = f"audio_files/{unique_id}.mp3"
        elif not output_filename.startswith("audio_files/"):
            output_filename = f"audio_files/{output_filename}"

        # Resolve ".." segments and refuse any path that would land outside
        # audio_files/, since the filename may be supplied by a caller.
        normalized = os.path.normpath(output_filename)
        if normalized.split(os.sep)[0] != "audio_files":
            return {"error": "Output filename must stay inside the audio_files directory"}

        # Create the actual target directory, including nested ones.
        os.makedirs(os.path.dirname(normalized) or "audio_files", exist_ok=True)
        hindi_text = translate_to_hindi(text)
        audio_file = text_to_speech(hindi_text, output_filename)
        if not audio_file:
            return {"error": "Failed to generate audio file"}
        return {"audio_file": audio_file, "text": hindi_text}
    except Exception as e:
        # Boundary handler: callers receive failures as an error dict.
        return {"error": str(e)}
93
+
94
def _describe_failure(error: Exception, company_name: str) -> str:
    """Turn an exception into the user-facing message for a failed analysis."""
    details = str(error).lower()
    user_message = "An error occurred during analysis. "
    if "timeout" in details:
        user_message += "There was a timeout when connecting to news sources. Please try again."
    elif "connection" in details:
        user_message += "There was a connection issue. Please check your internet."
    elif "not found" in details:
        user_message += f"No information could be found for {company_name}."
    else:
        user_message += "Please try again."
    return user_message


async def complete_analysis(company_name: str, num_articles: int = 5) -> Dict[str, Any]:
    """Run the full pipeline: search, comparative analysis, report and audio.

    Args:
        company_name: Company to analyse. Must contain a non-blank name.
        num_articles: Number of articles requested from ``search_news``.

    Returns:
        On success, a dict with ``"Company"``, ``"Articles"``,
        ``"Comparative Sentiment Score"``, ``"Final Sentiment Analysis"``,
        ``"Hindi Summary"`` and ``"Audio"``. ``"_audio_file_path"`` is added
        when audio was produced. On failure, ``{"error": message}``.
    """
    if not company_name or not company_name.strip():
        return {"error": "Please provide a company name."}

    try:
        articles = search_news(company_name, num_articles=num_articles)
        if not articles:
            return {"error": f"No news articles found for {company_name}"}

        comparative_analysis = perform_comparative_analysis(articles)
        final_report = prepare_final_report(company_name, articles, comparative_analysis)

        # Ensure the output directory exists before synthesizing audio, as
        # generate_speech does.
        os.makedirs("audio_files", exist_ok=True)
        unique_id = uuid.uuid4().hex
        output_filename = f"audio_files/{unique_id}.mp3"
        hindi_text = final_report["Hindi Summary"]
        audio_file = text_to_speech(hindi_text, output_filename)

        source_overlap = comparative_analysis["Topic Overlap"]
        topic_overlap = {"Common Topics": source_overlap["Common Topics Across All"]}
        # Article indices are shifted to one-based numbering for display.
        for article_idx, topics in source_overlap["Unique Topics By Article"].items():
            article_num = int(article_idx) + 1
            topic_overlap[f"Unique Topics in Article {article_num}"] = topics

        coverage_differences = comparative_analysis["Coverage Differences"]
        if len(articles) <= 1:
            # With a single article there is nothing to compare against.
            coverage_differences = [
                {
                    "Comparison": f"Only one article about {company_name} was found, limiting comparative analysis.",
                    "Impact": "Unable to compare coverage across multiple sources for more comprehensive insights.",
                }
            ]

        formatted_response = {
            "Company": company_name,
            "Articles": final_report["Articles"],
            "Comparative Sentiment Score": {
                "Sentiment Distribution": comparative_analysis["Sentiment Distribution"],
                "Coverage Differences": coverage_differences,
                "Topic Overlap": topic_overlap,
            },
            "Final Sentiment Analysis": comparative_analysis["Final Sentiment Analysis"],
            "Hindi Summary": hindi_text,
        }

        if audio_file:
            formatted_response["Audio"] = "[Play Hindi Speech]"
            formatted_response["_audio_file_path"] = audio_file
        else:
            formatted_response["Audio"] = "Failed to generate audio"

        return formatted_response
    except Exception as e:
        # Keep the full traceback in the server log; the user only sees a
        # short, friendly message.
        traceback.print_exc()
        return {"error": _describe_failure(e, company_name)}
154
+
155
+ # Streamlit UI functions (from app.py)
156
def generate_example_output(company_name: str) -> str:
    """Run the complete analysis and render it as example-format JSON.

    Args:
        company_name: Company to analyse.

    Returns:
        A JSON string indented by two spaces. If the analysis reports an
        error, the JSON contains ``"error"`` and ``"message"`` keys;
        otherwise it contains the company, articles, comparative sentiment
        score, final sentiment analysis and audio status.
    """
    # asyncio.run creates a fresh event loop and closes it afterwards, so no
    # loop is leaked per call.
    result = asyncio.run(complete_analysis(company_name))

    if "error" in result:
        # Surface the failure instead of rendering an empty-looking report.
        return json.dumps(
            {
                "error": result["error"],
                "message": "Failed to generate example output",
            },
            indent=2,
        )

    formatted_output = {
        "Company": result.get("Company", company_name),
        "Articles": result.get("Articles", []),
        "Comparative Sentiment Score": result.get(
            "Comparative Sentiment Score",
            {
                "Sentiment Distribution": {},
                "Coverage Differences": [],
                "Topic Overlap": {},
            },
        ),
        "Final Sentiment Analysis": result.get("Final Sentiment Analysis", ""),
        "Audio": result.get("Audio", "No audio available"),
    }
    return json.dumps(formatted_output, indent=2)
172
+
173
def get_sentiment_color(sentiment: str) -> str:
    """Return the CSS class for a sentiment label.

    ``"Positive"`` gives ``"positive"``, ``"Negative"`` gives ``"negative"``,
    and any other value gives ``"neutral"``.
    """
    if sentiment == "Positive":
        return "positive"
    return "negative" if sentiment == "Negative" else "neutral"
180
+
181
def plot_sentiment_distribution(sentiment_data: Dict[str, int]):
    """Build a bar chart of article counts per sentiment label.

    Args:
        sentiment_data: Mapping from sentiment label to a count. Only the
            ``"Positive"``, ``"Neutral"`` and ``"Negative"`` keys are read;
            a missing key is drawn as 0.

    Returns:
        The Matplotlib figure holding the chart.
    """
    # A bare Figure is not registered with pyplot's global figure manager, so
    # it can be garbage-collected once rendered. Figures made through
    # plt.subplots stay alive until explicitly closed and would accumulate
    # on every rerun of the app.
    from matplotlib.figure import Figure

    labels = ["Positive", "Neutral", "Negative"]
    values = [sentiment_data.get(label, 0) for label in labels]
    colors = ["#059669", "#6B7280", "#DC2626"]

    fig = Figure(figsize=(10, 6))
    ax = fig.subplots()
    ax.bar(labels, values, color=colors)
    ax.set_title("Sentiment Distribution", fontsize=16, fontweight='bold')
    ax.set_ylabel("Number of Articles", fontsize=12)
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    # Write each count just above its bar.
    for position, count in enumerate(values):
        ax.text(position, count + 0.1, str(count), ha='center', fontweight='bold')
    return fig
194
+
195
def display_article(article: Dict[str, Any], index: int):
    """Render one article as a card with title, sentiment, summary and topics.

    Args:
        article: Article data. The ``"Title"``, ``"Sentiment"``,
            ``"Summary"`` and ``"Topics"`` keys are read; each is optional.
        index: Zero-based position of the article, shown one-based.
    """
    # Article text comes from external news sources and is emitted with
    # unsafe_allow_html=True, so it must be escaped before interpolation.
    import html

    sentiment = article.get("Sentiment", "Neutral")
    sentiment_class = get_sentiment_color(sentiment)
    title = html.escape(str(article.get("Title", "Untitled")))
    summary = html.escape(str(article.get("Summary", "No summary available.")))

    topic_tags = "".join(
        f"<span class='topic-tag'>{html.escape(str(topic))}</span>"
        for topic in (article.get("Topics") or [])
    )
    topics_html = f"<div>{topic_tags}</div>" if topic_tags else ""

    # Emit the card as a single HTML fragment: each st.markdown call is
    # rendered as its own element, so a <div> opened in one call does not
    # wrap content written by later calls.
    card_html = (
        "<div class='card'>"
        f"<h3 class='article-title'>{index+1}. {title}</h3>"
        f"<span class='{sentiment_class}'>{html.escape(str(sentiment))}</span>"
        f"<div class='article-summary'>{summary}</div>"
        f"{topics_html}"
        "</div>"
    )
    st.markdown(card_html, unsafe_allow_html=True)
210
+
211
+ # Streamlit App
212
+ st.set_page_config(
213
+ page_title="News Summarization & TTS",
214
+ page_icon="📰",
215
+ layout="wide",
216
+ initial_sidebar_state="expanded"
217
+ )
218
+
219
+ st.markdown("""
220
+ <style>
221
+ .main-header { font-size: 2.5rem; font-weight: 700; color: #1E3A8A; margin-bottom: 1rem; }
222
+ .sub-header { font-size: 1.5rem; font-weight: 600; color: #2563EB; margin-top: 1rem; margin-bottom: 0.5rem; }
223
+ .card { padding: 1.5rem; border-radius: 0.5rem; background-color: #F8FAFC; border: 1px solid #E2E8F0; margin-bottom: 1rem; }
224
+ .positive { color: #059669; font-weight: 600; }
225
+ .negative { color: #DC2626; font-weight: 600; }
226
+ .neutral { color: #6B7280; font-weight: 600; }
227
+ .topic-tag { display: inline-block; padding: 0.25rem 0.5rem; border-radius: 2rem; background-color: #E5E7EB; color: #1F2937; font-size: 0.75rem; margin-right: 0.5rem; margin-bottom: 0.5rem; }
228
+ .audio-container { width: 100%; padding: 1rem; background-color: #F3F4F6; border-radius: 0.5rem; margin-top: 1rem; }
229
+ .info-text { font-size: 0.9rem; color: #4B5563; }
230
+ .article-title { font-size: 1.2rem; font-weight: 600; color: #111827; margin-bottom: 0.5rem; margin-top: 0.5rem; }
231
+ .article-summary { font-size: 0.9rem; color: #374151; margin-bottom: 0.5rem; }
232
+ .section-divider { height: 1px; background-color: #E5E7EB; margin: 1.5rem 0; }
233
+ </style>
234
+ """, unsafe_allow_html=True)
235
+
236
+ st.markdown("<h1 class='main-header'>📰 News Summarization & Text-to-Speech</h1>", unsafe_allow_html=True)
237
+ st.markdown("""
238
+ <p class='info-text'>
239
+ This application extracts news articles about a company, performs sentiment analysis, conducts comparative analysis,
240
+ and generates a text-to-speech output in Hindi. Enter a company name to get started.
241
+ </p>
242
+ """, unsafe_allow_html=True)
243
+
244
+ # Sidebar
245
+ st.sidebar.image("https://cdn-icons-png.flaticon.com/512/2593/2593073.png", width=100)
246
+ st.sidebar.title("News Analysis Settings")
247
+
248
+ company_input_method = st.sidebar.radio(
249
+ "Select company input method:",
250
+ options=["Text Input", "Choose from List"]
251
+ )
252
+
253
+ if company_input_method == "Text Input":
254
+ company_name = st.sidebar.text_input("Enter Company Name:", placeholder="e.g., Tesla")
255
+ else:
256
+ companies = ["Apple", "Google", "Microsoft", "Amazon", "Tesla", "Meta", "Netflix", "Uber", "Airbnb", "Twitter"]
257
+ company_name = st.sidebar.selectbox("Select Company:", companies)
258
+
259
+ max_articles = st.sidebar.slider("Maximum Articles to Analyze:", min_value=5, max_value=20, value=10)
260
+ analyze_button = st.sidebar.button("Analyze Company News", type="primary")
261
+ audio_speed = st.sidebar.select_slider("TTS Speech Speed:", options=["Slow", "Normal", "Fast"], value="Normal")
262
+ show_json = st.sidebar.checkbox("Show JSON output in example format")
263
+
264
+ with st.sidebar.expander("About This App"):
265
+ st.markdown("""
266
+ This application performs:
267
+ - News extraction from multiple sources
268
+ - Sentiment analysis of the content
269
+ - Topic identification and comparative analysis
270
+ - Text-to-speech conversion to Hindi
271
+ """)
272
+
273
+ # Main content
274
+ if analyze_button and company_name:
275
+ with st.spinner(f"Analyzing news for {company_name}... This may take a minute"):
276
+ loop = asyncio.new_event_loop()
277
+ asyncio.set_event_loop(loop)
278
+ response = loop.run_until_complete(complete_analysis(company_name))
279
+
280
+ if "error" in response:
281
+ st.error(response["error"])
282
+ else:
283
+ st.markdown(f"<h2 class='sub-header'>Analysis Results for {response['Company']}</h2>", unsafe_allow_html=True)
284
+
285
+ col1, col2 = st.columns([2, 1])
286
+ with col1:
287
+ st.markdown("<div class='card'>", unsafe_allow_html=True)
288
+ st.markdown("<h3 class='sub-header'>Sentiment Overview</h3>", unsafe_allow_html=True)
289
+ st.markdown(f"{response['Final Sentiment Analysis']}")
290
+ st.markdown("</div>", unsafe_allow_html=True)
291
+ with col2:
292
+ sentiment_data = response["Comparative Sentiment Score"]["Sentiment Distribution"]
293
+ fig = plot_sentiment_distribution(sentiment_data)
294
+ st.pyplot(fig)
295
+
296
+ st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)
297
+
298
+ if "Audio" in response and response["Audio"] == "[Play Hindi Speech]":  # Hindi audio section: shown only when the response carries this exact marker string
299
+ st.markdown("<h3 class='sub-header'>Hindi Audio Summary</h3>", unsafe_allow_html=True)
300
+ audio_file_path = response.get("_audio_file_path")  # None when the key is absent
301
+ if audio_file_path and os.path.exists(audio_file_path):  # player and download link only if the file exists on disk
302
+ st.markdown("<div class='audio-container'>", unsafe_allow_html=True)
303
+ st.audio(audio_file_path, format="audio/mp3")
304
+ with open(audio_file_path, "rb") as f:
305
+ audio_bytes = f.read()  # whole file is read into memory
306
+ b64 = base64.b64encode(audio_bytes).decode()
307
+ href = f'<a href="data:audio/mp3;base64,{b64}" download="hindi_summary.mp3">Download Hindi Audio</a>'  # file bytes embedded as a base64 data: URI inside a download link
308
+ st.markdown(href, unsafe_allow_html=True)
309
+ st.markdown("</div>", unsafe_allow_html=True)
310
+ else:
311
+ st.warning("Hindi audio could not be generated.")  # path missing from the response or file not found
312
+
313
+ with st.expander("Show Hindi Text"):
314
+ hindi_text = response.get("Hindi Summary", "Hindi text not available.")
315
+ paragraphs = hindi_text.split("")
316
+ for paragraph in paragraphs:
317
+ if paragraph.strip():
318
+ if not paragraph.strip().endswith("।"):
319
+ paragraph += "।"
320
+ st.markdown(f"<p style='font-size: 16px; margin-bottom: 10px;'>{paragraph}</p>", unsafe_allow_html=True)
321
+
322
+ st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)  # divider, then the per-article listing
322
+
323
+ st.markdown("<h3 class='sub-header'>News Articles</h3>", unsafe_allow_html=True)
324
+ articles = response.get("Articles", [])  # a missing "Articles" key is treated as an empty list
325
+ if not articles:
326
+ st.info("No articles found for this company.")
327
+ else:
328
+ for i, article in enumerate(articles):
329
+ display_article(article, i)  # helper defined outside this excerpt; receives the article and its 0-based index
331
+
332
+ st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)  # divider, then the comparative analysis: common topics | coverage comparison
332
+
333
+ st.markdown("<h3 class='sub-header'>Comparative Analysis</h3>", unsafe_allow_html=True)
334
+ col1, col2 = st.columns(2)  # rebinds col1/col2 to two equal-width columns
335
+ with col1:
336
+ st.markdown("<div class='card'>", unsafe_allow_html=True)
337
+ st.markdown("<h4>Common Topics</h4>", unsafe_allow_html=True)
338
+ common_topics = response["Comparative Sentiment Score"]["Topic Overlap"].get("Common Topics", [])  # the two outer keys are indexed directly (KeyError if missing); only "Common Topics" is optional
339
+ if common_topics:
340
+ for topic in common_topics:
341
+ st.markdown(f"<span class='topic-tag'>{topic}</span>", unsafe_allow_html=True)  # NOTE(review): topic text is inserted into HTML without escaping
342
+ else:
343
+ st.info("No common topics found across articles.")
344
+ st.markdown("</div>", unsafe_allow_html=True)
345
+ with col2:
346
+ st.markdown("<div class='card'>", unsafe_allow_html=True)
347
+ st.markdown("<h4>Coverage Comparison</h4>", unsafe_allow_html=True)
348
+ comparisons = response["Comparative Sentiment Score"].get("Coverage Differences", [])  # optional key; defaults to an empty list
349
+ if comparisons:
350
+ for i, comparison in enumerate(comparisons[:3]):  # preview: only the first three comparisons
351
+ st.markdown(f"<p><strong>{i+1}.</strong> {comparison.get('Comparison', '')}</p>", unsafe_allow_html=True)
352
+ st.markdown(f"<p class='info-text'>{comparison.get('Impact', '')}</p>", unsafe_allow_html=True)
353
+ else:
354
+ st.info("No comparative insights available.")
355
+ st.markdown("</div>", unsafe_allow_html=True)
356
+
357
+ with st.expander("View All Comparisons"):  # full list; reuses `comparisons` assigned in the column above
358
+ for i, comparison in enumerate(comparisons):
359
+ st.markdown(f"<p><strong>{i+1}.</strong> {comparison.get('Comparison', '')}</p>", unsafe_allow_html=True)
360
+ st.markdown(f"<p class='info-text'>{comparison.get('Impact', '')}</p>", unsafe_allow_html=True)
361
+ st.markdown("<hr>", unsafe_allow_html=True)
363
+
364
+ if show_json:  # optional JSON section, controlled by the sidebar checkbox
365
+ st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)
366
+ st.markdown("<h3 class='sub-header'>Example JSON Format</h3>", unsafe_allow_html=True)
367
+ json_output = generate_example_output(company_name)  # helper defined outside this excerpt; called with the company name, not with the `response` already obtained above
368
+ st.code(json_output, language="json")
369
+ else:  # landing view; presumably pairs with `if analyze_button and company_name:` - indentation is not visible in this rendering, confirm
370
+ st.markdown("<div class='card'>", unsafe_allow_html=True)
371
+ st.markdown("<h3 class='sub-header'>Enter a Company Name to Begin Analysis</h3>", unsafe_allow_html=True)
372
+ st.markdown("""
373
+ <p class='info-text'>
374
+ This application will:
375
+ </p>
376
+ <ul class='info-text'>
377
+ <li>Extract news articles from multiple sources</li>
378
+ <li>Analyze sentiment (positive, negative, neutral)</li>
379
+ <li>Identify key topics in each article</li>
380
+ <li>Perform comparative analysis across articles</li>
381
+ <li>Generate Hindi speech output summarizing the findings</li>
382
+ </ul>
383
+ """, unsafe_allow_html=True)
384
+ st.markdown("</div>", unsafe_allow_html=True)
385
+ st.image("https://miro.medium.com/max/1400/1*Ger-949PgQnaje2oa9XMdw.png", caption="Sample sentiment analysis visualization")  # illustration is referenced by a remote URL
386
+
387
+ st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)  # footer: divider plus a centered credit line
388
+ st.markdown("<p class='info-text' style='text-align: center;'>News Summarization & Text-to-Speech Application | Developed with Streamlit</p>", unsafe_allow_html=True)