proKBD committed on
Commit
f18e43c
·
verified ·
1 Parent(s): a3d8fca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +317 -141
app.py CHANGED
@@ -6,7 +6,7 @@ import json
6
  import os
7
  import plotly.express as px
8
  import altair as alt
9
- from utils import analyze_company_data, TextToSpeechConverter # Import TextToSpeechConverter
10
 
11
  # Set page config
12
  st.set_page_config(
@@ -43,151 +43,327 @@ def process_company(company_name):
43
  return {"articles": [], "comparative_sentiment_score": {}, "final_sentiment_analysis": "", "audio_path": None}
44
 
45
  def main():
46
- st.title("News Summarization App")
47
- st.write("Analyze news articles and get sentiment analysis for any company.")
48
-
49
- # User input
50
- company_name = st.text_input("Enter company name:", "Tesla")
51
-
52
- if st.button("Analyze"):
53
- with st.spinner("Analyzing news articles..."):
54
- try:
55
- # Process company data
56
- data = analyze_company_data(company_name)
57
-
58
- if not data["articles"]:
59
- st.error("No articles found for analysis.")
60
- return
61
-
62
- # Display overall sentiment
63
- st.subheader("Overall Sentiment Analysis")
64
- st.write(data["final_sentiment_analysis"])
65
-
66
- # Create DataFrame for sentiment scores
67
- sentiment_df = pd.DataFrame(data["comparative_sentiment_score"])
68
-
69
- # Display sentiment distribution by source
70
- st.subheader("Sentiment Distribution by Source")
71
-
72
- # Convert sentiment labels to numeric values for visualization
73
- sentiment_map = {'positive': 1, 'neutral': 0, 'negative': -1}
74
- numeric_df = sentiment_df.replace(sentiment_map)
75
-
76
- # Calculate sentiment distribution
77
- sentiment_dist = numeric_df.apply(lambda x: x.value_counts(normalize=True).to_dict())
78
-
79
- # Create a new DataFrame for visualization
80
- viz_data = []
81
- for source in sentiment_df.columns:
82
- dist = sentiment_dist[source]
83
- for sentiment, percentage in dist.items():
84
- viz_data.append({
85
- 'Source': source,
86
- 'Sentiment': sentiment,
87
- 'Percentage': percentage * 100
88
- })
89
-
90
- viz_df = pd.DataFrame(viz_data)
91
-
92
- # Create stacked bar chart
93
- fig = px.bar(viz_df,
94
- x='Source',
95
- y='Percentage',
96
- color='Sentiment',
97
- title='Sentiment Distribution by Source',
98
- barmode='stack')
99
-
100
- fig.update_layout(
101
- yaxis_title='Percentage',
102
- xaxis_title='News Source',
103
- legend_title='Sentiment'
104
- )
105
-
106
- st.plotly_chart(fig)
107
-
108
- # Display fine-grained sentiment analysis
109
- st.subheader("Fine-grained Sentiment Analysis")
110
-
111
- # Create tabs for different fine-grained analyses
112
- tab1, tab2, tab3 = st.tabs(["Financial Sentiment", "Emotional Sentiment", "ESG Sentiment"])
113
-
114
- with tab1:
115
- st.write("Financial Market Impact Analysis")
116
- financial_data = []
117
- for article in data["articles"]:
118
- if "financial_sentiment" in article:
119
- financial_data.append({
120
- "Article": article["title"],
121
- "Financial Impact": article["financial_sentiment"],
122
- "Confidence": article.get("fine_grained_sentiment", {}).get("models", {}).get("financial", {}).get("confidence", 0)
123
- })
124
- if financial_data:
125
- st.dataframe(pd.DataFrame(financial_data))
126
- else:
127
- st.info("Financial sentiment analysis not available for these articles.")
128
-
129
- with tab2:
130
- st.write("Emotional Sentiment Analysis")
131
- emotional_data = []
132
- for article in data["articles"]:
133
- if "emotional_sentiment" in article:
134
- emotional_data.append({
135
- "Article": article["title"],
136
- "Emotional Impact": article["emotional_sentiment"],
137
- "Confidence": article.get("fine_grained_sentiment", {}).get("models", {}).get("emotion", {}).get("confidence", 0)
138
- })
139
- if emotional_data:
140
- st.dataframe(pd.DataFrame(emotional_data))
141
- else:
142
- st.info("Emotional sentiment analysis not available for these articles.")
143
-
144
- with tab3:
145
- st.write("ESG (Environmental, Social, Governance) Analysis")
146
- esg_data = []
147
- for article in data["articles"]:
148
- if "esg_sentiment" in article:
149
- esg_data.append({
150
- "Article": article["title"],
151
- "ESG Impact": article["esg_sentiment"],
152
- "Confidence": article.get("fine_grained_sentiment", {}).get("models", {}).get("esg", {}).get("confidence", 0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  })
154
- if esg_data:
155
- st.dataframe(pd.DataFrame(esg_data))
156
- else:
157
- st.info("ESG sentiment analysis not available for these articles.")
158
-
159
- # Display articles with detailed sentiment analysis
160
- st.subheader("Recent Articles")
161
- for article in data["articles"]:
162
- with st.expander(article["title"]):
163
- st.write(f"**Source:** {article['source']}")
164
- st.write(f"**Summary:** {article['summary']}")
165
- st.write(f"**Overall Sentiment:** {article['sentiment']}")
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
- # Display fine-grained sentiment if available
168
- fine_grained = article.get("fine_grained_sentiment", {})
169
- if fine_grained:
170
- st.write("**Fine-grained Analysis:**")
171
- for model_name, model_data in fine_grained.get("models", {}).items():
172
- st.write(f"- {model_name.title()}: {model_data.get('category', 'N/A')} (Confidence: {model_data.get('confidence', 0):.2f})")
173
 
174
- # Display sentiment indices if available
175
- indices = article.get("sentiment_indices", {})
176
- if indices:
177
- st.write("**Sentiment Indices:**")
178
- for index_name, value in indices.items():
179
- st.write(f"- {index_name.replace('_', ' ').title()}: {value:.2f}")
180
 
181
- st.write(f"**URL:** [{article['url']}]({article['url']})")
182
-
183
- # Display audio player if audio is available
184
- if data.get("audio_path") and os.path.exists(data["audio_path"]):
185
- st.subheader("Hindi Audio Summary")
186
- st.audio(data["audio_path"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
- except Exception as e:
189
- st.error(f"Error analyzing company data: {str(e)}")
190
- print(f"Error: {str(e)}")
 
 
 
 
 
191
 
192
  if __name__ == "__main__":
193
  main()
 
6
  import os
7
  import plotly.express as px
8
  import altair as alt
9
+ from utils import analyze_company_data, TextToSpeechConverter
10
 
11
  # Set page config
12
  st.set_page_config(
 
43
  return {"articles": [], "comparative_sentiment_score": {}, "final_sentiment_analysis": "", "audio_path": None}
44
 
45
  def main():
46
+ st.title("📰 News Summarization and Analysis")
47
+
48
+ # Sidebar
49
+ st.sidebar.header("Settings")
50
+
51
+ # Replace dropdown with text input
52
+ company = st.sidebar.text_input(
53
+ "Enter Company Name",
54
+ placeholder="e.g., Tesla, Apple, Microsoft, or any other company",
55
+ help="Enter the name of any company you want to analyze"
56
+ )
57
+
58
+ if st.sidebar.button("Analyze") and company:
59
+ if len(company.strip()) < 2:
60
+ st.sidebar.error("Please enter a valid company name (at least 2 characters)")
61
+ else:
62
+ with st.spinner("Analyzing news articles..."):
63
+ try:
64
+ # Process company data
65
+ data = analyze_company_data(company)
66
+
67
+ if not data["articles"]:
68
+ st.error("No articles found for analysis.")
69
+ return
70
+
71
+ # Display Articles
72
+ st.header("📑 News Articles")
73
+ for idx, article in enumerate(data["articles"], 1):
74
+ with st.expander(f"Article {idx}: {article['title']}"):
75
+ st.write("**Content:**", article.get("content", "No content available"))
76
+ if "summary" in article:
77
+ st.write("**Summary:**", article["summary"])
78
+ st.write("**Source:**", article.get("source", "Unknown"))
79
+
80
+ # Enhanced sentiment display
81
+ if "sentiment" in article:
82
+ sentiment_col1, sentiment_col2 = st.columns(2)
83
+ with sentiment_col1:
84
+ st.write("**Sentiment:**", article["sentiment"])
85
+ st.write("**Confidence Score:**", f"{article.get('sentiment_score', 0)*100:.1f}%")
86
+
87
+ with sentiment_col2:
88
+ # Display fine-grained sentiment if available
89
+ if "fine_grained_sentiment" in article and article["fine_grained_sentiment"]:
90
+ fine_grained = article["fine_grained_sentiment"]
91
+ if "category" in fine_grained:
92
+ st.write("**Detailed Sentiment:**", fine_grained["category"])
93
+ if "confidence" in fine_grained:
94
+ st.write("**Confidence:**", f"{fine_grained['confidence']*100:.1f}%")
95
+
96
+ # Display sentiment indices if available
97
+ if "sentiment_indices" in article and article["sentiment_indices"]:
98
+ st.markdown("**Sentiment Indices:**")
99
+ indices = article["sentiment_indices"]
100
+
101
+ # Create columns for displaying indices
102
+ idx_cols = st.columns(3)
103
+
104
+ # Display positivity and negativity in first column
105
+ with idx_cols[0]:
106
+ if "positivity_index" in indices:
107
+ st.markdown(f"**Positivity:** {indices['positivity_index']:.2f}")
108
+ if "negativity_index" in indices:
109
+ st.markdown(f"**Negativity:** {indices['negativity_index']:.2f}")
110
+
111
+ # Display emotional intensity and controversy in second column
112
+ with idx_cols[1]:
113
+ if "emotional_intensity" in indices:
114
+ st.markdown(f"**Emotional Intensity:** {indices['emotional_intensity']:.2f}")
115
+ if "controversy_score" in indices:
116
+ st.markdown(f"**Controversy:** {indices['controversy_score']:.2f}")
117
+
118
+ # Display confidence and ESG in third column
119
+ with idx_cols[2]:
120
+ if "confidence_score" in indices:
121
+ st.markdown(f"**Confidence:** {indices['confidence_score']:.2f}")
122
+ if "esg_relevance" in indices:
123
+ st.markdown(f"**ESG Relevance:** {indices['esg_relevance']:.2f}")
124
+
125
+ # Display entities if available
126
+ if "entities" in article and article["entities"]:
127
+ st.markdown("**Named Entities:**")
128
+ entities = article["entities"]
129
+
130
+ # Organizations
131
+ if "ORG" in entities and entities["ORG"]:
132
+ st.write("**Organizations:**", ", ".join(entities["ORG"]))
133
+
134
+ # People
135
+ if "PERSON" in entities and entities["PERSON"]:
136
+ st.write("**People:**", ", ".join(entities["PERSON"]))
137
+
138
+ # Locations
139
+ if "GPE" in entities and entities["GPE"]:
140
+ st.write("**Locations:**", ", ".join(entities["GPE"]))
141
+
142
+ # Money
143
+ if "MONEY" in entities and entities["MONEY"]:
144
+ st.write("**Financial Values:**", ", ".join(entities["MONEY"]))
145
+
146
+ # Display sentiment targets if available
147
+ if "sentiment_targets" in article and article["sentiment_targets"]:
148
+ st.markdown("**Sentiment Targets:**")
149
+ targets = article["sentiment_targets"]
150
+ for target in targets:
151
+ st.markdown(f"**{target['entity']}** ({target['type']}): {target['sentiment']} ({target['confidence']*100:.1f}%)")
152
+ st.markdown(f"> {target['context']}")
153
+ st.markdown("---")
154
+
155
+ if "url" in article:
156
+ st.write("**[Read More](%s)**" % article["url"])
157
+
158
+ # Display Comparative Analysis
159
+ st.header("📊 Comparative Analysis")
160
+ analysis = data.get("comparative_sentiment_score", {})
161
+
162
+ # Sentiment Distribution
163
+ if "sentiment_distribution" in analysis:
164
+ st.subheader("Sentiment Distribution")
165
+
166
+ sentiment_dist = analysis["sentiment_distribution"]
167
+
168
+ try:
169
+ # Extract basic sentiment data
170
+ if isinstance(sentiment_dist, dict):
171
+ if "basic" in sentiment_dist and isinstance(sentiment_dist["basic"], dict):
172
+ basic_dist = sentiment_dist["basic"]
173
+ elif any(k in sentiment_dist for k in ['positive', 'negative', 'neutral']):
174
+ basic_dist = {k: v for k, v in sentiment_dist.items()
175
+ if k in ['positive', 'negative', 'neutral']}
176
+ else:
177
+ basic_dist = {'positive': 0, 'negative': 0, 'neutral': 1}
178
+ else:
179
+ basic_dist = {'positive': 0, 'negative': 0, 'neutral': 1}
180
+
181
+ # Calculate percentages
182
+ total_articles = sum(basic_dist.values())
183
+ if total_articles > 0:
184
+ percentages = {
185
+ k: (v / total_articles) * 100
186
+ for k, v in basic_dist.items()
187
+ }
188
+ else:
189
+ percentages = {k: 0 for k in basic_dist}
190
+
191
+ # Display as metrics
192
+ st.write("**Sentiment Distribution:**")
193
+
194
+ col1, col2, col3 = st.columns(3)
195
+ with col1:
196
+ st.metric(
197
+ "Positive",
198
+ basic_dist.get('positive', 0),
199
+ f"{percentages.get('positive', 0):.1f}%"
200
+ )
201
+ with col2:
202
+ st.metric(
203
+ "Negative",
204
+ basic_dist.get('negative', 0),
205
+ f"{percentages.get('negative', 0):.1f}%"
206
+ )
207
+ with col3:
208
+ st.metric(
209
+ "Neutral",
210
+ basic_dist.get('neutral', 0),
211
+ f"{percentages.get('neutral', 0):.1f}%"
212
+ )
213
+
214
+ # Create visualization
215
+ chart_data = pd.DataFrame({
216
+ 'Sentiment': ['Positive', 'Negative', 'Neutral'],
217
+ 'Count': [
218
+ basic_dist.get('positive', 0),
219
+ basic_dist.get('negative', 0),
220
+ basic_dist.get('neutral', 0)
221
+ ],
222
+ 'Percentage': [
223
+ f"{percentages.get('positive', 0):.1f}%",
224
+ f"{percentages.get('negative', 0):.1f}%",
225
+ f"{percentages.get('neutral', 0):.1f}%"
226
+ ]
227
  })
228
+
229
+ chart = alt.Chart(chart_data).mark_bar().encode(
230
+ y='Sentiment',
231
+ x='Count',
232
+ color=alt.Color('Sentiment', scale=alt.Scale(
233
+ domain=['Positive', 'Negative', 'Neutral'],
234
+ range=['green', 'red', 'gray']
235
+ )),
236
+ tooltip=['Sentiment', 'Count', 'Percentage']
237
+ ).properties(
238
+ width=600,
239
+ height=300
240
+ )
241
+
242
+ text = chart.mark_text(
243
+ align='left',
244
+ baseline='middle',
245
+ dx=3
246
+ ).encode(
247
+ text='Percentage'
248
+ )
249
+
250
+ chart_with_text = (chart + text)
251
+ st.altair_chart(chart_with_text, use_container_width=True)
252
 
253
+ except Exception as e:
254
+ st.error(f"Error creating visualization: {str(e)}")
255
+
256
+ # Display sentiment indices if available
257
+ if "sentiment_indices" in analysis and analysis["sentiment_indices"]:
258
+ st.subheader("Sentiment Indices")
259
 
260
+ indices = analysis["sentiment_indices"]
 
 
 
 
 
261
 
262
+ try:
263
+ if isinstance(indices, dict):
264
+ # Display as metrics in columns
265
+ cols = st.columns(3)
266
+
267
+ display_names = {
268
+ "positivity_index": "Positivity",
269
+ "negativity_index": "Negativity",
270
+ "emotional_intensity": "Emotional Intensity",
271
+ "controversy_score": "Controversy",
272
+ "confidence_score": "Confidence",
273
+ "esg_relevance": "ESG Relevance"
274
+ }
275
+
276
+ for i, (key, value) in enumerate(indices.items()):
277
+ if isinstance(value, (int, float)):
278
+ with cols[i % 3]:
279
+ display_name = display_names.get(key, key.replace("_", " ").title())
280
+ st.metric(display_name, f"{value:.2f}")
281
+
282
+ # Create visualization
283
+ chart_data = pd.DataFrame({
284
+ 'Index': [display_names.get(k, k.replace("_", " ").title()) for k in indices.keys()],
285
+ 'Value': [v if isinstance(v, (int, float)) else 0 for v in indices.values()]
286
+ })
287
+
288
+ chart = alt.Chart(chart_data).mark_bar().encode(
289
+ x='Value',
290
+ y='Index',
291
+ color=alt.Color('Index')
292
+ ).properties(
293
+ width=600,
294
+ height=300
295
+ )
296
+
297
+ st.altair_chart(chart, use_container_width=True)
298
+
299
+ # Add descriptions
300
+ with st.expander("Sentiment Indices Explained"):
301
+ st.markdown("""
302
+ - **Positivity**: Measures the positive sentiment in the articles (0-1)
303
+ - **Negativity**: Measures the negative sentiment in the articles (0-1)
304
+ - **Emotional Intensity**: Measures the overall emotional content (0-1)
305
+ - **Controversy**: High when both positive and negative sentiments are strong (0-1)
306
+ - **Confidence**: Confidence in the sentiment analysis (0-1)
307
+ - **ESG Relevance**: Relevance to Environmental, Social, and Governance topics (0-1)
308
+ """)
309
+ except Exception as e:
310
+ st.error(f"Error creating indices visualization: {str(e)}")
311
+
312
+ # Display Final Analysis and Audio
313
+ st.header("🎯 Final Analysis")
314
+ if "final_sentiment_analysis" in data:
315
+ st.write(data["final_sentiment_analysis"])
316
+
317
+ # Display sentiment indices in the sidebar
318
+ if "sentiment_indices" in analysis and analysis["sentiment_indices"]:
319
+ indices = analysis["sentiment_indices"]
320
+ if indices and any(isinstance(v, (int, float)) for v in indices.values()):
321
+ st.sidebar.markdown("### Sentiment Indices")
322
+ for idx_name, idx_value in indices.items():
323
+ if isinstance(idx_value, (int, float)):
324
+ formatted_name = " ".join(word.capitalize() for word in idx_name.replace("_", " ").split())
325
+ st.sidebar.metric(formatted_name, f"{idx_value:.2f}")
326
+
327
+ # Display ensemble model information if available
328
+ if "ensemble_info" in data:
329
+ with st.expander("Ensemble Model Details"):
330
+ ensemble = data["ensemble_info"]
331
+
332
+ if "agreement" in ensemble:
333
+ st.metric("Model Agreement", f"{ensemble['agreement']*100:.1f}%")
334
+
335
+ if "models" in ensemble:
336
+ st.subheader("Individual Model Results")
337
+ models_data = []
338
+ for model_name, model_info in ensemble["models"].items():
339
+ models_data.append({
340
+ "Model": model_name,
341
+ "Sentiment": model_info.get("sentiment", "N/A"),
342
+ "Confidence": f"{model_info.get('confidence', 0)*100:.1f}%"
343
+ })
344
+
345
+ if models_data:
346
+ st.table(pd.DataFrame(models_data))
347
+
348
+ # Audio Playback Section
349
+ st.subheader("🔊 Listen to Analysis (Hindi)")
350
+ if data.get("audio_path") and os.path.exists(data["audio_path"]):
351
+ st.audio(data["audio_path"])
352
+ else:
353
+ st.warning("Hindi audio summary not available")
354
+
355
+ # Total Articles
356
+ if "total_articles" in analysis:
357
+ st.sidebar.info(f"Found {analysis['total_articles']} articles")
358
 
359
+ except Exception as e:
360
+ st.error(f"Error analyzing company data: {str(e)}")
361
+ print(f"Error: {str(e)}")
362
+
363
+ # Add a disclaimer
364
+ st.sidebar.markdown("---")
365
+ st.sidebar.markdown("### About")
366
+ st.sidebar.write("This app analyzes news articles and provides sentiment analysis for any company.")
367
 
368
  if __name__ == "__main__":
369
  main()