shresht8 committed on
Commit
e0b4a17
·
verified ·
1 Parent(s): 71e3164

new new graphs and summaries

Browse files
Files changed (1) hide show
  1. app.py +183 -70
app.py CHANGED
@@ -49,59 +49,106 @@ def process_single_sheet(df, product_name):
49
 
50
  def create_comparison_charts(sentiment_results):
51
  """
52
- Create comparison charts for different products
53
- Returns two plotly figures: bar chart and pie chart
54
  """
55
  # Prepare data for plotting
56
- products = []
57
- sentiments = []
58
- counts = []
59
-
60
  for product, sentiment_counts in sentiment_results.items():
61
- for sentiment, count in sentiment_counts.items():
62
- products.append(product)
63
- sentiments.append(sentiment)
64
- counts.append(count)
65
-
66
- plot_df = pd.DataFrame({
67
- 'Product': products,
68
- 'Sentiment': sentiments,
69
- 'Count': counts
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  })
71
 
72
- # Create stacked bar chart
73
- bar_fig = px.bar(plot_df,
74
- x='Product',
75
- y='Count',
76
- color='Sentiment',
77
- title='Sentiment Distribution by Product',
78
- labels={'Count': 'Number of Reviews'},
79
- color_discrete_sequence=px.colors.qualitative.Set3)
80
-
81
- # Create pie chart for overall sentiment distribution
82
- pie_fig = px.pie(plot_df,
83
- values='Count',
84
- names='Sentiment',
85
- title='Overall Sentiment Distribution',
86
- color_discrete_sequence=px.colors.qualitative.Set3)
87
-
88
- # Create summary table
89
- summary_df = plot_df.pivot_table(
90
- values='Count',
91
- index='Product',
92
- columns='Sentiment',
93
- fill_value=0
94
- ).round(2)
95
-
96
- # Add total reviews column
97
- summary_df['Total Reviews'] = summary_df.sum(axis=1)
98
-
99
- # Calculate percentage of positive reviews (Positive + Very Positive)
100
- positive_cols = ['Positive', 'Very Positive']
101
- positive_cols = [col for col in positive_cols if col in summary_df.columns]
102
- summary_df['Positive Ratio'] = (summary_df[positive_cols].sum(axis=1) / summary_df['Total Reviews'] * 100).round(2)
103
-
104
- return bar_fig, pie_fig, summary_df
105
 
106
 
107
  def process_file(file_obj):
@@ -114,7 +161,6 @@ def process_file(file_obj):
114
  all_processed_dfs = {}
115
 
116
  if file_path.endswith('.csv'):
117
- # Process single CSV file
118
  df = pd.read_csv(file_path)
119
  product_name = "Product" # Default name for CSV
120
  processed_df, sentiment_counts = process_single_sheet(df, product_name)
@@ -122,9 +168,7 @@ def process_file(file_obj):
122
  sentiment_results[product_name] = sentiment_counts
123
 
124
  elif file_path.endswith(('.xlsx', '.xls')):
125
- # Process multiple sheets in Excel file
126
  excel_file = pd.ExcelFile(file_path)
127
-
128
  for sheet_name in excel_file.sheet_names:
129
  df = pd.read_excel(file_path, sheet_name=sheet_name)
130
  processed_df, sentiment_counts = process_single_sheet(df, sheet_name)
@@ -134,21 +178,17 @@ def process_file(file_obj):
134
  raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")
135
 
136
  # Create visualizations
137
- bar_chart, pie_chart, summary_table = create_comparison_charts(sentiment_results)
138
 
139
- # Save results to a new Excel file
140
  output_path = "sentiment_analysis_results.xlsx"
141
  with pd.ExcelWriter(output_path) as writer:
142
- # Save processed data
143
  for sheet_name, df in all_processed_dfs.items():
144
  df.to_excel(writer, sheet_name=sheet_name, index=False)
145
-
146
- # Save summary
147
- summary_table.to_excel(writer, sheet_name='Summary', index=True)
148
 
149
  return (
150
- bar_chart,
151
- pie_chart,
152
  summary_table,
153
  output_path
154
  )
@@ -158,12 +198,86 @@ def process_file(file_obj):
158
 
159
 
160
  # Create Gradio interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  with gr.Blocks() as interface:
162
- gr.Markdown("# Multi-Product Review Sentiment Analysis")
 
163
  gr.Markdown("""
164
- Upload a file to analyze sentiments:
165
- - For CSV: Single product reviews with 'Reviews' column
166
- - For Excel: Multiple sheets, each named after the product, with 'Reviews' column
 
 
 
 
 
 
 
167
  """)
168
 
169
  with gr.Row():
@@ -176,20 +290,19 @@ with gr.Blocks() as interface:
176
  analyze_btn = gr.Button("Analyze Sentiments")
177
 
178
  with gr.Row():
179
- bar_plot = gr.Plot(label="Sentiment Distribution by Product")
180
- pie_plot = gr.Plot(label="Overall Sentiment Distribution")
181
 
182
  with gr.Row():
183
- summary_table = gr.Dataframe(label="Summary Statistics")
184
 
185
  with gr.Row():
186
- output_file = gr.File(label="Download Detailed Results")
187
 
188
  analyze_btn.click(
189
  fn=process_file,
190
  inputs=[file_input],
191
- outputs=[bar_plot, pie_plot, summary_table, output_file]
192
  )
193
 
194
- # Launch the interface
195
- interface.launch()
 
49
 
50
  def create_comparison_charts(sentiment_results):
51
  """
52
+ Create investment-focused comparison charts for different products
 
53
  """
54
  # Prepare data for plotting
55
+ plot_data = []
 
 
 
56
  for product, sentiment_counts in sentiment_results.items():
57
+ # Convert to dictionary and get sum
58
+ sentiment_dict = sentiment_counts.to_dict()
59
+ total = sum(sentiment_dict.values())
60
+
61
+ row = {
62
+ 'Product': product,
63
+ 'Total Reviews': total
64
+ }
65
+ # Calculate percentages for each sentiment
66
+ for sentiment, count in sentiment_dict.items():
67
+ row[sentiment] = (count / total) * 100
68
+ plot_data.append(row)
69
+
70
+ df = pd.DataFrame(plot_data)
71
+
72
+ # Ensure all sentiment columns exist (in case some products don't have all sentiments)
73
+ for sentiment in ['Very Negative', 'Negative', 'Neutral', 'Positive', 'Very Positive']:
74
+ if sentiment not in df.columns:
75
+ df[sentiment] = 0
76
+
77
+ # Calculate weighted sentiment score (0 to 100)
78
+ sentiment_weights = {
79
+ 'Very Negative': 0,
80
+ 'Negative': 25,
81
+ 'Neutral': 50,
82
+ 'Positive': 75,
83
+ 'Very Positive': 100
84
+ }
85
+
86
+ df['Sentiment Score'] = 0
87
+ for product in df['Product']:
88
+ score = 0
89
+ for sentiment, weight in sentiment_weights.items():
90
+ if sentiment in df.columns:
91
+ score += (df.loc[df['Product'] == product, sentiment].iloc[0] * weight / 100)
92
+ df.loc[df['Product'] == product, 'Sentiment Score'] = round(score, 2)
93
+
94
+ # Create sentiment score chart
95
+ score_fig = go.Figure()
96
+ score_fig.add_trace(go.Bar(
97
+ x=df['Product'],
98
+ y=df['Sentiment Score'],
99
+ text=df['Sentiment Score'].round(1),
100
+ textposition='auto',
101
+ marker_color='rgb(65, 105, 225)'
102
+ ))
103
+ score_fig.update_layout(
104
+ title='Overall Sentiment Score by Product (0-100)',
105
+ yaxis_title='Weighted Sentiment Score',
106
+ yaxis_range=[0, 100],
107
+ showlegend=False
108
+ )
109
+
110
+ # Calculate Positive-Negative Ratios
111
+ df['Positive Ratio'] = df[['Positive', 'Very Positive']].sum(axis=1)
112
+ df['Negative Ratio'] = df[['Negative', 'Very Negative']].sum(axis=1)
113
+
114
+ # Create Positive-Negative ratio chart
115
+ ratio_fig = go.Figure()
116
+ ratio_fig.add_trace(go.Bar(
117
+ name='Positive',
118
+ x=df['Product'],
119
+ y=df['Positive Ratio'],
120
+ marker_color='rgb(50, 205, 50)'
121
+ ))
122
+ ratio_fig.add_trace(go.Bar(
123
+ name='Negative',
124
+ x=df['Product'],
125
+ y=df['Negative Ratio'],
126
+ marker_color='rgb(220, 20, 60)'
127
+ ))
128
+ ratio_fig.update_layout(
129
+ barmode='group',
130
+ title='Positive vs Negative Sentiment Ratio by Product',
131
+ yaxis_title='Percentage (%)'
132
+ )
133
+
134
+ # Create summary table with investment-relevant metrics
135
+ summary_df = pd.DataFrame({
136
+ 'Product': df['Product'],
137
+ 'Total Reviews': df['Total Reviews'],
138
+ 'Sentiment Score (0-100)': df['Sentiment Score'],
139
+ 'Positive Ratio (%)': df['Positive Ratio'].round(2),
140
+ 'Negative Ratio (%)': df['Negative Ratio'].round(2),
141
+ 'Neutral Ratio (%)': df['Neutral'].round(2)
142
  })
143
 
144
+ # Calculate Confidence Score (avoiding division by zero)
145
+ summary_df['Confidence Score'] = ((summary_df['Positive Ratio (%)'] + summary_df['Negative Ratio (%)']) /
146
+ summary_df['Neutral Ratio (%)'].replace(0, 0.001)).round(2)
147
+
148
+ # Sort by Sentiment Score for easy comparison
149
+ summary_df = summary_df.sort_values('Sentiment Score (0-100)', ascending=False)
150
+
151
+ return score_fig, ratio_fig, summary_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
 
154
  def process_file(file_obj):
 
161
  all_processed_dfs = {}
162
 
163
  if file_path.endswith('.csv'):
 
164
  df = pd.read_csv(file_path)
165
  product_name = "Product" # Default name for CSV
166
  processed_df, sentiment_counts = process_single_sheet(df, product_name)
 
168
  sentiment_results[product_name] = sentiment_counts
169
 
170
  elif file_path.endswith(('.xlsx', '.xls')):
 
171
  excel_file = pd.ExcelFile(file_path)
 
172
  for sheet_name in excel_file.sheet_names:
173
  df = pd.read_excel(file_path, sheet_name=sheet_name)
174
  processed_df, sentiment_counts = process_single_sheet(df, sheet_name)
 
178
  raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")
179
 
180
  # Create visualizations
181
+ distribution_plot, summary_table = create_comparison_charts(sentiment_results)
182
 
183
+ # Save results
184
  output_path = "sentiment_analysis_results.xlsx"
185
  with pd.ExcelWriter(output_path) as writer:
 
186
  for sheet_name, df in all_processed_dfs.items():
187
  df.to_excel(writer, sheet_name=sheet_name, index=False)
188
+ summary_table.to_excel(writer, sheet_name='Summary', index=False)
 
 
189
 
190
  return (
191
+ distribution_plot,
 
192
  summary_table,
193
  output_path
194
  )
 
198
 
199
 
200
  # Create Gradio interface
201
+ # In the Gradio interface section
202
def create_comparison_charts(sentiment_results):
    """
    Create simplified, investment-focused comparison charts.

    Args:
        sentiment_results: Mapping of product name to per-sentiment review
            counts. Each value must provide ``to_dict()`` (presumably a
            pandas Series of counts -- confirm against the caller).

    Returns:
        tuple: ``(stack_fig, summary_df)`` where ``stack_fig`` is a stacked
        bar chart of the sentiment percentages per product and ``summary_df``
        is a DataFrame with total reviews and aggregated positive / neutral /
        negative percentages, sorted by positive percentage (highest first).
    """
    sentiment_labels = ['Very Negative', 'Negative', 'Neutral', 'Positive', 'Very Positive']

    # Prepare data: one row of percentages per product.
    plot_data = []
    for product, sentiment_counts in sentiment_results.items():
        sentiment_dict = sentiment_counts.to_dict()
        total = sum(sentiment_dict.values())

        row = {'Product': product, 'Total Reviews': total}
        # Seed every known label with 0 so a sentiment that is absent for
        # this product shows up as 0% instead of NaN.
        row.update(dict.fromkeys(sentiment_labels, 0.0))
        # A product without any counted reviews keeps all percentages at 0
        # rather than dividing by zero.
        if total:
            for sentiment, count in sentiment_dict.items():
                row[sentiment] = (count / total) * 100
        plot_data.append(row)

    # Explicit columns keep the frame well-formed even for empty input.
    df = pd.DataFrame(plot_data, columns=['Product', 'Total Reviews', *sentiment_labels])
    df[sentiment_labels] = df[sentiment_labels].astype(float)

    # 1. Simple Stacked Bar Chart showing sentiment distribution
    stack_fig = go.Figure()
    sentiments = ['Very Positive', 'Positive', 'Neutral', 'Negative', 'Very Negative']
    colors = ['rgb(39, 174, 96)', 'rgb(46, 204, 113)',
              'rgb(241, 196, 15)', 'rgb(231, 76, 60)',
              'rgb(192, 57, 43)']

    # Traces are added from most positive to most negative, pairing each
    # sentiment with its green-to-red color.
    for sentiment, color in zip(sentiments, colors):
        stack_fig.add_trace(go.Bar(
            name=sentiment,
            x=df['Product'],
            y=df[sentiment],
            marker_color=color
        ))

    stack_fig.update_layout(
        barmode='stack',
        title='Sentiment Distribution by Product',
        yaxis_title='Percentage (%)'
    )

    # 2. Aggregated Sentiment Ratios for Quick Comparison
    df['Positive_Total'] = df[['Positive', 'Very Positive']].sum(axis=1)
    df['Negative_Total'] = df[['Negative', 'Very Negative']].sum(axis=1)

    summary_df = pd.DataFrame({
        'Product': df['Product'],
        'Total Reviews': df['Total Reviews'],
        'Positive (%)': df['Positive_Total'].round(2),
        'Neutral (%)': df['Neutral'].round(2),
        'Negative (%)': df['Negative_Total'].round(2)
    })

    # Sort by Positive percentage for easy comparison
    summary_df = summary_df.sort_values('Positive (%)', ascending=False)

    return stack_fig, summary_df
264
+
265
+
266
+ # Update the Gradio interface
267
  with gr.Blocks() as interface:
268
+ gr.Markdown("# Product Review Sentiment Analysis")
269
+
270
  gr.Markdown("""
271
+ ### Quick Guide
272
+ 1. **Excel File (Multiple Products)**:
273
+ - Create separate sheets for each product
274
+ - Name sheets with product/company names
275
+ - Include "Reviews" column in each sheet
276
+
277
+ 2. **CSV File (Single Product)**:
278
+ - Include "Reviews" column
279
+
280
+ Upload your file and click Analyze to get started.
281
  """)
282
 
283
  with gr.Row():
 
290
  analyze_btn = gr.Button("Analyze Sentiments")
291
 
292
  with gr.Row():
293
+ distribution_plot = gr.Plot(label="Sentiment Distribution")
 
294
 
295
  with gr.Row():
296
+ summary_table = gr.Dataframe(label="Summary Metrics")
297
 
298
  with gr.Row():
299
+ output_file = gr.File(label="Download Full Report")
300
 
301
  analyze_btn.click(
302
  fn=process_file,
303
  inputs=[file_input],
304
+ outputs=[distribution_plot, summary_table, output_file]
305
  )
306
 
307
+ # launch interface
308
+ interface.launch()