new new graphs and summaries
Browse files
app.py
CHANGED
@@ -49,59 +49,106 @@ def process_single_sheet(df, product_name):
|
|
49 |
|
50 |
def create_comparison_charts(sentiment_results):
|
51 |
"""
|
52 |
-
Create comparison charts for different products
|
53 |
-
Returns two plotly figures: bar chart and pie chart
|
54 |
"""
|
55 |
# Prepare data for plotting
|
56 |
-
|
57 |
-
sentiments = []
|
58 |
-
counts = []
|
59 |
-
|
60 |
for product, sentiment_counts in sentiment_results.items():
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
})
|
71 |
|
72 |
-
#
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
# Create pie chart for overall sentiment distribution
|
82 |
-
pie_fig = px.pie(plot_df,
|
83 |
-
values='Count',
|
84 |
-
names='Sentiment',
|
85 |
-
title='Overall Sentiment Distribution',
|
86 |
-
color_discrete_sequence=px.colors.qualitative.Set3)
|
87 |
-
|
88 |
-
# Create summary table
|
89 |
-
summary_df = plot_df.pivot_table(
|
90 |
-
values='Count',
|
91 |
-
index='Product',
|
92 |
-
columns='Sentiment',
|
93 |
-
fill_value=0
|
94 |
-
).round(2)
|
95 |
-
|
96 |
-
# Add total reviews column
|
97 |
-
summary_df['Total Reviews'] = summary_df.sum(axis=1)
|
98 |
-
|
99 |
-
# Calculate percentage of positive reviews (Positive + Very Positive)
|
100 |
-
positive_cols = ['Positive', 'Very Positive']
|
101 |
-
positive_cols = [col for col in positive_cols if col in summary_df.columns]
|
102 |
-
summary_df['Positive Ratio'] = (summary_df[positive_cols].sum(axis=1) / summary_df['Total Reviews'] * 100).round(2)
|
103 |
-
|
104 |
-
return bar_fig, pie_fig, summary_df
|
105 |
|
106 |
|
107 |
def process_file(file_obj):
|
@@ -114,7 +161,6 @@ def process_file(file_obj):
|
|
114 |
all_processed_dfs = {}
|
115 |
|
116 |
if file_path.endswith('.csv'):
|
117 |
-
# Process single CSV file
|
118 |
df = pd.read_csv(file_path)
|
119 |
product_name = "Product" # Default name for CSV
|
120 |
processed_df, sentiment_counts = process_single_sheet(df, product_name)
|
@@ -122,9 +168,7 @@ def process_file(file_obj):
|
|
122 |
sentiment_results[product_name] = sentiment_counts
|
123 |
|
124 |
elif file_path.endswith(('.xlsx', '.xls')):
|
125 |
-
# Process multiple sheets in Excel file
|
126 |
excel_file = pd.ExcelFile(file_path)
|
127 |
-
|
128 |
for sheet_name in excel_file.sheet_names:
|
129 |
df = pd.read_excel(file_path, sheet_name=sheet_name)
|
130 |
processed_df, sentiment_counts = process_single_sheet(df, sheet_name)
|
@@ -134,21 +178,17 @@ def process_file(file_obj):
|
|
134 |
raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")
|
135 |
|
136 |
# Create visualizations
|
137 |
-
|
138 |
|
139 |
-
# Save results
|
140 |
output_path = "sentiment_analysis_results.xlsx"
|
141 |
with pd.ExcelWriter(output_path) as writer:
|
142 |
-
# Save processed data
|
143 |
for sheet_name, df in all_processed_dfs.items():
|
144 |
df.to_excel(writer, sheet_name=sheet_name, index=False)
|
145 |
-
|
146 |
-
# Save summary
|
147 |
-
summary_table.to_excel(writer, sheet_name='Summary', index=True)
|
148 |
|
149 |
return (
|
150 |
-
|
151 |
-
pie_chart,
|
152 |
summary_table,
|
153 |
output_path
|
154 |
)
|
@@ -158,12 +198,86 @@ def process_file(file_obj):
|
|
158 |
|
159 |
|
160 |
# Create Gradio interface
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
with gr.Blocks() as interface:
|
162 |
-
gr.Markdown("#
|
|
|
163 |
gr.Markdown("""
|
164 |
-
|
165 |
-
|
166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
""")
|
168 |
|
169 |
with gr.Row():
|
@@ -176,20 +290,19 @@ with gr.Blocks() as interface:
|
|
176 |
analyze_btn = gr.Button("Analyze Sentiments")
|
177 |
|
178 |
with gr.Row():
|
179 |
-
|
180 |
-
pie_plot = gr.Plot(label="Overall Sentiment Distribution")
|
181 |
|
182 |
with gr.Row():
|
183 |
-
summary_table = gr.Dataframe(label="Summary
|
184 |
|
185 |
with gr.Row():
|
186 |
-
output_file = gr.File(label="Download
|
187 |
|
188 |
analyze_btn.click(
|
189 |
fn=process_file,
|
190 |
inputs=[file_input],
|
191 |
-
outputs=[
|
192 |
)
|
193 |
|
194 |
-
#
|
195 |
-
interface.launch()
|
|
|
49 |
|
50 |
def create_comparison_charts(sentiment_results):
    """
    Create investment-focused comparison charts for different products.

    Args:
        sentiment_results: Mapping of product name to an object whose
            ``to_dict()`` returns ``{sentiment label: review count}``.

    Returns:
        Tuple ``(score_fig, ratio_fig, summary_df)``:
        - score_fig: bar chart of the weighted sentiment score (0-100) per product.
        - ratio_fig: grouped bar chart of positive vs negative percentages.
        - summary_df: one row per product, sorted by sentiment score (descending).
    """
    # Weight of each sentiment class in the 0-100 score; the key order also
    # fixes the sentiment column order of the working frame.
    sentiment_weights = {
        'Very Negative': 0,
        'Negative': 25,
        'Neutral': 50,
        'Positive': 75,
        'Very Positive': 100,
    }
    sentiment_labels = list(sentiment_weights)

    # Prepare data for plotting: one row of percentages per product.
    plot_data = []
    for product, sentiment_counts in sentiment_results.items():
        sentiment_dict = sentiment_counts.to_dict()
        total = sum(sentiment_dict.values())

        row = {'Product': product, 'Total Reviews': total}
        for sentiment, count in sentiment_dict.items():
            # A product without any reviews gets 0% instead of raising
            # ZeroDivisionError.
            row[sentiment] = (count / total) * 100 if total else 0.0
        plot_data.append(row)

    # Passing the column list guarantees every expected column exists, even
    # when no products were supplied at all.
    df = pd.DataFrame(
        plot_data,
        columns=['Product', 'Total Reviews', *sentiment_labels],
    )
    # A sentiment absent for one product (but present for another) shows up
    # as NaN; it means "0% of reviews", so normalise it to 0.
    df[sentiment_labels] = df[sentiment_labels].fillna(0.0).astype(float)

    # Weighted sentiment score (0 to 100), computed for all products at once.
    weights = pd.Series(sentiment_weights, dtype=float)
    df['Sentiment Score'] = (
        df[sentiment_labels].mul(weights, axis=1).sum(axis=1).div(100).round(2)
    )

    # Create sentiment score chart
    score_fig = go.Figure()
    score_fig.add_trace(go.Bar(
        x=df['Product'],
        y=df['Sentiment Score'],
        text=df['Sentiment Score'].round(1),
        textposition='auto',
        marker_color='rgb(65, 105, 225)'
    ))
    score_fig.update_layout(
        title='Overall Sentiment Score by Product (0-100)',
        yaxis_title='Weighted Sentiment Score',
        yaxis_range=[0, 100],
        showlegend=False
    )

    # Calculate Positive-Negative Ratios
    df['Positive Ratio'] = df[['Positive', 'Very Positive']].sum(axis=1)
    df['Negative Ratio'] = df[['Negative', 'Very Negative']].sum(axis=1)

    # Create Positive-Negative ratio chart
    ratio_fig = go.Figure()
    ratio_fig.add_trace(go.Bar(
        name='Positive',
        x=df['Product'],
        y=df['Positive Ratio'],
        marker_color='rgb(50, 205, 50)'
    ))
    ratio_fig.add_trace(go.Bar(
        name='Negative',
        x=df['Product'],
        y=df['Negative Ratio'],
        marker_color='rgb(220, 20, 60)'
    ))
    ratio_fig.update_layout(
        barmode='group',
        title='Positive vs Negative Sentiment Ratio by Product',
        yaxis_title='Percentage (%)'
    )

    # Create summary table with investment-relevant metrics
    summary_df = pd.DataFrame({
        'Product': df['Product'],
        'Total Reviews': df['Total Reviews'],
        'Sentiment Score (0-100)': df['Sentiment Score'],
        'Positive Ratio (%)': df['Positive Ratio'].round(2),
        'Negative Ratio (%)': df['Negative Ratio'].round(2),
        'Neutral Ratio (%)': df['Neutral'].round(2)
    })

    # Confidence Score = opinionated share / neutral share. A neutral share
    # of 0 is swapped for 0.001 so the division never hits zero.
    opinionated = summary_df['Positive Ratio (%)'] + summary_df['Negative Ratio (%)']
    neutral = summary_df['Neutral Ratio (%)'].replace(0, 0.001)
    summary_df['Confidence Score'] = (opinionated / neutral).round(2)

    # Sort by Sentiment Score for easy comparison
    summary_df = summary_df.sort_values('Sentiment Score (0-100)', ascending=False)

    return score_fig, ratio_fig, summary_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
|
153 |
|
154 |
def process_file(file_obj):
|
|
|
161 |
all_processed_dfs = {}
|
162 |
|
163 |
if file_path.endswith('.csv'):
|
|
|
164 |
df = pd.read_csv(file_path)
|
165 |
product_name = "Product" # Default name for CSV
|
166 |
processed_df, sentiment_counts = process_single_sheet(df, product_name)
|
|
|
168 |
sentiment_results[product_name] = sentiment_counts
|
169 |
|
170 |
elif file_path.endswith(('.xlsx', '.xls')):
|
|
|
171 |
excel_file = pd.ExcelFile(file_path)
|
|
|
172 |
for sheet_name in excel_file.sheet_names:
|
173 |
df = pd.read_excel(file_path, sheet_name=sheet_name)
|
174 |
processed_df, sentiment_counts = process_single_sheet(df, sheet_name)
|
|
|
178 |
raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")
|
179 |
|
180 |
# Create visualizations
|
181 |
+
distribution_plot, summary_table = create_comparison_charts(sentiment_results)
|
182 |
|
183 |
+
# Save results
|
184 |
output_path = "sentiment_analysis_results.xlsx"
|
185 |
with pd.ExcelWriter(output_path) as writer:
|
|
|
186 |
for sheet_name, df in all_processed_dfs.items():
|
187 |
df.to_excel(writer, sheet_name=sheet_name, index=False)
|
188 |
+
summary_table.to_excel(writer, sheet_name='Summary', index=False)
|
|
|
|
|
189 |
|
190 |
return (
|
191 |
+
distribution_plot,
|
|
|
192 |
summary_table,
|
193 |
output_path
|
194 |
)
|
|
|
198 |
|
199 |
|
200 |
# Create Gradio interface
|
201 |
+
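# Simplified chart builder: this later definition rebinds create_comparison_charts, so it is the version process_file calls at runtime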
|
202 |
+
def create_comparison_charts(sentiment_results):
    """
    Create simplified, investment-focused comparison charts.

    Args:
        sentiment_results: Mapping of product name to an object whose
            ``to_dict()`` returns ``{sentiment label: review count}``.

    Returns:
        Tuple ``(stack_fig, summary_df)``:
        - stack_fig: stacked bar chart of the sentiment percentages per product.
        - summary_df: one row per product with total reviews and the
          positive / neutral / negative percentages, sorted by positive
          percentage (descending).
    """
    sentiment_labels = ['Very Negative', 'Negative', 'Neutral', 'Positive', 'Very Positive']

    # Prepare data: one row of percentages per product.
    plot_data = []
    for product, sentiment_counts in sentiment_results.items():
        sentiment_dict = sentiment_counts.to_dict()
        total = sum(sentiment_dict.values())

        row = {'Product': product, 'Total Reviews': total}
        for sentiment, count in sentiment_dict.items():
            # A product without any reviews gets 0% instead of raising
            # ZeroDivisionError.
            row[sentiment] = (count / total) * 100 if total else 0.0
        plot_data.append(row)

    # Passing the column list guarantees every expected column exists, even
    # when no products were supplied at all.
    df = pd.DataFrame(
        plot_data,
        columns=['Product', 'Total Reviews', *sentiment_labels],
    )
    # A sentiment absent for one product (but present for another) shows up
    # as NaN; it means "0% of reviews", so normalise it to 0.
    df[sentiment_labels] = df[sentiment_labels].fillna(0.0).astype(float)

    # 1. Simple Stacked Bar Chart showing sentiment distribution
    stack_fig = go.Figure()
    # Traces are added from most positive to most negative, each with its
    # own colour.
    sentiment_colors = {
        'Very Positive': 'rgb(39, 174, 96)',
        'Positive': 'rgb(46, 204, 113)',
        'Neutral': 'rgb(241, 196, 15)',
        'Negative': 'rgb(231, 76, 60)',
        'Very Negative': 'rgb(192, 57, 43)',
    }
    for sentiment, color in sentiment_colors.items():
        stack_fig.add_trace(go.Bar(
            name=sentiment,
            x=df['Product'],
            y=df[sentiment],
            marker_color=color
        ))

    stack_fig.update_layout(
        barmode='stack',
        title='Sentiment Distribution by Product',
        yaxis_title='Percentage (%)'
    )

    # 2. Aggregated Sentiment Ratios for Quick Comparison
    df['Positive_Total'] = df[['Positive', 'Very Positive']].sum(axis=1)
    df['Negative_Total'] = df[['Negative', 'Very Negative']].sum(axis=1)

    summary_df = pd.DataFrame({
        'Product': df['Product'],
        'Total Reviews': df['Total Reviews'],
        'Positive (%)': df['Positive_Total'].round(2),
        'Neutral (%)': df['Neutral'].round(2),
        'Negative (%)': df['Negative_Total'].round(2)
    })

    # Sort by Positive percentage for easy comparison
    summary_df = summary_df.sort_values('Positive (%)', ascending=False)

    return stack_fig, summary_df
|
264 |
+
|
265 |
+
|
266 |
+
# Update the Gradio interface
|
267 |
with gr.Blocks() as interface:
|
268 |
+
gr.Markdown("# Product Review Sentiment Analysis")
|
269 |
+
|
270 |
gr.Markdown("""
|
271 |
+
### Quick Guide
|
272 |
+
1. **Excel File (Multiple Products)**:
|
273 |
+
- Create separate sheets for each product
|
274 |
+
- Name sheets with product/company names
|
275 |
+
- Include "Reviews" column in each sheet
|
276 |
+
|
277 |
+
2. **CSV File (Single Product)**:
|
278 |
+
- Include "Reviews" column
|
279 |
+
|
280 |
+
Upload your file and click Analyze to get started.
|
281 |
""")
|
282 |
|
283 |
with gr.Row():
|
|
|
290 |
analyze_btn = gr.Button("Analyze Sentiments")
|
291 |
|
292 |
with gr.Row():
|
293 |
+
distribution_plot = gr.Plot(label="Sentiment Distribution")
|
|
|
294 |
|
295 |
with gr.Row():
|
296 |
+
summary_table = gr.Dataframe(label="Summary Metrics")
|
297 |
|
298 |
with gr.Row():
|
299 |
+
output_file = gr.File(label="Download Full Report")
|
300 |
|
301 |
analyze_btn.click(
|
302 |
fn=process_file,
|
303 |
inputs=[file_input],
|
304 |
+
outputs=[distribution_plot, summary_table, output_file]
|
305 |
)
|
306 |
|
307 |
+
# launch interface
|
308 |
+
interface.launch()
|