nlpblogs committed on
Commit
712bcb0
·
verified ·
1 Parent(s): 8ae1eff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -6
app.py CHANGED
@@ -134,17 +134,68 @@ if st.button("Sentiment Analysis", type="secondary"):
134
  comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
135
  user_id = 1
136
  for comment in comments:
137
- timestamp = datetime.now().strftime("%Y-%m-%d")
138
- data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
139
  user_id += 1
140
  data = [dict(t) for t in {tuple(d.items()) for d in data}]
141
  except Exception as e:
142
  st.error(f"Exception during comment extraction: {e}")
143
  driver.quit()
144
- df1 = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
145
- st.dataframe(df1)
146
- df = df1.dropna(subset=['Comment'])
147
- st.dataframe(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
 
150
 
 
134
  comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
135
  user_id = 1
136
  for comment in comments:
137
+
138
+ data.append({"Comment": comment.text})
139
  user_id += 1
140
  data = [dict(t) for t in {tuple(d.items()) for d in data}]
141
  except Exception as e:
142
  st.error(f"Exception during comment extraction: {e}")
143
  driver.quit()
144
+ df = pd.DataFrame(data, columns=["Comment"])
145
+
146
+
147
+
148
+ if tokenizer and model:
149
+ inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
150
+ with torch.no_grad():
151
+ logits = model(**inputs).logits
152
+ predicted_probabilities = torch.nn.functional.softmax(logits, dim=-1)
153
+ predicted_labels = predicted_probabilities.argmax(dim=1)
154
+ results = []
155
+ for i, label in enumerate(predicted_labels):
156
+ results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
157
+ sentiment_df = pd.DataFrame(results)
158
+
159
+ value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
160
+ final_df = value_counts1
161
+ tab1, tab2 = st.tabs(["Pie Chart", "Word Cloud"])
162
+ with tab1:
163
+ fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
164
+ fig1.update_traces(textposition='inside', textinfo='percent+label')
165
+ st.plotly_chart(fig1)
166
+
167
+ with tab2:
168
+ text = " ".join(comment for comment in df['Comment'])
169
+ stopwords_set = set(stopwords.words('english')) # Correct import and usage
170
+ text = re.sub('[^A-Za-z]+', ' ', text)
171
+ words = text.split()
172
+ clean_text = [word for word in words if word.lower() not in stopwords_set]
173
+ clean_text = ' '.join(clean_text)
174
+ wc = WordCloud(width=3000, height=2000, background_color='black', colormap='Pastel1', collocations=False).generate(clean_text)
175
+ fig = plt.figure(figsize=(40, 30))
176
+ plt.imshow(wc)
177
+ plt.axis('off')
178
+ st.pyplot(fig)
179
+
180
+
181
+ csv = result.to_csv(index=False)
182
+ st.download_button(
183
+ label="Download data as CSV",
184
+ data=csv,
185
+ file_name='Summary of the results.csv',
186
+ mime='text/csv',
187
+ )
188
+
189
+ else:
190
+ st.warning("Please enter a URL.")
191
+ else:
192
+ st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
193
+
194
+ if 'url_count' in st.session_state:
195
+ st.write(f"URL pasted {st.session_state['url_count']} times.")
196
+
197
+
198
+
199
 
200
 
201