nlpblogs committed
Commit feca4a5 · verified · Parent: 54fb744

Update app.py

Files changed (1): app.py (+79 -11)
app.py CHANGED
@@ -32,6 +32,13 @@ import plotly.express as px
 import zipfile
 import torch
 
+import matplotlib.pyplot as plt
+from wordcloud import WordCloud
+from nltk.corpus import stopwords
+
+import nltk
+nltk.download('stopwords')
+
 
 
 with st.sidebar:
@@ -120,15 +127,76 @@ if st.button("Sentiment Analysis", type="secondary"):
 
             placeholder.text("Scrolling complete.")
             progress_bar.empty()
-            dates = []
-            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
-            comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
-            st.write(comments)
-            for comment in comments:
-                comment_text = comment.text  # Extract the text from the WebElement
-                date_match = re.search(r'(\d+ (day|week|month|year)s? ago)|(\d{4}-\d{2}-\d{2})', comment_text)
-                if date_match:
-                    date_string = date_match.group(0)  # Get the matched date string.
-                    st.write(date_string)  # for debug
+
+            data = []
+            try:
+                wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
+                comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
+                user_id = 1
+                for comment in comments:
+                    timestamp = datetime.now().strftime("%Y-%m-%d")
+                    data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
+                    user_id += 1
+                data = [dict(t) for t in {tuple(d.items()) for d in data}]
+            except Exception as e:
+                st.error(f"Exception during comment extraction: {e}")
+            driver.quit()
+            df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
+            st.dataframe(df)
+
+            if tokenizer and model:
+                inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
+                with torch.no_grad():
+                    logits = model(**inputs).logits
+                predicted_probabilities = torch.nn.functional.softmax(logits, dim=-1)
+                predicted_labels = predicted_probabilities.argmax(dim=1)
+                results = []
+                for i, label in enumerate(predicted_labels):
+                    results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
+                sentiment_df = pd.DataFrame(results)
+
+                value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
+                final_df = value_counts1
+                tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
+                with tab1:
+                    fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
+                    fig1.update_traces(textposition='inside', textinfo='percent+label')
+                    st.plotly_chart(fig1)
+
+                result = pd.concat([df, sentiment_df], axis=1)
+                st.dataframe(result)
+
+                with tab2:
+                    fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
+                    st.plotly_chart(fig2)
+
+                text = " ".join(review for review in df['Comment'])
+                stop_words = set(stopwords.words('english'))
+                text = re.sub('[^A-Za-z]+', ' ', text)
+                words = text.split()
+                clean_text = [word for word in words if word.lower() not in stop_words]
+                clean_text = ' '.join(clean_text)
+                stop_words = set(stopwords.words('english'))
+                wc = WordCloud(width=800, height=400, background_color='white').generate(clean_text)
+                fig = plt.figure(figsize=(12,6))
+                plt.imshow(wc, interpolation='bilinear')
+                plt.axis('off')
+
+                st.pyplot(fig)
+
+
+                csv = result.to_csv(index=False)
+                st.download_button(
+                    label="Download data as CSV",
+                    data=csv,
+                    file_name='Summary of the results.csv',
+                    mime='text/csv',
+                )
 
-
+        else:
+            st.warning("Please enter a URL.")
+    else:
+        st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
+
+if 'url_count' in st.session_state:  # added if statement.
+    st.write(f"URL pasted {st.session_state['url_count']} times.")