Update app.py
Browse files
app.py
CHANGED
@@ -134,17 +134,68 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
134 |
comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
|
135 |
user_id = 1
|
136 |
for comment in comments:
|
137 |
-
|
138 |
-
data.append({"
|
139 |
user_id += 1
|
140 |
data = [dict(t) for t in {tuple(d.items()) for d in data}]
|
141 |
except Exception as e:
|
142 |
st.error(f"Exception during comment extraction: {e}")
|
143 |
driver.quit()
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
|
150 |
|
|
|
134 |
comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
|
135 |
user_id = 1
|
136 |
for comment in comments:
|
137 |
+
|
138 |
+
data.append({"Comment": comment.text})
|
139 |
user_id += 1
|
140 |
data = [dict(t) for t in {tuple(d.items()) for d in data}]
|
141 |
except Exception as e:
|
142 |
st.error(f"Exception during comment extraction: {e}")
|
143 |
driver.quit()
|
144 |
+
df = pd.DataFrame(data, columns=["Comment"])
|
145 |
+
|
146 |
+
|
147 |
+
|
148 |
+
if tokenizer and model:
|
149 |
+
inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
|
150 |
+
with torch.no_grad():
|
151 |
+
logits = model(**inputs).logits
|
152 |
+
predicted_probabilities = torch.nn.functional.softmax(logits, dim=-1)
|
153 |
+
predicted_labels = predicted_probabilities.argmax(dim=1)
|
154 |
+
results = []
|
155 |
+
for i, label in enumerate(predicted_labels):
|
156 |
+
results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
|
157 |
+
sentiment_df = pd.DataFrame(results)
|
158 |
+
|
159 |
+
value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
|
160 |
+
final_df = value_counts1
|
161 |
+
tab1, tab2 = st.tabs(["Pie Chart", "Word Cloud"])
|
162 |
+
with tab1:
|
163 |
+
fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
|
164 |
+
fig1.update_traces(textposition='inside', textinfo='percent+label')
|
165 |
+
st.plotly_chart(fig1)
|
166 |
+
|
167 |
+
with tab2:
|
168 |
+
text = " ".join(comment for comment in df['Comment'])
|
169 |
+
stopwords_set = set(stopwords.words('english')) # Correct import and usage
|
170 |
+
text = re.sub('[^A-Za-z]+', ' ', text)
|
171 |
+
words = text.split()
|
172 |
+
clean_text = [word for word in words if word.lower() not in stopwords_set]
|
173 |
+
clean_text = ' '.join(clean_text)
|
174 |
+
wc = WordCloud(width=3000, height=2000, background_color='black', colormap='Pastel1', collocations=False).generate(clean_text)
|
175 |
+
fig = plt.figure(figsize=(40, 30))
|
176 |
+
plt.imshow(wc)
|
177 |
+
plt.axis('off')
|
178 |
+
st.pyplot(fig)
|
179 |
+
|
180 |
+
|
181 |
+
csv = result.to_csv(index=False)
|
182 |
+
st.download_button(
|
183 |
+
label="Download data as CSV",
|
184 |
+
data=csv,
|
185 |
+
file_name='Summary of the results.csv',
|
186 |
+
mime='text/csv',
|
187 |
+
)
|
188 |
+
|
189 |
+
else:
|
190 |
+
st.warning("Please enter a URL.")
|
191 |
+
else:
|
192 |
+
st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
|
193 |
+
|
194 |
+
if 'url_count' in st.session_state:
|
195 |
+
st.write(f"URL pasted {st.session_state['url_count']} times.")
|
196 |
+
|
197 |
+
|
198 |
+
|
199 |
|
200 |
|
201 |
|