Update app.py
Browse files
app.py
CHANGED
@@ -70,8 +70,17 @@ with st.sidebar:
|
|
70 |
''')
|
71 |
|
72 |
|
|
|
|
|
|
|
73 |
|
|
|
|
|
74 |
|
|
|
|
|
|
|
|
|
75 |
|
76 |
def clear_question():
|
77 |
st.session_state["url"] = ""
|
@@ -98,7 +107,7 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
98 |
placeholder = st.empty()
|
99 |
progress_bar = st.progress(0)
|
100 |
|
101 |
-
for item in range(
|
102 |
try:
|
103 |
body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
|
104 |
body.send_keys(Keys.END)
|
@@ -112,34 +121,63 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
112 |
placeholder.text("Scrolling complete.")
|
113 |
progress_bar.empty()
|
114 |
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
|
|
|
|
|
|
|
|
|
136 |
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
|
141 |
|
142 |
|
143 |
-
|
144 |
-
|
145 |
-
|
|
|
70 |
''')
|
71 |
|
72 |
|
73 |
+
st.subheader("YouTube Comments Sentiment Analysis", divider="red")


@st.cache_resource
def _load_sentiment_model():
    """Load the sentiment tokenizer and classifier once and reuse them.

    Streamlit re-executes the script on every interaction; without caching
    the two ``from_pretrained`` calls would be repeated on each rerun.

    Returns:
        A ``(tokenizer, model)`` tuple for the
        ``tabularisai/robust-sentiment-analysis`` checkpoint.
    """
    checkpoint = "tabularisai/robust-sentiment-analysis"
    return (
        transformers.DistilBertTokenizer.from_pretrained(checkpoint),
        transformers.DistilBertForSequenceClassification.from_pretrained(checkpoint),
    )


tokenizer, model = _load_sentiment_model()
|
76 |
|
77 |
+
# Create the per-session URL counter on the first run only; an existing
# value is left untouched.
st.session_state.setdefault('url_count', 0)

# Limit quoted in the "maximum URL attempts" warning.
max_attempts = 2
|
81 |
+
|
82 |
+
def update_url_count():
    """Add one to the ``url_count`` value kept in Streamlit's session state."""
    submitted_so_far = st.session_state['url_count']
    st.session_state['url_count'] = submitted_so_far + 1
|
84 |
|
85 |
def clear_question():
    """Reset the ``url`` entry in Streamlit's session state to an empty string."""
    st.session_state["url"] = ""
|
|
|
107 |
placeholder = st.empty()
|
108 |
progress_bar = st.progress(0)
|
109 |
|
110 |
+
for item in range(150):
|
111 |
try:
|
112 |
body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
|
113 |
body.send_keys(Keys.END)
|
|
|
121 |
placeholder.text("Scrolling complete.")
|
122 |
progress_bar.empty()
|
123 |
|
124 |
+
# Scrape the text of every comment currently present in the page into a
# table with one row per comment.
data = []
try:
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
    comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
    # Rows are stamped with today's date (the scrape date). strftime already
    # returns a str, so it is stored directly.
    timestamp = datetime.now().strftime("%Y-%m-%d")
    for user_id, comment in enumerate(comments, start=1):
        data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
    # Drop exact duplicate rows while keeping page order (a plain set would
    # shuffle the rows).
    # NOTE(review): every row carries a distinct "User ID", so no two rows are
    # ever equal and nothing is removed here — dedupe on "Comment" alone if
    # removing repeated comment text was the intent.
    data = [dict(row) for row in dict.fromkeys(tuple(d.items()) for d in data)]
except Exception as e:
    # Best-effort: report the failure in the UI and continue with whatever
    # rows were collected before it.
    st.error(f"Exception during comment extraction: {e}")
finally:
    # Always release the browser, whether extraction succeeded or not.
    driver.quit()
df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
st.dataframe(df)
|
139 |
+
|
140 |
+
# Classify every scraped comment, chart the label distribution and offer the
# combined table as a CSV download.
if df.empty:
    # Nothing was scraped; there is no text to hand to the tokenizer.
    st.warning("No comments were found to analyze.")
elif tokenizer and model:
    comment_texts = df['Comment'].tolist()
    # Run inference in fixed-size batches so peak memory does not grow with
    # the number of comments; padding is applied per batch.
    batch_size = 32
    predicted_ids = []
    with torch.no_grad():
        for start in range(0, len(comment_texts), batch_size):
            inputs = tokenizer(
                comment_texts[start:start + batch_size],
                return_tensors="pt",
                padding=True,
                truncation=True,
            )
            logits = model(**inputs).logits
            # argmax over raw logits selects the same class as argmax over
            # their softmax, so the probabilities are not materialized.
            predicted_ids.extend(logits.argmax(dim=-1).tolist())

    results = [
        {'Review Number': number, 'Sentiment': model.config.id2label[label_id]}
        for number, label_id in enumerate(predicted_ids, start=1)
    ]
    sentiment_df = pd.DataFrame(results)

    # One row per sentiment label with its number of occurrences.
    final_df = (
        sentiment_df['Sentiment']
        .value_counts()
        .rename_axis('Sentiment')
        .reset_index(name='count')
    )

    # Side-by-side join: both frames share the same 0..n-1 row order.
    result = pd.concat([df, sentiment_df], axis=1)

    tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
    with tab1:
        fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
        fig1.update_traces(textposition='inside', textinfo='percent+label')
        st.plotly_chart(fig1)

        # NOTE(review): table kept directly after the pie chart, following the
        # original statement order — confirm it belongs inside this tab.
        st.dataframe(result)

    with tab2:
        # NOTE(review): y is the "comment_date" string column, so bar heights
        # are not comment counts — confirm whether final_df / 'count' was
        # intended here.
        fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
        st.plotly_chart(fig2)

    csv = result.to_csv(index=False)
    st.download_button(
        label="Download data as CSV",
        data=csv,
        file_name='Summary of the results.csv',
        mime='text/csv',
    )
|
173 |
|
174 |
+
else:
|
175 |
+
st.warning("Please enter a URL.")
|
176 |
+
else:
|
177 |
+
st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
|
178 |
|
179 |
+
# Report the running submission count, but only once the counter exists in
# session state.
if 'url_count' in st.session_state:
    st.write(f"URL pasted {st.session_state['url_count']} times.")
|
|
|
181 |
|
182 |
|
183 |
|
|
|
|
|
|