nlpblogs committed on
Commit
30bf2ff
·
verified ·
1 Parent(s): eae5fab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -84
app.py CHANGED
@@ -70,12 +70,11 @@ with st.sidebar:
70
  ''')
71
 
72
 
73
- st.subheader("YouTube Comments Sentiment Analysis", divider = "red")
74
-
75
  tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
76
  model = transformers.DistilBertForSequenceClassification.from_pretrained("tabularisai/robust-sentiment-analysis")
77
 
78
-
79
  if 'url_count' not in st.session_state:
80
  st.session_state['url_count'] = 0
81
 
@@ -105,94 +104,81 @@ if st.button("Sentiment Analysis", type="secondary"):
105
  data = []
106
  wait = WebDriverWait(driver, 30)
107
  driver.get(url)
108
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  st.warning("Failed to scrape reviews.")
110
- update_url_count() # Correctly indented
111
  else:
112
  st.warning("Please enter a URL.")
113
  else:
114
  st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
115
 
116
  st.write(f"URL pasted {st.session_state['url_count']} times.")
117
-
118
-
119
-
120
-
121
- placeholder = st.empty() # Create an empty placeholder for dynamic text
122
- progress_bar = st.progress(0) # Create a progress bar
123
-
124
- for item in range(150):
125
- try:
126
- body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
127
- body.send_keys(Keys.END)
128
- placeholder.text(f"Scrolled {item + 1} times") # Update placeholder text
129
- progress_bar.progress((item + 1) / 150) # Update progress bar
130
- time.sleep(3) # Increased sleep time for better loading
131
- except Exception as e:
132
- st.error(f"Exception during scrolling: {e}")
133
- break
134
-
135
- placeholder.text("Scrolling complete.") #show completion message.
136
- progress_bar.empty() #remove progress bar.
137
-
138
-
139
- data = []
140
- try:
141
- wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
142
- comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
143
-
144
- user_id = 1 # Initialize unique user ID
145
- for comment in comments:
146
- timestamp = datetime.now().strftime("%Y-%m-%d")
147
- data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
148
- user_id += 1
149
- data = [dict(t) for t in {tuple(d.items()) for d in data}]
150
-
151
- except Exception as e:
152
- st.error(f"Exception during comment extraction: {e}")
153
-
154
- driver.quit()
155
- df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
156
- st.dataframe(df)
157
-
158
- if tokenizer and model:
159
- inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
160
- with torch.no_grad():
161
- logits = model(**inputs).logits
162
- predicted_probabilities = torch.nn.functional.softmax(logits, dim=-1)
163
- predicted_labels = predicted_probabilities.argmax(dim=1)
164
- results = []
165
- for i, label in enumerate(predicted_labels):
166
- results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
167
- sentiment_df = pd.DataFrame(results)
168
-
169
-
170
- value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
171
- final_df = value_counts1
172
-
173
- tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
174
- with tab1:
175
- fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
176
- fig1.update_traces(textposition='inside', textinfo='percent+label')
177
- st.plotly_chart(fig1)
178
-
179
-
180
- result = pd.concat([df, sentiment_df], axis=1)
181
- st.dataframe(result)
182
-
183
-
184
- with tab2:
185
- fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
186
- st.plotly_chart(fig2)
187
-
188
- csv = result.to_csv(index=False)
189
- st.download_button(
190
- label="Download data as CSV",
191
- data=csv,
192
- file_name='Summary of the results.csv',
193
- mime='text/csv',
194
- )
195
-
196
 
197
 
198
 
 
70
  ''')
71
 
72
 
73
+
74
+ st.subheader("YouTube Comments Sentiment Analysis", divider="red")
75
  tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
76
  model = transformers.DistilBertForSequenceClassification.from_pretrained("tabularisai/robust-sentiment-analysis")
77
 
 
78
  if 'url_count' not in st.session_state:
79
  st.session_state['url_count'] = 0
80
 
 
104
  data = []
105
  wait = WebDriverWait(driver, 30)
106
  driver.get(url)
107
+
108
+ placeholder = st.empty()
109
+ progress_bar = st.progress(0)
110
+
111
+ for item in range(150):
112
+ try:
113
+ body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
114
+ body.send_keys(Keys.END)
115
+ placeholder.text(f"Scrolled {item + 1} times")
116
+ progress_bar.progress((item + 1) / 150)
117
+ time.sleep(3)
118
+ except Exception as e:
119
+ st.error(f"Exception during scrolling: {e}")
120
+ break
121
+
122
+ placeholder.text("Scrolling complete.")
123
+ progress_bar.empty()
124
+
125
+ data = []
126
+ try:
127
+ wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
128
+ comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
129
+ user_id = 1
130
+ for comment in comments:
131
+ timestamp = datetime.now().strftime("%Y-%m-%d")
132
+ data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
133
+ user_id += 1
134
+ data = [dict(t) for t in {tuple(d.items()) for d in data}]
135
+ except Exception as e:
136
+ st.error(f"Exception during comment extraction: {e}")
137
+ driver.quit()
138
+ df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
139
+ st.dataframe(df)
140
+
141
+ if tokenizer and model:
142
+ inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
143
+ with torch.no_grad():
144
+ logits = model(**inputs).logits
145
+ predicted_probabilities = torch.nn.functional.softmax(logits, dim=-1)
146
+ predicted_labels = predicted_probabilities.argmax(dim=1)
147
+ results = []
148
+ for i, label in enumerate(predicted_labels):
149
+ results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
150
+ sentiment_df = pd.DataFrame(results)
151
+
152
+ value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
153
+ final_df = value_counts1
154
+ tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
155
+ with tab1:
156
+ fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
157
+ fig1.update_traces(textposition='inside', textinfo='percent+label')
158
+ st.plotly_chart(fig1)
159
+
160
+ result = pd.concat([df, sentiment_df], axis=1)
161
+ st.dataframe(result)
162
+
163
+ with tab2:
164
+ fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
165
+ st.plotly_chart(fig2)
166
+
167
+ csv = result.to_csv(index=False)
168
+ st.download_button(
169
+ label="Download data as CSV",
170
+ data=csv,
171
+ file_name='Summary of the results.csv',
172
+ mime='text/csv',
173
+ )
174
  st.warning("Failed to scrape reviews.")
175
+ update_url_count()
176
  else:
177
  st.warning("Please enter a URL.")
178
  else:
179
  st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
180
 
181
  st.write(f"URL pasted {st.session_state['url_count']} times.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
 
184