nlpblogs committed on
Commit
ea96c2b
·
verified ·
1 Parent(s): c425874

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -27
app.py CHANGED
@@ -70,8 +70,17 @@ with st.sidebar:
70
  ''')
71
 
72
 
 
 
 
73
 
 
 
74
 
 
 
 
 
75
 
76
  def clear_question():
77
  st.session_state["url"] = ""
@@ -98,7 +107,7 @@ if st.button("Sentiment Analysis", type="secondary"):
98
  placeholder = st.empty()
99
  progress_bar = st.progress(0)
100
 
101
- for item in range(30):
102
  try:
103
  body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
104
  body.send_keys(Keys.END)
@@ -112,34 +121,63 @@ if st.button("Sentiment Analysis", type="secondary"):
112
  placeholder.text("Scrolling complete.")
113
  progress_bar.empty()
114
 
115
- videos = driver.find_elements(By.CLASS_NAME, 'style-scope.ytd-grid-video-renderer')
116
- youtube_videos = []
117
- for video in videos:
118
- title = video.find_element(By.XPATH, './/*[@id="video-title"]').text
119
- views = video.find_element(By.XPATH, './/*[@id="metadata-line"]/span[1]').text
120
- date = video.find_element(By.XPATH, './/*[@id="metadata-line"]/span[2]').text
121
- video_link = video.find_element(By.XPATH, './/*[@id="thumbnail"]/@href')
122
- vid_items = {
123
- 'Title': title,
124
- 'Views': views,
125
- 'Posted': date,
126
- 'Link': video_link}
127
- youtube_videos.append(vid_items)
128
- df = pd.DataFrame(youtube_videos)
129
- st.dataframe(df)
130
-
131
-
132
-
133
-
134
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
 
 
 
 
136
 
137
-
138
-
139
-
140
 
141
 
142
 
143
-
144
-
145
-
 
70
  ''')
71
 
72
 
73
+ st.subheader("YouTube Comments Sentiment Analysis", divider="red")
74
+ tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
75
+ model = transformers.DistilBertForSequenceClassification.from_pretrained("tabularisai/robust-sentiment-analysis")
76
 
77
+ if 'url_count' not in st.session_state:
78
+ st.session_state['url_count'] = 0
79
 
80
+ max_attempts = 2
81
+
82
+ def update_url_count():
83
+ st.session_state['url_count'] += 1
84
 
85
  def clear_question():
86
  st.session_state["url"] = ""
 
107
  placeholder = st.empty()
108
  progress_bar = st.progress(0)
109
 
110
+ for item in range(150):
111
  try:
112
  body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
113
  body.send_keys(Keys.END)
 
121
  placeholder.text("Scrolling complete.")
122
  progress_bar.empty()
123
 
124
+ data = []
125
+ try:
126
+ wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
127
+ comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
128
+ user_id = 1
129
+ for comment in comments:
130
+ timestamp = datetime.now().strftime("%Y-%m-%d")
131
+ data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp.text})
132
+ user_id += 1
133
+ data = [dict(t) for t in {tuple(d.items()) for d in data}]
134
+ except Exception as e:
135
+ st.error(f"Exception during comment extraction: {e}")
136
+ driver.quit()
137
+ df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
138
+ st.dataframe(df)
139
+
140
+ if tokenizer and model:
141
+ inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
142
+ with torch.no_grad():
143
+ logits = model(**inputs).logits
144
+ predicted_probabilities = torch.nn.functional.softmax(logits, dim=-1)
145
+ predicted_labels = predicted_probabilities.argmax(dim=1)
146
+ results = []
147
+ for i, label in enumerate(predicted_labels):
148
+ results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
149
+ sentiment_df = pd.DataFrame(results)
150
+
151
+ value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
152
+ final_df = value_counts1
153
+ tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
154
+ with tab1:
155
+ fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
156
+ fig1.update_traces(textposition='inside', textinfo='percent+label')
157
+ st.plotly_chart(fig1)
158
+
159
+ result = pd.concat([df, sentiment_df], axis=1)
160
+ st.dataframe(result)
161
+
162
+ with tab2:
163
+ fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
164
+ st.plotly_chart(fig2)
165
+
166
+ csv = result.to_csv(index=False)
167
+ st.download_button(
168
+ label="Download data as CSV",
169
+ data=csv,
170
+ file_name='Summary of the results.csv',
171
+ mime='text/csv',
172
+ )
173
 
174
+ else:
175
+ st.warning("Please enter a URL.")
176
+ else:
177
+ st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
178
 
179
+ if 'url_count' in st.session_state: #added if statement.
180
+ st.write(f"URL pasted {st.session_state['url_count']} times.")
 
181
 
182
 
183