nlpblogs committed
Commit 16adfc0 · verified · 1 Parent(s): 76a4123

Update app.py

Files changed (1):
  1. app.py +24 -68
app.py CHANGED
@@ -70,17 +70,8 @@ with st.sidebar:
     ''')


-    st.subheader("YouTube Comments Sentiment Analysis", divider="red")
-    tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
-    model = transformers.DistilBertForSequenceClassification.from_pretrained("tabularisai/robust-sentiment-analysis")

-    if 'url_count' not in st.session_state:
-        st.session_state['url_count'] = 0

-    max_attempts = 2
-
-    def update_url_count():
-        st.session_state['url_count'] += 1

     def clear_question():
         st.session_state["url"] = ""
@@ -121,67 +112,32 @@ if st.button("Sentiment Analysis", type="secondary"):
             placeholder.text("Scrolling complete.")
             progress_bar.empty()

-            data = []
-            try:
-                wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
-                comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
-
-                user_id = 1
-                for comment in comments:
-                    views = comment.find_element_by_xpath(by=By.XPATH, './/*[@id="metadata-line"]/span[1]').text
-                    timestamp = comment.find_element_by_xpath(by=By.XPATH, './/*[@id="metadata-line"]/span[2]').text
-
-                    data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
-                    user_id += 1
-                data = [dict(t) for t in {tuple(d.items()) for d in data}]
-            except Exception as e:
-                st.error(f"Exception during comment extraction: {e}")
-            driver.quit()
-            df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
-            st.dataframe(df)
-
-            if tokenizer and model:
-                inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
-                with torch.no_grad():
-                    logits = model(**inputs).logits
-                predicted_probabilities = torch.nn.functional.softmax(logits, dim=-1)
-                predicted_labels = predicted_probabilities.argmax(dim=1)
-                results = []
-                for i, label in enumerate(predicted_labels):
-                    results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
-                sentiment_df = pd.DataFrame(results)
-
-                value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
-                final_df = value_counts1
-                tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
-                with tab1:
-                    fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
-                    fig1.update_traces(textposition='inside', textinfo='percent+label')
-                    st.plotly_chart(fig1)
-
-                    result = pd.concat([df, sentiment_df], axis=1)
-                    st.dataframe(result)
-
-                with tab2:
-                    fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
-                    st.plotly_chart(fig2)
-
-                csv = result.to_csv(index=False)
-                st.download_button(
-                    label="Download data as CSV",
-                    data=csv,
-                    file_name='Summary of the results.csv',
-                    mime='text/csv',
-                )
-
-        else:
-            st.warning("Please enter a URL.")
-    else:
-        st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
-
-if 'url_count' in st.session_state:  # added if statement.
-    st.write(f"URL pasted {st.session_state['url_count']} times.")
+            videos = driver.find_elements_by_class_name('style-scope ytd-grid-video-renderer')
+            youtube_videos = []
+            for video in videos:
+                link = url
+                title = video.find_element_by_xpath('.//*[@id="video-title"]').text
+                views = video.find_element_by_xpath('.//*[@id="metadata-line"]/span[1]').text
+                date = video.find_element_by_xpath('.//*[@id="metadata-line"]/span[2]').text
+                vid_items = {
+                    'Title': title,
+                    'Views': views,
+                    'Posted': date,
+                    'Likes': likes,
+                    'link': link
+                }
+                youtube_videos.append(vid_items)
+            df = pd.DataFrame(youtube_videos)
+            st.dataframe(df)
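
Note on the added block: find_elements_by_class_name and find_element_by_xpath are Selenium 3 helpers that were removed in Selenium 4, and likes is never assigned, so building vid_items raises NameError on the first iteration. A minimal Selenium 4 sketch of the same extraction, with the unassigned 'Likes' field dropped; driver and url are assumed to come from earlier in app.py:

    from selenium.webdriver.common.by import By
    import pandas as pd
    import streamlit as st

    # Selenium 4 style: driver.find_elements(By..., ...). Locating by the
    # ytd-grid-video-renderer tag avoids By.CLASS_NAME, which rejects the
    # compound class string 'style-scope ytd-grid-video-renderer'.
    videos = driver.find_elements(By.CSS_SELECTOR, "ytd-grid-video-renderer")

    youtube_videos = []
    for video in videos:
        title = video.find_element(By.XPATH, './/*[@id="video-title"]').text
        views = video.find_element(By.XPATH, './/*[@id="metadata-line"]/span[1]').text
        date = video.find_element(By.XPATH, './/*[@id="metadata-line"]/span[2]').text
        youtube_videos.append({
            'Title': title,
            'Views': views,
            'Posted': date,
            'link': url,  # the URL pasted by the user earlier in app.py
        })

    df = pd.DataFrame(youtube_videos)
    st.dataframe(df)
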