nlpblogs committed on
Commit
c6127ba
·
verified ·
1 Parent(s): 02e10bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -14
app.py CHANGED
@@ -1,20 +1,74 @@
1
- import streamlit as st
 
2
  from selenium import webdriver
3
  from selenium.webdriver.common.by import By
4
  from selenium.webdriver.chrome.options import Options
 
5
  from selenium.webdriver.chrome.service import Service
 
6
  import pandas as pd
 
7
  from selenium.webdriver.common.keys import Keys
 
8
  from selenium.webdriver.support.ui import WebDriverWait
9
  from selenium.webdriver.support import expected_conditions as EC
10
  import time
 
11
  from datetime import datetime
 
 
12
  from webdriver_manager.chrome import ChromeDriverManager
13
  from selenium.webdriver.chrome.service import Service as ChromeService
 
14
  from webdriver_manager.core.os_manager import ChromeType
 
 
 
 
15
  import transformers
16
- import torch
 
17
  import plotly.express as px
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  st.subheader("YouTube Comments Sentiment Analysis", divider="red")
20
  tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
@@ -49,9 +103,11 @@ if st.button("Sentiment Analysis", type="secondary"):
49
  data = []
50
  wait = WebDriverWait(driver, 30)
51
  driver.get(url)
 
52
  placeholder = st.empty()
53
  progress_bar = st.progress(0)
54
- for item in range(150):
 
55
  try:
56
  body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
57
  body.send_keys(Keys.END)
@@ -61,19 +117,19 @@ if st.button("Sentiment Analysis", type="secondary"):
61
  except Exception as e:
62
  st.error(f"Exception during scrolling: {e}")
63
  break
 
64
  placeholder.text("Scrolling complete.")
65
  progress_bar.empty()
 
 
66
  try:
67
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
68
  comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
69
  user_id = 1
70
  for comment in comments:
71
- timestamp = None
72
- try:
73
- timestamp_element = comment.find_element(By.XPATH, './ancestor::ytd-comment-renderer//yt-formatted-string[@class="published-time-text style-scope ytd-comment-renderer"]')
74
- timestamp = timestamp_element.text
75
- except Exception as e:
76
- print(f"Date not found for comment: {comment.text}. Error: {e}")
77
  data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
78
  user_id += 1
79
  data = [dict(t) for t in {tuple(d.items()) for d in data}]
@@ -83,7 +139,7 @@ if st.button("Sentiment Analysis", type="secondary"):
83
  df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
84
  st.dataframe(df)
85
 
86
- if not df.empty and 'Comment' in df.columns and not df['Comment'].empty:
87
  inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
88
  with torch.no_grad():
89
  logits = model(**inputs).logits
@@ -93,6 +149,7 @@ if st.button("Sentiment Analysis", type="secondary"):
93
  for i, label in enumerate(predicted_labels):
94
  results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
95
  sentiment_df = pd.DataFrame(results)
 
96
  value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
97
  final_df = value_counts1
98
  tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
@@ -100,11 +157,14 @@ if st.button("Sentiment Analysis", type="secondary"):
100
  fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
101
  fig1.update_traces(textposition='inside', textinfo='percent+label')
102
  st.plotly_chart(fig1)
 
103
  result = pd.concat([df, sentiment_df], axis=1)
104
  st.dataframe(result)
 
105
  with tab2:
106
  fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
107
  st.plotly_chart(fig2)
 
108
  csv = result.to_csv(index=False)
109
  st.download_button(
110
  label="Download data as CSV",
@@ -112,12 +172,14 @@ if st.button("Sentiment Analysis", type="secondary"):
112
  file_name='Summary of the results.csv',
113
  mime='text/csv',
114
  )
115
- else:
116
- st.warning("No comments were scraped or error occurred during scraping. Sentiment analysis could not be performed.")
117
  else:
118
  st.warning("Please enter a URL.")
119
  else:
120
  st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
121
 
122
- if 'url_count' in st.session_state:
123
- st.write(f"URL pasted {st.session_state['url_count']} times.")
 
 
 
 
1
+ import streamlit as st
2
+
3
  from selenium import webdriver
4
  from selenium.webdriver.common.by import By
5
  from selenium.webdriver.chrome.options import Options
6
+
7
  from selenium.webdriver.chrome.service import Service
8
+
9
  import pandas as pd
10
+
11
  from selenium.webdriver.common.keys import Keys
12
+
13
  from selenium.webdriver.support.ui import WebDriverWait
14
  from selenium.webdriver.support import expected_conditions as EC
15
  import time
16
+ import sys
17
  from datetime import datetime
18
+
19
+
20
  from webdriver_manager.chrome import ChromeDriverManager
21
  from selenium.webdriver.chrome.service import Service as ChromeService
22
+
23
  from webdriver_manager.core.os_manager import ChromeType
24
+
25
+ import re
26
+
27
+
28
  import transformers
29
+ from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
30
+ import io
31
  import plotly.express as px
32
+ import zipfile
33
+ import torch
34
+
35
+
36
+
37
+ with st.sidebar:
38
+ st.button("DEMO APP", type="primary")
39
+
40
+
41
+ expander = st.expander("**Important notes on the YouTube Comments Sentiment Analysis App**")
42
+ expander.write('''
43
+
44
+
45
+ **How to Use**
46
+ This app works with a YouTube URL. Paste the URL and press the 'Sentiment Analysis' button to perform sentiment analysis on your YouTube Comments.
47
+
48
+
49
+ **Usage Limits**
50
+ You can perform sentiment analysis on YouTube Comments up to 5 times.
51
+
52
+
53
+ **Subscription Management**
54
+ This demo app offers a one-day subscription, expiring after 24 hours. If you are interested in building your own YouTube Comments Sentiment Analysis Web App, we invite you to explore our NLP Web App Store on our website. You can select your desired features, place your order, and we will deliver your custom app in five business days. If you wish to delete your Account with us, please contact us at [email protected]
55
+
56
+
57
+ **Customization**
58
+ To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.
59
+
60
+
61
+ **Charts**
62
+ Hover to interact with and download the charts.
63
+
64
+
65
+ **File Handling and Errors**
66
+ For any errors or inquiries, please contact us at [email protected]
67
+
68
+
69
+
70
+ ''')
71
+
72
 
73
  st.subheader("YouTube Comments Sentiment Analysis", divider="red")
74
  tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
 
103
  data = []
104
  wait = WebDriverWait(driver, 30)
105
  driver.get(url)
106
+
107
  placeholder = st.empty()
108
  progress_bar = st.progress(0)
109
+
110
+ for item in range(30):
111
  try:
112
  body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
113
  body.send_keys(Keys.END)
 
117
  except Exception as e:
118
  st.error(f"Exception during scrolling: {e}")
119
  break
120
+
121
  placeholder.text("Scrolling complete.")
122
  progress_bar.empty()
123
+
124
+ data = []
125
  try:
126
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
127
  comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
128
  user_id = 1
129
  for comment in comments:
130
+ match = re.search(r'\d{4}-\d{2}-\d{2}', comment)
131
+ timestamp = datetime.datetime.strptime(match.group(), '%Y-%m-%d').date()
132
+ st.write(timestamp)
 
 
 
133
  data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
134
  user_id += 1
135
  data = [dict(t) for t in {tuple(d.items()) for d in data}]
 
139
  df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
140
  st.dataframe(df)
141
 
142
+ if tokenizer and model:
143
  inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
144
  with torch.no_grad():
145
  logits = model(**inputs).logits
 
149
  for i, label in enumerate(predicted_labels):
150
  results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
151
  sentiment_df = pd.DataFrame(results)
152
+
153
  value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
154
  final_df = value_counts1
155
  tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
 
157
  fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
158
  fig1.update_traces(textposition='inside', textinfo='percent+label')
159
  st.plotly_chart(fig1)
160
+
161
  result = pd.concat([df, sentiment_df], axis=1)
162
  st.dataframe(result)
163
+
164
  with tab2:
165
  fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
166
  st.plotly_chart(fig2)
167
+
168
  csv = result.to_csv(index=False)
169
  st.download_button(
170
  label="Download data as CSV",
 
172
  file_name='Summary of the results.csv',
173
  mime='text/csv',
174
  )
175
+
 
176
  else:
177
  st.warning("Please enter a URL.")
178
  else:
179
  st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
180
 
181
+ if 'url_count' in st.session_state: #added if statement.
182
+ st.write(f"URL pasted {st.session_state['url_count']} times.")
183
+
184
+
185
+