nlpblogs committed on
Commit
2096041
·
verified ·
1 Parent(s): 0b66ee9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -28
app.py CHANGED
@@ -1,17 +1,74 @@
1
- import streamlit as st
 
2
  from selenium import webdriver
3
  from selenium.webdriver.common.by import By
4
  from selenium.webdriver.chrome.options import Options
 
5
  from selenium.webdriver.chrome.service import Service
 
6
  import pandas as pd
 
 
 
7
  from selenium.webdriver.support.ui import WebDriverWait
8
  from selenium.webdriver.support import expected_conditions as EC
9
  import time
 
 
 
 
10
  from webdriver_manager.chrome import ChromeDriverManager
11
- from webdriver_manager.chrome import ChromeType
 
 
 
 
 
 
12
  import transformers
13
- import torch
 
14
  import plotly.express as px
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  st.subheader("YouTube Comments Sentiment Analysis", divider="red")
17
  tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
@@ -50,13 +107,13 @@ if st.button("Sentiment Analysis", type="secondary"):
50
  placeholder = st.empty()
51
  progress_bar = st.progress(0)
52
 
53
- for item in range(30):
54
  try:
55
- driver.execute_script("window.scrollBy(0, 500);")
56
- wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#content #content-text")))
57
  placeholder.text(f"Scrolled {item + 1} times")
58
- progress_bar.progress((item + 1) / 30)
59
- time.sleep(1) #Increased wait time for dynamic loading
60
  except Exception as e:
61
  st.error(f"Exception during scrolling: {e}")
62
  break
@@ -64,28 +121,23 @@ if st.button("Sentiment Analysis", type="secondary"):
64
  placeholder.text("Scrolling complete.")
65
  progress_bar.empty()
66
 
 
67
  try:
68
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
69
  comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
 
70
  for comment in comments:
71
- timestamp = None
72
- try:
73
- comment_text = comment.text
74
- date_match = re.search(r'\d+ (day|week|month|year)s? ago', comment_text) #Example regex.
75
- if date_match:
76
- timestamp = date_match.group(0)
77
- except Exception as e:
78
- st.error(f"Error extracting date with regex: {e}")
79
- data.append({"Comment": comment.text, "comment_date": timestamp})
80
-
81
-
82
  except Exception as e:
83
  st.error(f"Exception during comment extraction: {e}")
84
  driver.quit()
85
- df = pd.DataFrame(data, columns=["Comment", "comment_date"])
 
86
 
87
- if not df.empty and not df['Comment'].tolist() == []:
88
- st.dataframe(df)
89
  inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
90
  with torch.no_grad():
91
  logits = model(**inputs).logits
@@ -112,14 +164,29 @@ if st.button("Sentiment Analysis", type="secondary"):
112
  st.plotly_chart(fig2)
113
 
114
  csv = result.to_csv(index=False)
115
- st.download_button(label="Download data as CSV", data=csv, file_name='Summary of the results.csv', mime='text/csv')
116
- else:
117
- st.warning("No comments were scraped. Sentiment analysis could not be performed.")
118
-
 
 
 
119
  else:
120
  st.warning("Please enter a URL.")
121
  else:
122
  st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
123
 
124
- if 'url_count' in st.session_state:
125
- st.write(f"URL pasted {st.session_state['url_count']} times.")
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
  from selenium import webdriver
4
  from selenium.webdriver.common.by import By
5
  from selenium.webdriver.chrome.options import Options
6
+
7
  from selenium.webdriver.chrome.service import Service
8
+
9
  import pandas as pd
10
+
11
+ from selenium.webdriver.common.keys import Keys
12
+
13
  from selenium.webdriver.support.ui import WebDriverWait
14
  from selenium.webdriver.support import expected_conditions as EC
15
  import time
16
+ import sys
17
+ from datetime import datetime
18
+
19
+
20
  from webdriver_manager.chrome import ChromeDriverManager
21
+ from selenium.webdriver.chrome.service import Service as ChromeService
22
+
23
+ from webdriver_manager.core.os_manager import ChromeType
24
+
25
+ import re
26
+
27
+
28
  import transformers
29
+ from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
30
+ import io
31
  import plotly.express as px
32
+ import zipfile
33
+ import torch
34
+
35
+
36
+
37
+ with st.sidebar:
38
+ st.button("DEMO APP", type="primary")
39
+
40
+
41
+ expander = st.expander("**Important notes on the YouTube Comments Sentiment Analysis App**")
42
+ expander.write('''
43
+
44
+
45
+ **How to Use**
46
+ This app works with a YouTube URL. Paste the URL and press the 'Sentiment Analysis' button to perform sentiment analysis on your YouTube Comments.
47
+
48
+
49
+ **Usage Limits**
50
+ You can perform sentiment analysis on YouTube Comments up to 5 times.
51
+
52
+
53
+ **Subscription Management**
54
+ This demo app offers a one-day subscription, expiring after 24 hours. If you are interested in building your own YouTube Comments Sentiment Analysis Web App, we invite you to explore our NLP Web App Store on our website. You can select your desired features, place your order, and we will deliver your custom app in five business days. If you wish to delete your Account with us, please contact us at [email protected]
55
+
56
+
57
+ **Customization**
58
+ To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.
59
+
60
+
61
+ **Charts**
62
+ Hover to interact with and download the charts.
63
+
64
+
65
+ **File Handling and Errors**
66
+ For any errors or inquiries, please contact us at [email protected]
67
+
68
+
69
+
70
+ ''')
71
+
72
 
73
  st.subheader("YouTube Comments Sentiment Analysis", divider="red")
74
  tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
 
107
  placeholder = st.empty()
108
  progress_bar = st.progress(0)
109
 
110
+ for item in range(150):
111
  try:
112
+ body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
113
+ body.send_keys(Keys.END)
114
  placeholder.text(f"Scrolled {item + 1} times")
115
+ progress_bar.progress((item + 1) / 150)
116
+ time.sleep(0.5)
117
  except Exception as e:
118
  st.error(f"Exception during scrolling: {e}")
119
  break
 
121
  placeholder.text("Scrolling complete.")
122
  progress_bar.empty()
123
 
124
+ data = []
125
  try:
126
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
127
  comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
128
+ user_id = 1
129
  for comment in comments:
130
+ timestamp = datetime.now().strftime("%Y-%m-%d")
131
+ data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})  # sequential ID from the user_id counter initialised before the loop
132
+ user_id += 1
133
+ data = [dict(t) for t in {tuple(d.items()) for d in data}]
 
 
 
 
 
 
 
134
  except Exception as e:
135
  st.error(f"Exception during comment extraction: {e}")
136
  driver.quit()
137
+ df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
138
+ st.dataframe(df)
139
 
140
+ if tokenizer and model:
 
141
  inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
142
  with torch.no_grad():
143
  logits = model(**inputs).logits
 
164
  st.plotly_chart(fig2)
165
 
166
  csv = result.to_csv(index=False)
167
+ st.download_button(
168
+ label="Download data as CSV",
169
+ data=csv,
170
+ file_name='Summary of the results.csv',
171
+ mime='text/csv',
172
+ )
173
+
174
  else:
175
  st.warning("Please enter a URL.")
176
  else:
177
  st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
178
 
179
+ if 'url_count' in st.session_state: #added if statement.
180
+ st.write(f"URL pasted {st.session_state['url_count']} times.")
181
+
182
+
183
+
184
+
185
+
186
+
187
+
188
+
189
+
190
+
191
+
192
+