nlpblogs committed on
Commit
5a1ddac
·
verified ·
1 Parent(s): 7aa5e6a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -52
app.py CHANGED
@@ -4,44 +4,14 @@ from selenium.webdriver.common.by import By
4
  from selenium.webdriver.chrome.options import Options
5
  from selenium.webdriver.chrome.service import Service
6
  import pandas as pd
7
- from selenium.webdriver.common.keys import Keys
8
  from selenium.webdriver.support.ui import WebDriverWait
9
  from selenium.webdriver.support import expected_conditions as EC
10
  import time
11
- from datetime import datetime
12
  from webdriver_manager.chrome import ChromeDriverManager
13
- from selenium.webdriver.chrome.service import Service as ChromeService
14
- from webdriver_manager.core.os_manager import ChromeType
15
- import re
16
  import transformers
17
- from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
18
- import io
19
- import plotly.express as px
20
- import zipfile
21
  import torch
22
-
23
- with st.sidebar:
24
- st.button("DEMO APP", type="primary")
25
- expander = st.expander("**Important notes on the YouTube Comments Sentiment Analysis App**")
26
- expander.write('''
27
- **How to Use**
28
- This app works with a YouTube URL. Paste the URL and press the 'Sentiment Analysis' button to perform sentiment analysis on your YouTube Comments.
29
-
30
- **Usage Limits**
31
- You can perform sentiment analysis on YouTube Comments up to 5 times.
32
-
33
- **Subscription Management**
34
- This demo app offers a one-day subscription, expiring after 24 hours. If you are interested in building your own YouTube Comments Sentiment Analysis Web App, we invite you to explore our NLP Web App Store on our website. You can select your desired features, place your order, and we will deliver your custom app in five business days. If you wish to delete your Account with us, please contact us at [email protected]
35
-
36
- **Customization**
37
- To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.
38
-
39
- **Charts**
40
- Hover to interact with and download the charts.
41
-
42
- **File Handling and Errors**
43
- For any errors or inquiries, please contact us at [email protected]
44
- ''')
45
 
46
  st.subheader("YouTube Comments Sentiment Analysis", divider="red")
47
  tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
@@ -76,40 +46,54 @@ if st.button("Sentiment Analysis", type="secondary"):
76
  data = []
77
  wait = WebDriverWait(driver, 30)
78
  driver.get(url)
 
79
  placeholder = st.empty()
80
  progress_bar = st.progress(0)
 
81
  for item in range(30):
82
  try:
83
- body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
84
- body.send_keys(Keys.END)
85
  placeholder.text(f"Scrolled {item + 1} times")
86
- progress_bar.progress((item + 1) / 150)
87
- time.sleep(0.5)
88
  except Exception as e:
89
  st.error(f"Exception during scrolling: {e}")
90
  break
 
91
  placeholder.text("Scrolling complete.")
92
  progress_bar.empty()
 
93
  try:
94
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
95
  comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
96
- user_id = 1
97
  for comment in comments:
98
  timestamp = None
99
  try:
 
100
  timestamp_element = comment.find_element(By.XPATH, './ancestor::ytd-comment-renderer//yt-formatted-string[@class="published-time-text style-scope ytd-comment-renderer"]')
101
  timestamp = timestamp_element.text
102
- except Exception as e:
103
- print(f"Timestamp not found for comment: {comment.text}. Error: {e}")
104
- data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
105
- user_id += 1
106
- data = [dict(t) for t in {tuple(d.items()) for d in data}]
 
 
 
 
 
 
 
 
 
107
  except Exception as e:
108
  st.error(f"Exception during comment extraction: {e}")
109
  driver.quit()
110
- df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
111
- st.dataframe(df)
112
- if tokenizer and model:
 
113
  inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
114
  with torch.no_grad():
115
  logits = model(**inputs).logits
@@ -119,6 +103,7 @@ if st.button("Sentiment Analysis", type="secondary"):
119
  for i, label in enumerate(predicted_labels):
120
  results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
121
  sentiment_df = pd.DataFrame(results)
 
122
  value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
123
  final_df = value_counts1
124
  tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
@@ -126,22 +111,23 @@ if st.button("Sentiment Analysis", type="secondary"):
126
  fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
127
  fig1.update_traces(textposition='inside', textinfo='percent+label')
128
  st.plotly_chart(fig1)
 
129
  result = pd.concat([df, sentiment_df], axis=1)
130
  st.dataframe(result)
 
131
  with tab2:
132
  fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
133
  st.plotly_chart(fig2)
 
134
  csv = result.to_csv(index=False)
135
- st.download_button(
136
- label="Download data as CSV",
137
- data=csv,
138
- file_name='Summary of the results.csv',
139
- mime='text/csv',
140
- )
141
  else:
142
  st.warning("Please enter a URL.")
143
  else:
144
  st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
145
 
146
  if 'url_count' in st.session_state:
147
- st.write(f"URL pasted {st.session_state['url_count']} times.")
 
4
  from selenium.webdriver.chrome.options import Options
5
  from selenium.webdriver.chrome.service import Service
6
  import pandas as pd
 
7
  from selenium.webdriver.support.ui import WebDriverWait
8
  from selenium.webdriver.support import expected_conditions as EC
9
  import time
 
10
  from webdriver_manager.chrome import ChromeDriverManager
11
+ from webdriver_manager.chrome import ChromeType
 
 
12
  import transformers
 
 
 
 
13
  import torch
14
+ import plotly.express as px
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  st.subheader("YouTube Comments Sentiment Analysis", divider="red")
17
  tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
 
46
  data = []
47
  wait = WebDriverWait(driver, 30)
48
  driver.get(url)
49
+
50
  placeholder = st.empty()
51
  progress_bar = st.progress(0)
52
+
53
  for item in range(30):
54
  try:
55
+ driver.execute_script("window.scrollBy(0, 500);")
56
+ wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#content #content-text")))
57
  placeholder.text(f"Scrolled {item + 1} times")
58
+ progress_bar.progress((item + 1) / 30)
59
+ time.sleep(1) #Increased wait time for dynamic loading
60
  except Exception as e:
61
  st.error(f"Exception during scrolling: {e}")
62
  break
63
+
64
  placeholder.text("Scrolling complete.")
65
  progress_bar.empty()
66
+
67
  try:
68
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
69
  comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
 
70
  for comment in comments:
71
  timestamp = None
72
  try:
73
+ # Try a more direct XPath
74
  timestamp_element = comment.find_element(By.XPATH, './ancestor::ytd-comment-renderer//yt-formatted-string[@class="published-time-text style-scope ytd-comment-renderer"]')
75
  timestamp = timestamp_element.text
76
+ except Exception:
77
+ try:
78
+ # Try a more general XPath
79
+ timestamp_element = comment.find_element(By.XPATH, './ancestor::ytd-comment-renderer//yt-formatted-string[contains(@class, "time-text")]')
80
+ timestamp = timestamp_element.text
81
+ except Exception:
82
+ try:
83
+ #try grabbing the a tag.
84
+ timestamp_element = comment.find_element(By.XPATH, './ancestor::ytd-comment-renderer//a[@id="time"]')
85
+ timestamp = timestamp_element.text
86
+ except Exception as inner_e:
87
+ print(f"Timestamp not found for comment: {comment.text}. Error: {inner_e}") #debug
88
+ data.append({"Comment": comment.text, "comment_date": timestamp})
89
+
90
  except Exception as e:
91
  st.error(f"Exception during comment extraction: {e}")
92
  driver.quit()
93
+ df = pd.DataFrame(data, columns=["Comment", "comment_date"])
94
+
95
+ if not df.empty and not df['Comment'].tolist() == []:
96
+ st.dataframe(df)
97
  inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
98
  with torch.no_grad():
99
  logits = model(**inputs).logits
 
103
  for i, label in enumerate(predicted_labels):
104
  results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
105
  sentiment_df = pd.DataFrame(results)
106
+
107
  value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
108
  final_df = value_counts1
109
  tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
 
111
  fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
112
  fig1.update_traces(textposition='inside', textinfo='percent+label')
113
  st.plotly_chart(fig1)
114
+
115
  result = pd.concat([df, sentiment_df], axis=1)
116
  st.dataframe(result)
117
+
118
  with tab2:
119
  fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
120
  st.plotly_chart(fig2)
121
+
122
  csv = result.to_csv(index=False)
123
+ st.download_button(label="Download data as CSV", data=csv, file_name='Summary of the results.csv', mime='text/csv')
124
+ else:
125
+ st.warning("No comments were scraped. Sentiment analysis could not be performed.")
126
+
 
 
127
  else:
128
  st.warning("Please enter a URL.")
129
  else:
130
  st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
131
 
132
  if 'url_count' in st.session_state:
133
+ st.write(f"URL pasted {st.session_state['url_count']} times.")