nlpblogs committed on
Commit
4381cc0
·
verified ·
1 Parent(s): 0a7152e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -78
app.py CHANGED
@@ -1,74 +1,20 @@
1
- import streamlit as st
2
-
3
  from selenium import webdriver
4
  from selenium.webdriver.common.by import By
5
  from selenium.webdriver.chrome.options import Options
6
-
7
  from selenium.webdriver.chrome.service import Service
8
-
9
  import pandas as pd
10
-
11
  from selenium.webdriver.common.keys import Keys
12
-
13
  from selenium.webdriver.support.ui import WebDriverWait
14
  from selenium.webdriver.support import expected_conditions as EC
15
  import time
16
- import sys
17
  from datetime import datetime
18
-
19
-
20
  from webdriver_manager.chrome import ChromeDriverManager
21
  from selenium.webdriver.chrome.service import Service as ChromeService
22
-
23
  from webdriver_manager.core.os_manager import ChromeType
24
-
25
- import re
26
-
27
-
28
  import transformers
29
- from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
30
- import io
31
  import plotly.express as px
32
- import zipfile
33
- import torch
34
-
35
-
36
-
37
- with st.sidebar:
38
- st.button("DEMO APP", type="primary")
39
-
40
-
41
- expander = st.expander("**Important notes on the YouTube Comments Sentiment Analysis App**")
42
- expander.write('''
43
-
44
-
45
- **How to Use**
46
- This app works with a YouTube URL. Paste the URL and press the 'Sentiment Analysis' button to perform sentiment analysis on your YouTube Comments.
47
-
48
-
49
- **Usage Limits**
50
- You can perform sentiment analysis on YouTube Comments up to 5 times.
51
-
52
-
53
- **Subscription Management**
54
- This demo app offers a one-day subscription, expiring after 24 hours. If you are interested in building your own YouTube Comments Sentiment Analysis Web App, we invite you to explore our NLP Web App Store on our website. You can select your desired features, place your order, and we will deliver your custom app in five business days. If you wish to delete your Account with us, please contact us at [email protected]
55
-
56
-
57
- **Customization**
58
- To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.
59
-
60
-
61
- **Charts**
62
- Hover to interact with and download the charts.
63
-
64
-
65
- **File Handling and Errors**
66
- For any errors or inquiries, please contact us at [email protected]
67
-
68
-
69
-
70
- ''')
71
-
72
 
73
  st.subheader("YouTube Comments Sentiment Analysis", divider="red")
74
  tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
@@ -103,10 +49,8 @@ if st.button("Sentiment Analysis", type="secondary"):
103
  data = []
104
  wait = WebDriverWait(driver, 30)
105
  driver.get(url)
106
-
107
  placeholder = st.empty()
108
  progress_bar = st.progress(0)
109
-
110
  for item in range(150):
111
  try:
112
  body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
@@ -117,32 +61,24 @@ if st.button("Sentiment Analysis", type="secondary"):
117
  except Exception as e:
118
  st.error(f"Exception during scrolling: {e}")
119
  break
120
-
121
  placeholder.text("Scrolling complete.")
122
  progress_bar.empty()
123
-
124
- data = []
125
  try:
126
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
127
- comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-time")
128
-
129
  user_id = 1
130
  for comment in comments:
131
-
132
  timestamp = datetime.now().strftime("%Y-%m-%d")
133
-
134
  data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
135
  user_id += 1
136
  data = [dict(t) for t in {tuple(d.items()) for d in data}]
137
-
138
-
139
  except Exception as e:
140
  st.error(f"Exception during comment extraction: {e}")
141
  driver.quit()
142
  df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
143
  st.dataframe(df)
144
 
145
- if tokenizer and model:
146
  inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
147
  with torch.no_grad():
148
  logits = model(**inputs).logits
@@ -152,7 +88,6 @@ if st.button("Sentiment Analysis", type="secondary"):
152
  for i, label in enumerate(predicted_labels):
153
  results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
154
  sentiment_df = pd.DataFrame(results)
155
-
156
  value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
157
  final_df = value_counts1
158
  tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
@@ -160,14 +95,11 @@ if st.button("Sentiment Analysis", type="secondary"):
160
  fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
161
  fig1.update_traces(textposition='inside', textinfo='percent+label')
162
  st.plotly_chart(fig1)
163
-
164
  result = pd.concat([df, sentiment_df], axis=1)
165
  st.dataframe(result)
166
-
167
  with tab2:
168
  fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
169
  st.plotly_chart(fig2)
170
-
171
  csv = result.to_csv(index=False)
172
  st.download_button(
173
  label="Download data as CSV",
@@ -175,14 +107,12 @@ if st.button("Sentiment Analysis", type="secondary"):
175
  file_name='Summary of the results.csv',
176
  mime='text/csv',
177
  )
178
-
 
179
  else:
180
  st.warning("Please enter a URL.")
181
  else:
182
  st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
183
 
184
- if 'url_count' in st.session_state: #added if statement.
185
- st.write(f"URL pasted {st.session_state['url_count']} times.")
186
-
187
-
188
-
 
1
+ import streamlit as st
 
2
  from selenium import webdriver
3
  from selenium.webdriver.common.by import By
4
  from selenium.webdriver.chrome.options import Options
 
5
  from selenium.webdriver.chrome.service import Service
 
6
  import pandas as pd
 
7
  from selenium.webdriver.common.keys import Keys
 
8
  from selenium.webdriver.support.ui import WebDriverWait
9
  from selenium.webdriver.support import expected_conditions as EC
10
  import time
 
11
  from datetime import datetime
 
 
12
  from webdriver_manager.chrome import ChromeDriverManager
13
  from selenium.webdriver.chrome.service import Service as ChromeService
 
14
  from webdriver_manager.core.os_manager import ChromeType
 
 
 
 
15
  import transformers
16
+ import torch
 
17
  import plotly.express as px
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  st.subheader("YouTube Comments Sentiment Analysis", divider="red")
20
  tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
 
49
  data = []
50
  wait = WebDriverWait(driver, 30)
51
  driver.get(url)
 
52
  placeholder = st.empty()
53
  progress_bar = st.progress(0)
 
54
  for item in range(150):
55
  try:
56
  body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
 
61
  except Exception as e:
62
  st.error(f"Exception during scrolling: {e}")
63
  break
 
64
  placeholder.text("Scrolling complete.")
65
  progress_bar.empty()
 
 
66
  try:
67
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
68
+ comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text") #Corrected CSS Selector
 
69
  user_id = 1
70
  for comment in comments:
 
71
  timestamp = datetime.now().strftime("%Y-%m-%d")
 
72
  data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
73
  user_id += 1
74
  data = [dict(t) for t in {tuple(d.items()) for d in data}]
 
 
75
  except Exception as e:
76
  st.error(f"Exception during comment extraction: {e}")
77
  driver.quit()
78
  df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
79
  st.dataframe(df)
80
 
81
+ if not df.empty and 'Comment' in df.columns and not df['Comment'].empty: #Added checks.
82
  inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
83
  with torch.no_grad():
84
  logits = model(**inputs).logits
 
88
  for i, label in enumerate(predicted_labels):
89
  results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
90
  sentiment_df = pd.DataFrame(results)
 
91
  value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
92
  final_df = value_counts1
93
  tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
 
95
  fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
96
  fig1.update_traces(textposition='inside', textinfo='percent+label')
97
  st.plotly_chart(fig1)
 
98
  result = pd.concat([df, sentiment_df], axis=1)
99
  st.dataframe(result)
 
100
  with tab2:
101
  fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
102
  st.plotly_chart(fig2)
 
103
  csv = result.to_csv(index=False)
104
  st.download_button(
105
  label="Download data as CSV",
 
107
  file_name='Summary of the results.csv',
108
  mime='text/csv',
109
  )
110
+ else:
111
+ st.warning("No comments were scraped or error occurred during scraping. Sentiment analysis could not be performed.")
112
  else:
113
  st.warning("Please enter a URL.")
114
  else:
115
  st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
116
 
117
+ if 'url_count' in st.session_state:
118
+ st.write(f"URL pasted {st.session_state['url_count']} times.")