nlpblogs committed on
Commit
31af44b
·
verified ·
1 Parent(s): add0c31

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -88
app.py CHANGED
@@ -1,82 +1,26 @@
1
- import streamlit as st
2
-
 
 
3
  from selenium import webdriver
4
- from selenium.webdriver.common.by import By
5
- from selenium.webdriver.chrome.options import Options
6
-
7
  from selenium.webdriver.chrome.service import Service
8
-
9
- import pandas as pd
10
-
11
- from selenium.webdriver.common.keys import Keys
12
-
13
  from selenium.webdriver.support.ui import WebDriverWait
14
  from selenium.webdriver.support import expected_conditions as EC
 
 
15
  import time
16
- import sys
17
- from datetime import datetime
18
-
19
-
20
- from webdriver_manager.chrome import ChromeDriverManager
21
- from selenium.webdriver.chrome.service import Service as ChromeService
22
-
23
- from webdriver_manager.core.os_manager import ChromeType
24
-
25
- import re
26
-
27
-
28
- import transformers
29
- from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
30
- import io
31
  import plotly.express as px
32
- import zipfile
33
- import torch
34
-
35
- import matplotlib.pyplot as plt
36
  from wordcloud import WordCloud
 
 
37
  from nltk.corpus import stopwords
 
38
 
39
- import nltk
40
  nltk.download('stopwords')
41
 
42
-
43
-
44
- with st.sidebar:
45
- st.button("DEMO APP", type="primary")
46
-
47
-
48
- expander = st.expander("**Important notes on the YouTube Comments Sentiment Analysis App**")
49
- expander.write('''
50
-
51
-
52
- **How to Use**
53
- This app works with a YouTube URL. Paste the URL and press the 'Sentiment Analysis' button to perform sentiment analysis on your YouTube Comments.
54
-
55
-
56
- **Usage Limits**
57
- You can perform sentiment analysis on YouTube Comments up to 5 times.
58
-
59
-
60
- **Subscription Management**
61
- This demo app offers a one-day subscription, expiring after 24 hours. If you are interested in building your own YouTube Comments Sentiment Analysis Web App, we invite you to explore our NLP Web App Store on our website. You can select your desired features, place your order, and we will deliver your custom app in five business days. If you wish to delete your Account with us, please contact us at [email protected]
62
-
63
-
64
- **Customization**
65
- To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.
66
-
67
-
68
- **Charts**
69
- Hover to interact with and download the charts.
70
-
71
-
72
- **File Handling and Errors**
73
- For any errors or inquiries, please contact us at [email protected]
74
-
75
-
76
-
77
- ''')
78
-
79
-
80
  st.subheader("YouTube Comments Sentiment Analysis", divider="red")
81
  tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
82
  model = transformers.DistilBertForSequenceClassification.from_pretrained("tabularisai/robust-sentiment-analysis")
@@ -98,6 +42,7 @@ st.button("Clear question", on_click=clear_question)
98
  if st.button("Sentiment Analysis", type="secondary"):
99
  if st.session_state['url_count'] < max_attempts:
100
  if url:
 
101
  with st.spinner("Wait for it...", show_time=True):
102
  options = Options()
103
  options.add_argument("--headless")
@@ -110,31 +55,26 @@ if st.button("Sentiment Analysis", type="secondary"):
110
  data = []
111
  wait = WebDriverWait(driver, 30)
112
  driver.get(url)
113
-
114
  placeholder = st.empty()
115
  progress_bar = st.progress(0)
116
-
117
  for item in range(30):
118
  try:
119
  body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
120
  body.send_keys(Keys.END)
121
  placeholder.text(f"Scrolled {item + 1} times")
122
- progress_bar.progress((item + 1) / 150)
123
  time.sleep(0.5)
124
  except Exception as e:
125
  st.error(f"Exception during scrolling: {e}")
126
  break
127
-
128
  placeholder.text("Scrolling complete.")
129
  progress_bar.empty()
130
-
131
  data = []
132
  try:
133
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
134
  comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
135
  user_id = 1
136
  for comment in comments:
137
-
138
  data.append({"Comment": comment.text})
139
  user_id += 1
140
  data = [dict(t) for t in {tuple(d.items()) for d in data}]
@@ -143,7 +83,6 @@ if st.button("Sentiment Analysis", type="secondary"):
143
  driver.quit()
144
  df = pd.DataFrame(data, columns=["Comment"])
145
  st.dataframe(df)
146
-
147
  if tokenizer and model:
148
  inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
149
  with torch.no_grad():
@@ -154,7 +93,6 @@ if st.button("Sentiment Analysis", type="secondary"):
154
  for i, label in enumerate(predicted_labels):
155
  results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
156
  sentiment_df = pd.DataFrame(results)
157
-
158
  value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
159
  final_df = value_counts1
160
  tab1, tab2 = st.tabs(["Pie Chart", "Word Cloud"])
@@ -162,13 +100,10 @@ if st.button("Sentiment Analysis", type="secondary"):
162
  fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
163
  fig1.update_traces(textposition='inside', textinfo='percent+label')
164
  st.plotly_chart(fig1)
165
-
166
  result = pd.concat([df, sentiment_df], axis=1)
167
-
168
-
169
  with tab2:
170
  text = " ".join(comment for comment in df['Comment'])
171
- stopwords_set = set(stopwords.words('english'))
172
  text = re.sub('[^A-Za-z]+', ' ', text)
173
  words = text.split()
174
  clean_text = [word for word in words if word.lower() not in stopwords_set]
@@ -178,8 +113,6 @@ if st.button("Sentiment Analysis", type="secondary"):
178
  plt.imshow(wc)
179
  plt.axis('off')
180
  st.pyplot(fig)
181
-
182
-
183
  csv = result.to_csv(index=False)
184
  st.download_button(
185
  label="Download data as CSV",
@@ -187,14 +120,10 @@ if st.button("Sentiment Analysis", type="secondary"):
187
  file_name='Summary of the results.csv',
188
  mime='text/csv',
189
  )
190
-
191
  else:
192
  st.warning("Please enter a URL.")
193
  else:
194
  st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
195
 
196
- if 'url_count' in st.session_state: #added if statement.
197
- st.write(f"URL pasted {st.session_state['url_count']} times.")
198
-
199
-
200
-
 
1
+ import streamlit as st
2
+ import transformers
3
+ import pandas as pd
4
+ import torch
5
  from selenium import webdriver
 
 
 
6
  from selenium.webdriver.chrome.service import Service
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from webdriver_manager.core.os_manager import ChromeType
9
+ from selenium.webdriver.common.by import By
 
 
10
  from selenium.webdriver.support.ui import WebDriverWait
11
  from selenium.webdriver.support import expected_conditions as EC
12
+ from selenium.webdriver.common.keys import Keys
13
+ from selenium.webdriver.chrome.options import Options
14
  import time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  import plotly.express as px
 
 
 
 
16
  from wordcloud import WordCloud
17
+ import matplotlib.pyplot as plt
18
+ import nltk
19
  from nltk.corpus import stopwords
20
+ import re
21
 
 
22
  nltk.download('stopwords')
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  st.subheader("YouTube Comments Sentiment Analysis", divider="red")
25
  tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
26
  model = transformers.DistilBertForSequenceClassification.from_pretrained("tabularisai/robust-sentiment-analysis")
 
42
  if st.button("Sentiment Analysis", type="secondary"):
43
  if st.session_state['url_count'] < max_attempts:
44
  if url:
45
+ update_url_count() # Increment count only when the button is pressed and URL is valid.
46
  with st.spinner("Wait for it...", show_time=True):
47
  options = Options()
48
  options.add_argument("--headless")
 
55
  data = []
56
  wait = WebDriverWait(driver, 30)
57
  driver.get(url)
 
58
  placeholder = st.empty()
59
  progress_bar = st.progress(0)
 
60
  for item in range(30):
61
  try:
62
  body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
63
  body.send_keys(Keys.END)
64
  placeholder.text(f"Scrolled {item + 1} times")
65
+ progress_bar.progress((item + 1) / 30) # Corrected progress bar update
66
  time.sleep(0.5)
67
  except Exception as e:
68
  st.error(f"Exception during scrolling: {e}")
69
  break
 
70
  placeholder.text("Scrolling complete.")
71
  progress_bar.empty()
 
72
  data = []
73
  try:
74
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
75
  comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
76
  user_id = 1
77
  for comment in comments:
 
78
  data.append({"Comment": comment.text})
79
  user_id += 1
80
  data = [dict(t) for t in {tuple(d.items()) for d in data}]
 
83
  driver.quit()
84
  df = pd.DataFrame(data, columns=["Comment"])
85
  st.dataframe(df)
 
86
  if tokenizer and model:
87
  inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
88
  with torch.no_grad():
 
93
  for i, label in enumerate(predicted_labels):
94
  results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
95
  sentiment_df = pd.DataFrame(results)
 
96
  value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
97
  final_df = value_counts1
98
  tab1, tab2 = st.tabs(["Pie Chart", "Word Cloud"])
 
100
  fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
101
  fig1.update_traces(textposition='inside', textinfo='percent+label')
102
  st.plotly_chart(fig1)
 
103
  result = pd.concat([df, sentiment_df], axis=1)
 
 
104
  with tab2:
105
  text = " ".join(comment for comment in df['Comment'])
106
+ stopwords_set = set(stopwords.words('english'))
107
  text = re.sub('[^A-Za-z]+', ' ', text)
108
  words = text.split()
109
  clean_text = [word for word in words if word.lower() not in stopwords_set]
 
113
  plt.imshow(wc)
114
  plt.axis('off')
115
  st.pyplot(fig)
 
 
116
  csv = result.to_csv(index=False)
117
  st.download_button(
118
  label="Download data as CSV",
 
120
  file_name='Summary of the results.csv',
121
  mime='text/csv',
122
  )
 
123
  else:
124
  st.warning("Please enter a URL.")
125
  else:
126
  st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
127
 
128
+ if 'url_count' in st.session_state:
129
+ st.write(f"URL pasted {st.session_state['url_count']} times.")