nlpblogs committed on
Commit
6fafb78
·
verified ·
1 Parent(s): 645880e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -76
app.py CHANGED
@@ -1,74 +1,19 @@
1
- import streamlit as st
2
-
3
  from selenium import webdriver
4
  from selenium.webdriver.common.by import By
5
  from selenium.webdriver.chrome.options import Options
6
-
7
  from selenium.webdriver.chrome.service import Service
8
-
9
  import pandas as pd
10
-
11
  from selenium.webdriver.common.keys import Keys
12
-
13
  from selenium.webdriver.support.ui import WebDriverWait
14
  from selenium.webdriver.support import expected_conditions as EC
15
  import time
16
- import sys
17
  from datetime import datetime
18
-
19
-
20
  from webdriver_manager.chrome import ChromeDriverManager
21
- from selenium.webdriver.chrome.service import Service as ChromeService
22
-
23
- from webdriver_manager.core.os_manager import ChromeType
24
-
25
- import re
26
-
27
-
28
  import transformers
29
- from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
30
- import io
31
  import plotly.express as px
32
- import zipfile
33
- import torch
34
-
35
-
36
-
37
- with st.sidebar:
38
- st.button("DEMO APP", type="primary")
39
-
40
-
41
- expander = st.expander("**Important notes on the YouTube Comments Sentiment Analysis App**")
42
- expander.write('''
43
-
44
-
45
- **How to Use**
46
- This app works with a YouTube URL. Paste the URL and press the 'Sentiment Analysis' button to perform sentiment analysis on your YouTube Comments.
47
-
48
-
49
- **Usage Limits**
50
- You can perform sentiment analysis on YouTube Comments up to 5 times.
51
-
52
-
53
- **Subscription Management**
54
- This demo app offers a one-day subscription, expiring after 24 hours. If you are interested in building your own YouTube Comments Sentiment Analysis Web App, we invite you to explore our NLP Web App Store on our website. You can select your desired features, place your order, and we will deliver your custom app in five business days. If you wish to delete your Account with us, please contact us at [email protected]
55
-
56
-
57
- **Customization**
58
- To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.
59
-
60
-
61
- **Charts**
62
- Hover to interact with and download the charts.
63
-
64
-
65
- **File Handling and Errors**
66
- For any errors or inquiries, please contact us at [email protected]
67
-
68
-
69
-
70
- ''')
71
-
72
 
73
  st.subheader("YouTube Comments Sentiment Analysis", divider="red")
74
  tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
@@ -109,8 +54,8 @@ if st.button("Sentiment Analysis", type="secondary"):
109
 
110
  for item in range(30):
111
  try:
112
- body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
113
- body.send_keys(Keys.END)
114
  placeholder.text(f"Scrolled {item + 1} times")
115
  progress_bar.progress((item + 1) / 30)
116
  time.sleep(0.5)
@@ -121,26 +66,58 @@ if st.button("Sentiment Analysis", type="secondary"):
121
  placeholder.text("Scrolling complete.")
122
  progress_bar.empty()
123
 
124
- data = []
125
  try:
126
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
127
  comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
128
- user_id = 1
129
  for comment in comments:
130
- timestamp_elements = comment.find_elements(By.XPATH, './/a[@id="time"]') #Correct XPATH syntax.
131
- timestamp = "Timestamp not found"
132
- if timestamp_elements:
133
- timestamp = timestamp_elements[0].text
134
- data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
135
- user_id += 1
136
- data = [dict(t) for t in {tuple(d.items()) for d in data}]
137
-
138
  except Exception as e:
139
  st.error(f"Exception during comment extraction: {e}")
140
  driver.quit()
141
- df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
142
- st.dataframe(df)
143
-
144
-
145
-
146
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
 
2
  from selenium import webdriver
3
  from selenium.webdriver.common.by import By
4
  from selenium.webdriver.chrome.options import Options
 
5
  from selenium.webdriver.chrome.service import Service
 
6
  import pandas as pd
 
7
  from selenium.webdriver.common.keys import Keys
 
8
  from selenium.webdriver.support.ui import WebDriverWait
9
  from selenium.webdriver.support import expected_conditions as EC
10
  import time
 
11
  from datetime import datetime
 
 
12
  from webdriver_manager.chrome import ChromeDriverManager
13
+ from webdriver_manager.chrome import ChromeType
 
 
 
 
 
 
14
  import transformers
15
+ import torch
 
16
  import plotly.express as px
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  st.subheader("YouTube Comments Sentiment Analysis", divider="red")
19
  tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
 
54
 
55
  for item in range(30):
56
  try:
57
+ driver.execute_script("window.scrollBy(0, 500);")
58
+ wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#content #content-text")))
59
  placeholder.text(f"Scrolled {item + 1} times")
60
  progress_bar.progress((item + 1) / 30)
61
  time.sleep(0.5)
 
66
  placeholder.text("Scrolling complete.")
67
  progress_bar.empty()
68
 
 
69
  try:
70
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
71
  comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
 
72
  for comment in comments:
73
+ try:
74
+ # Find the published-time element inside the enclosing ytd-comment-renderer ancestor of this comment
75
+ timestamp_element = comment.find_element(By.XPATH, './ancestor::ytd-comment-renderer//yt-formatted-string[@class="published-time-text style-scope ytd-comment-renderer"]')
76
+ timestamp = timestamp_element.text
77
+ except Exception:
78
+ timestamp = "Timestamp not found"
79
+ data.append({"Comment": comment.text, "comment_date": timestamp})
 
80
  except Exception as e:
81
  st.error(f"Exception during comment extraction: {e}")
82
  driver.quit()
83
+ df = pd.DataFrame(data, columns=["Comment", "comment_date"])
84
+
85
+ if not df.empty and not df['Comment'].tolist() == []:
86
+ st.dataframe(df)
87
+ inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
88
+ with torch.no_grad():
89
+ logits = model(**inputs).logits
90
+ predicted_probabilities = torch.nn.functional.softmax(logits, dim=-1)
91
+ predicted_labels = predicted_probabilities.argmax(dim=1)
92
+ results = []
93
+ for i, label in enumerate(predicted_labels):
94
+ results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
95
+ sentiment_df = pd.DataFrame(results)
96
+
97
+ value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
98
+ final_df = value_counts1
99
+ tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
100
+ with tab1:
101
+ fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
102
+ fig1.update_traces(textposition='inside', textinfo='percent+label')
103
+ st.plotly_chart(fig1)
104
+
105
+ result = pd.concat([df, sentiment_df], axis=1)
106
+ st.dataframe(result)
107
+
108
+ with tab2:
109
+ fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
110
+ st.plotly_chart(fig2)
111
+
112
+ csv = result.to_csv(index=False)
113
+ st.download_button(label="Download data as CSV", data=csv, file_name='Summary of the results.csv', mime='text/csv')
114
+ else:
115
+ st.warning("No comments were scraped. Sentiment analysis could not be performed.")
116
+
117
+ else:
118
+ st.warning("Please enter a URL.")
119
+ else:
120
+ st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
121
+
122
+ if 'url_count' in st.session_state:
123
+ st.write(f"URL pasted {st.session_state['url_count']} times.")