Update app.py
Browse files
app.py
CHANGED
@@ -4,44 +4,14 @@ from selenium.webdriver.common.by import By
|
|
4 |
from selenium.webdriver.chrome.options import Options
|
5 |
from selenium.webdriver.chrome.service import Service
|
6 |
import pandas as pd
|
7 |
-
from selenium.webdriver.common.keys import Keys
|
8 |
from selenium.webdriver.support.ui import WebDriverWait
|
9 |
from selenium.webdriver.support import expected_conditions as EC
|
10 |
import time
|
11 |
-
from datetime import datetime
|
12 |
from webdriver_manager.chrome import ChromeDriverManager
|
13 |
-
from
|
14 |
-
from webdriver_manager.core.os_manager import ChromeType
|
15 |
-
import re
|
16 |
import transformers
|
17 |
-
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
|
18 |
-
import io
|
19 |
-
import plotly.express as px
|
20 |
-
import zipfile
|
21 |
import torch
|
22 |
-
|
23 |
-
with st.sidebar:
|
24 |
-
st.button("DEMO APP", type="primary")
|
25 |
-
expander = st.expander("**Important notes on the YouTube Comments Sentiment Analysis App**")
|
26 |
-
expander.write('''
|
27 |
-
**How to Use**
|
28 |
-
This app works with a YouTube URL. Paste the URL and press the 'Sentiment Analysis' button to perform sentiment analysis on your YouTube Comments.
|
29 |
-
|
30 |
-
**Usage Limits**
|
31 |
-
You can perform sentiment analysis on YouTube Comments up to 5 times.
|
32 |
-
|
33 |
-
**Subscription Management**
|
34 |
-
This demo app offers a one-day subscription, expiring after 24 hours. If you are interested in building your own YouTube Comments Sentiment Analysis Web App, we invite you to explore our NLP Web App Store on our website. You can select your desired features, place your order, and we will deliver your custom app in five business days. If you wish to delete your Account with us, please contact us at [email protected]
|
35 |
-
|
36 |
-
**Customization**
|
37 |
-
To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.
|
38 |
-
|
39 |
-
**Charts**
|
40 |
-
Hover to interact with and download the charts.
|
41 |
-
|
42 |
-
**File Handling and Errors**
|
43 |
-
For any errors or inquiries, please contact us at [email protected]
|
44 |
-
''')
|
45 |
|
46 |
st.subheader("YouTube Comments Sentiment Analysis", divider="red")
|
47 |
tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
|
@@ -76,40 +46,54 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
76 |
data = []
|
77 |
wait = WebDriverWait(driver, 30)
|
78 |
driver.get(url)
|
|
|
79 |
placeholder = st.empty()
|
80 |
progress_bar = st.progress(0)
|
|
|
81 |
for item in range(30):
|
82 |
try:
|
83 |
-
|
84 |
-
|
85 |
placeholder.text(f"Scrolled {item + 1} times")
|
86 |
-
progress_bar.progress((item + 1) /
|
87 |
-
time.sleep(
|
88 |
except Exception as e:
|
89 |
st.error(f"Exception during scrolling: {e}")
|
90 |
break
|
|
|
91 |
placeholder.text("Scrolling complete.")
|
92 |
progress_bar.empty()
|
|
|
93 |
try:
|
94 |
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
|
95 |
comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
|
96 |
-
user_id = 1
|
97 |
for comment in comments:
|
98 |
timestamp = None
|
99 |
try:
|
|
|
100 |
timestamp_element = comment.find_element(By.XPATH, './ancestor::ytd-comment-renderer//yt-formatted-string[@class="published-time-text style-scope ytd-comment-renderer"]')
|
101 |
timestamp = timestamp_element.text
|
102 |
-
except Exception
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
except Exception as e:
|
108 |
st.error(f"Exception during comment extraction: {e}")
|
109 |
driver.quit()
|
110 |
-
df = pd.DataFrame(data, columns=["
|
111 |
-
|
112 |
-
if
|
|
|
113 |
inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
|
114 |
with torch.no_grad():
|
115 |
logits = model(**inputs).logits
|
@@ -119,6 +103,7 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
119 |
for i, label in enumerate(predicted_labels):
|
120 |
results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
|
121 |
sentiment_df = pd.DataFrame(results)
|
|
|
122 |
value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
|
123 |
final_df = value_counts1
|
124 |
tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
|
@@ -126,22 +111,23 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
126 |
fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
|
127 |
fig1.update_traces(textposition='inside', textinfo='percent+label')
|
128 |
st.plotly_chart(fig1)
|
|
|
129 |
result = pd.concat([df, sentiment_df], axis=1)
|
130 |
st.dataframe(result)
|
|
|
131 |
with tab2:
|
132 |
fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
|
133 |
st.plotly_chart(fig2)
|
|
|
134 |
csv = result.to_csv(index=False)
|
135 |
-
st.download_button(
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
mime='text/csv',
|
140 |
-
)
|
141 |
else:
|
142 |
st.warning("Please enter a URL.")
|
143 |
else:
|
144 |
st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
|
145 |
|
146 |
if 'url_count' in st.session_state:
|
147 |
-
st.write(f"URL pasted {st.session_state['url_count']} times.")
|
|
|
4 |
from selenium.webdriver.chrome.options import Options
|
5 |
from selenium.webdriver.chrome.service import Service
|
6 |
import pandas as pd
|
|
|
7 |
from selenium.webdriver.support.ui import WebDriverWait
|
8 |
from selenium.webdriver.support import expected_conditions as EC
|
9 |
import time
|
|
|
10 |
from webdriver_manager.chrome import ChromeDriverManager
|
11 |
+
from webdriver_manager.chrome import ChromeType
|
|
|
|
|
12 |
import transformers
|
|
|
|
|
|
|
|
|
13 |
import torch
|
14 |
+
import plotly.express as px
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
st.subheader("YouTube Comments Sentiment Analysis", divider="red")
|
17 |
tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
|
|
|
46 |
data = []
|
47 |
wait = WebDriverWait(driver, 30)
|
48 |
driver.get(url)
|
49 |
+
|
50 |
placeholder = st.empty()
|
51 |
progress_bar = st.progress(0)
|
52 |
+
|
53 |
# Scroll the page down in fixed 500px steps so that more comments get
# loaded, mirroring the progress in the Streamlit placeholder/progress bar.
total_scrolls = 30
comment_locator = (By.CSS_SELECTOR, "#content #content-text")
for item in range(total_scrolls):
    scroll_number = item + 1
    try:
        driver.execute_script("window.scrollBy(0, 500);")
        # Wait (up to the WebDriverWait timeout) until a comment body
        # element is present in the DOM before reporting progress.
        wait.until(EC.presence_of_element_located(comment_locator))
        placeholder.text(f"Scrolled {scroll_number} times")
        progress_bar.progress(scroll_number / total_scrolls)
        # Pause between scrolls to give dynamically loaded content time
        # to appear.
        time.sleep(1)
    except Exception as e:
        # Report the failure in the UI and stop scrolling.
        st.error(f"Exception during scrolling: {e}")
        break
|
63 |
+
|
64 |
placeholder.text("Scrolling complete.")
|
65 |
progress_bar.empty()
|
66 |
+
|
67 |
# Collect the text of every loaded comment together with its timestamp
# text and append one {"Comment", "comment_date"} record per comment to
# `data`.
#
# A single selector does not always match the timestamp element, so the
# XPaths below are tried in order: the first requires the exact class
# string, the second only a class containing "time-text", and the last
# falls back to the <a id="time"> anchor.
timestamp_xpaths = (
    './ancestor::ytd-comment-renderer//yt-formatted-string[@class="published-time-text style-scope ytd-comment-renderer"]',
    './ancestor::ytd-comment-renderer//yt-formatted-string[contains(@class, "time-text")]',
    './ancestor::ytd-comment-renderer//a[@id="time"]',
)
try:
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
    comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
    for comment in comments:
        timestamp = None
        # Exception names are unbound when their `except` block ends, so
        # the most recent failure is kept here for the debug message.
        lookup_error = None
        for xpath in timestamp_xpaths:
            try:
                timestamp = comment.find_element(By.XPATH, xpath).text
                break
            except Exception as inner_e:
                # Best-effort lookup: remember the failure and try the
                # next, looser selector.
                lookup_error = inner_e
        else:
            # Reached only when every selector failed; the comment is
            # still recorded below with a None timestamp.
            print(f"Timestamp not found for comment: {comment.text}. Error: {lookup_error}")  # debug
        data.append({"Comment": comment.text, "comment_date": timestamp})
except Exception as e:
    # Any failure while waiting for or reading comments is shown in the UI.
    st.error(f"Exception during comment extraction: {e}")
|
92 |
driver.quit()
|
93 |
+
df = pd.DataFrame(data, columns=["Comment", "comment_date"])
|
94 |
+
|
95 |
+
if not df.empty and not df['Comment'].tolist() == []:
|
96 |
+
st.dataframe(df)
|
97 |
inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
|
98 |
with torch.no_grad():
|
99 |
logits = model(**inputs).logits
|
|
|
103 |
for i, label in enumerate(predicted_labels):
|
104 |
results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
|
105 |
sentiment_df = pd.DataFrame(results)
|
106 |
+
|
107 |
value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
|
108 |
final_df = value_counts1
|
109 |
tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
|
|
|
111 |
fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
|
112 |
fig1.update_traces(textposition='inside', textinfo='percent+label')
|
113 |
st.plotly_chart(fig1)
|
114 |
+
|
115 |
result = pd.concat([df, sentiment_df], axis=1)
|
116 |
st.dataframe(result)
|
117 |
+
|
118 |
with tab2:
|
119 |
fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
|
120 |
st.plotly_chart(fig2)
|
121 |
+
|
122 |
csv = result.to_csv(index=False)
|
123 |
+
st.download_button(label="Download data as CSV", data=csv, file_name='Summary of the results.csv', mime='text/csv')
|
124 |
+
else:
|
125 |
+
st.warning("No comments were scraped. Sentiment analysis could not be performed.")
|
126 |
+
|
|
|
|
|
127 |
else:
|
128 |
st.warning("Please enter a URL.")
|
129 |
else:
|
130 |
st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
|
131 |
|
132 |
if 'url_count' in st.session_state:
|
133 |
+
st.write(f"URL pasted {st.session_state['url_count']} times.")
|