File size: 8,223 Bytes
c6127ba ca12572 c6127ba ca12572 c6127ba ca12572 c6127ba 2096041 c6127ba ca12572 c6127ba 2096041 c6127ba ca12572 2096041 c6127ba 2096041 c6127ba ca12572 c6127ba 5a1ddac c6127ba feca4a5 c6127ba ca12572 ea96c2b ca12572 ea96c2b eae5fab ea96c2b eae5fab ca12572 eae5fab ca12572 eae5fab ca12572 eae5fab c6127ba 30bf2ff c6127ba 30bf2ff 2096041 4307f05 2096041 30bf2ff c6127ba 30bf2ff feca4a5 54fb744 feca4a5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 |
import streamlit as st
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import pandas as pd
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import sys
from datetime import datetime
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.core.os_manager import ChromeType
import re
import transformers
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
import io
import plotly.express as px
import zipfile
import torch
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')
# Sidebar content: a "DEMO APP" badge button and a collapsible panel holding
# the usage notes. The text is kept in named strings so the layout code below
# stays short.
# NOTE(review): "[email protected]" in the notes looks like an e-mail
# obfuscation placeholder left by a web scrape — confirm the real address.
# NOTE(review): the notes promise "up to 5 times"; verify this against the
# attempt limit configured further down in the script.
_NOTES_TITLE = "**Important notes on the YouTube Comments Sentiment Analysis App**"
_NOTES_BODY = '''
**How to Use**
This app works with a YouTube URL. Paste the URL and press the 'Sentiment Analysis' button to perform sentiment analysis on your YouTube Comments.
**Usage Limits**
You can perform sentiment analysis on YouTube Comments up to 5 times.
**Subscription Management**
This demo app offers a one-day subscription, expiring after 24 hours. If you are interested in building your own YouTube Comments Sentiment Analysis Web App, we invite you to explore our NLP Web App Store on our website. You can select your desired features, place your order, and we will deliver your custom app in five business days. If you wish to delete your Account with us, please contact us at [email protected]
**Customization**
To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.
**Charts**
Hover to interact with and download the charts.
**File Handling and Errors**
For any errors or inquiries, please contact us at [email protected]
'''

with st.sidebar:
    st.button("DEMO APP", type="primary")
    expander = st.expander(_NOTES_TITLE)
    expander.write(_NOTES_BODY)
st.subheader("YouTube Comments Sentiment Analysis", divider="red")


@st.cache_resource
def _load_sentiment_model(model_name="tabularisai/robust-sentiment-analysis"):
    """Load the DistilBERT tokenizer and classifier once per server process.

    Streamlit re-executes the whole script on every widget interaction, so
    loading at module level would repeat the (slow) ``from_pretrained`` calls
    on each click. ``st.cache_resource`` keeps the loaded objects and hands
    the same instances back on later reruns.

    Args:
        model_name: Hugging Face model identifier to load.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = transformers.DistilBertTokenizer.from_pretrained(model_name)
    loaded_model = transformers.DistilBertForSequenceClassification.from_pretrained(model_name)
    return loaded_tokenizer, loaded_model


tokenizer, model = _load_sentiment_model()

# Per-session counter of analysed URLs; created on the first run only.
if 'url_count' not in st.session_state:
    st.session_state['url_count'] = 0

# Maximum number of analyses allowed per session.
# NOTE(review): the sidebar help text says "up to 5 times" while this limit
# is 2 — confirm which value is intended and align the other.
max_attempts = 2
def update_url_count():
    """Increment the per-session ``url_count`` counter by one."""
    st.session_state.url_count += 1
def clear_question():
    """Reset the session value stored under the ``url`` key to an empty string."""
    st.session_state.url = ""
# The text box is bound to session key "url", which is the same key that
# clear_question() resets when the button below is clicked.
url = st.text_input(label="Enter YouTube URL:", key="url")
st.button(label="Clear question", on_click=clear_question)
def _scrape_comments(video_url, scroll_count=30):
    """Load *video_url* in headless Chromium and return the comment texts found.

    The page is scrolled by sending the END key ``scroll_count`` times, with a
    progress bar shown while scrolling. Afterwards every element matching
    ``#content #content-text`` is read.

    Args:
        video_url: Address to open in the browser.
        scroll_count: Number of END key presses to send.

    Returns:
        A list of comment strings in page order. It may be empty or partial
        if scrolling or extraction failed; those failures are reported via
        ``st.error`` instead of being raised.
    """
    options = Options()
    for flag in (
        "--headless",
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--start-maximized",
    ):
        options.add_argument(flag)
    service = Service(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install())
    driver = webdriver.Chrome(service=service, options=options)
    try:
        wait = WebDriverWait(driver, 30)
        driver.get(video_url)
        placeholder = st.empty()
        progress_bar = st.progress(0)
        for item in range(scroll_count):
            try:
                body = wait.until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
                body.send_keys(Keys.END)
                placeholder.text(f"Scrolled {item + 1} times")
                # Divide by the real number of scrolls so the bar reaches 100%.
                progress_bar.progress((item + 1) / scroll_count)
                time.sleep(0.5)
            except Exception as e:
                st.error(f"Exception during scrolling: {e}")
                break
        placeholder.text("Scrolling complete.")
        progress_bar.empty()

        texts = []
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
            for element in driver.find_elements(By.CSS_SELECTOR, "#content #content-text"):
                texts.append(element.text)
        except Exception as e:
            st.error(f"Exception during comment extraction: {e}")
        return texts
    finally:
        # Always shut the browser down, even when loading the page raises.
        driver.quit()


if st.button("Sentiment Analysis", type="secondary"):
    if st.session_state['url_count'] >= max_attempts:
        st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
    elif not url:
        st.warning("Please enter a URL.")
    else:
        # Count this submission; without this the attempt limit never triggers.
        update_url_count()
        with st.spinner("Wait for it...", show_time=True):
            comment_texts = _scrape_comments(url)

            # "comment_date" holds the date of the scrape, not the date the
            # comment was posted (the page timestamp is not read).
            scrape_date = datetime.now().strftime("%Y-%m-%d")
            # Rows keep page order. No set-based de-duplication is done here:
            # with a unique "User ID" per row it could never drop anything and
            # only randomised the row order.
            data = [
                {"User ID": user_id, "Comment": text, "comment_date": scrape_date}
                for user_id, text in enumerate(comment_texts, start=1)
            ]
            df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
            st.dataframe(df)

            if df.empty:
                # The tokenizer and the word cloud both fail on empty input.
                st.warning("No comments were found for this URL.")
            elif tokenizer is not None and model is not None:
                inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
                with torch.no_grad():
                    logits = model(**inputs).logits
                # Softmax is monotonic, so the arg-max of the logits is the
                # same class as the arg-max of the probabilities.
                predicted_labels = logits.argmax(dim=-1).tolist()
                sentiment_df = pd.DataFrame({
                    'Review Number': range(1, len(predicted_labels) + 1),
                    'Sentiment': [model.config.id2label[label_id] for label_id in predicted_labels],
                })
                final_df = (
                    sentiment_df['Sentiment']
                    .value_counts()
                    .rename_axis('Sentiment')
                    .reset_index(name='count')
                )
                # Both frames share the default 0..n-1 index, so a column-wise
                # concat pairs each comment with its predicted label.
                result = pd.concat([df, sentiment_df], axis=1)

                tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
                with tab1:
                    fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
                    fig1.update_traces(textposition='inside', textinfo='percent+label')
                    st.plotly_chart(fig1)
                    st.dataframe(result)
                with tab2:
                    # Plot label counts; the scrape date is constant across
                    # rows and carries no information on a y axis.
                    fig2 = px.bar(final_df, x="Sentiment", y="count", color="Sentiment")
                    st.plotly_chart(fig2)

                # Word cloud over the comment text. The stop-word set gets its
                # own name so the imported `stopwords` corpus is not shadowed.
                english_stopwords = set(stopwords.words('english'))
                text = re.sub('[^A-Za-z]+', ' ', " ".join(df['Comment']))
                clean_text = ' '.join(
                    word for word in text.split() if word.lower() not in english_stopwords
                )
                if clean_text:
                    wc = WordCloud(width=800, height=400, background_color='white').generate(clean_text)
                    fig, ax = plt.subplots(figsize=(12, 6))
                    ax.imshow(wc, interpolation='bilinear')
                    ax.axis('off')
                    st.pyplot(fig)

                csv = result.to_csv(index=False)
                st.download_button(
                    label="Download data as CSV",
                    data=csv,
                    file_name='Summary of the results.csv',
                    mime='text/csv',
                )
# Footer: show how many URLs this session has submitted so far. The membership
# check guards against the counter not having been created yet.
if 'url_count' in st.session_state:
    attempts_so_far = st.session_state['url_count']
    st.write(f"URL pasted {attempts_so_far} times.")
|