Spaces:

nlpblogs
/

youtube-sentiment-analysis-app

Running

App Files Files Community

youtube-sentiment-analysis-app / app.py

nlpblogs

Update app.py

feca4a5 verified about 1 month ago

raw

history blame

8.22 kB

	import streamlit as st

	from selenium import webdriver
	from selenium.webdriver.common.by import By
	from selenium.webdriver.chrome.options import Options

	from selenium.webdriver.chrome.service import Service

	import pandas as pd

	from selenium.webdriver.common.keys import Keys

	from selenium.webdriver.support.ui import WebDriverWait
	from selenium.webdriver.support import expected_conditions as EC
	import time
	import sys
	from datetime import datetime


	from webdriver_manager.chrome import ChromeDriverManager
	from selenium.webdriver.chrome.service import Service as ChromeService

	from webdriver_manager.core.os_manager import ChromeType

	import re


	import transformers
	from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
	import io
	import plotly.express as px
	import zipfile
	import torch

	import matplotlib.pyplot as plt
	from wordcloud import WordCloud
	from nltk.corpus import stopwords

	import nltk
	# Make sure the NLTK stop-word corpus is present locally; the word-cloud step
	# further down calls stopwords.words('english'), which needs this data.
	# NOTE(review): this runs on every execution of the script — presumably a
	# no-op once the corpus has been downloaded; confirm against the NLTK docs.
	nltk.download('stopwords')



	# Sidebar badge: a primary-styled button labelled "DEMO APP". Its return
	# value is not used, so clicking it only triggers a rerun.
	st.sidebar.button("DEMO APP", type="primary")


	# Help text shown inside the collapsible "Important notes" panel.
	# NOTE(review): "[email protected]" looks like a scrubbed e-mail address —
	# confirm the real contact address before shipping.
	notes_text = '''


	How to Use
	This app works with a YouTube URL. Paste the URL and press the 'Sentiment Analysis' button to perform sentiment analysis on your YouTube Comments.


	Usage Limits
	You can perform sentiment analysis on YouTube Comments up to 5 times.


	Subscription Management
	This demo app offers a one-day subscription, expiring after 24 hours. If you are interested in building your own YouTube Comments Sentiment Analysis Web App, we invite you to explore our NLP Web App Store on our website. You can select your desired features, place your order, and we will deliver your custom app in five business days. If you wish to delete your Account with us, please contact us at [email protected]


	Customization
	To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.


	Charts
	Hover to interact with and download the charts.


	File Handling and Errors
	For any errors or inquiries, please contact us at [email protected]



	'''

	# Create the panel first, then write the notes into it.
	expander = st.expander("Important notes on the YouTube Comments Sentiment Analysis App")
	expander.write(notes_text)


	st.subheader("YouTube Comments Sentiment Analysis", divider="red")


	@st.cache_resource
	def _load_sentiment_model(model_name="tabularisai/robust-sentiment-analysis"):
	    """Load the DistilBERT tokenizer and classifier once per server process.

	    Streamlit re-executes this script on every widget interaction; without
	    caching, both ``from_pretrained`` calls would run again on each rerun.
	    ``st.cache_resource`` keeps the returned objects and hands the same
	    instances back on later calls with the same ``model_name``.

	    Args:
	        model_name: Hugging Face model identifier to load.

	    Returns:
	        A ``(tokenizer, model)`` tuple.
	    """
	    loaded_tokenizer = transformers.DistilBertTokenizer.from_pretrained(model_name)
	    loaded_model = transformers.DistilBertForSequenceClassification.from_pretrained(model_name)
	    return loaded_tokenizer, loaded_model


	# Module-level names kept as-is: the analysis code below reads them directly.
	tokenizer, model = _load_sentiment_model()

	# Upper bound that the "Sentiment Analysis" handler compares url_count against.
	# NOTE(review): the in-app help text says "up to 5 times" while this limit
	# is 2 — confirm which value is intended.
	max_attempts = 2

	# Seed the per-session counter the first time the script runs for a session;
	# later reruns find the key already present and leave its value alone.
	counter_missing = 'url_count' not in st.session_state
	if counter_missing:
	    st.session_state['url_count'] = 0

	def update_url_count():
	    """Add one to the ``url_count`` entry in Streamlit session state."""
	    previous = st.session_state['url_count']
	    st.session_state['url_count'] = previous + 1

	def clear_question():
	    """Reset the ``url`` session-state entry to an empty string.

	    ``url`` is the key of the URL text input, so this blanks that field; it
	    is wired up as the ``on_click`` callback of the "Clear question" button.
	    """
	    blank = ""
	    st.session_state["url"] = blank

	# The text box is bound to session-state key "url" so that clear_question
	# can blank it from the button callback below.
	url = st.text_input(label="Enter YouTube URL:", key="url")
	st.button(label="Clear question", on_click=clear_question)

	def _scrape_comments(video_url, scroll_rounds=30, timeout=30):
	    """Collect the comment texts shown on the page at *video_url*.

	    Opens the page in headless Chromium, presses END ``scroll_rounds`` times
	    so the page can load more comments, then reads every element matching
	    ``#content #content-text``. Problems are reported with ``st.error``
	    instead of being raised, and the browser is always shut down.

	    Args:
	        video_url: Address of the page to open.
	        scroll_rounds: How many times to scroll to the bottom of the page.
	        timeout: Seconds to wait for page elements before giving up.

	    Returns:
	        A list of row dicts with the keys ``"User ID"`` (1-based running
	        number), ``"Comment"`` and ``"comment_date"``, in page order. Blank
	        and repeated comment texts are skipped. Empty if nothing was scraped.
	    """
	    options = Options()
	    for flag in (
	        "--headless",
	        "--disable-gpu",
	        "--no-sandbox",
	        "--disable-dev-shm-usage",
	        "--start-maximized",
	    ):
	        options.add_argument(flag)
	    service = Service(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install())
	    driver = webdriver.Chrome(service=service, options=options)

	    rows = []
	    try:
	        wait = WebDriverWait(driver, timeout)
	        try:
	            driver.get(video_url)
	        except Exception as e:
	            # Typically a malformed or unreachable URL typed by the user.
	            st.error(f"Exception during page load: {e}")
	            return rows

	        placeholder = st.empty()
	        progress_bar = st.progress(0)
	        for item in range(scroll_rounds):
	            try:
	                body = wait.until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
	                body.send_keys(Keys.END)
	                placeholder.text(f"Scrolled {item + 1} times")
	                # Divide by the real number of rounds so the bar reaches 100%.
	                progress_bar.progress((item + 1) / scroll_rounds)
	                time.sleep(0.5)
	            except Exception as e:
	                st.error(f"Exception during scrolling: {e}")
	                break
	        placeholder.text("Scrolling complete.")
	        progress_bar.empty()

	        try:
	            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
	            # This is the scrape date, not the date the comment was posted.
	            scraped_on = datetime.now().strftime("%Y-%m-%d")
	            seen_texts = set()
	            for element in driver.find_elements(By.CSS_SELECTOR, "#content #content-text"):
	                comment_text = element.text
	                # De-duplicate on the text itself, before numbering: a
	                # per-row ID would make every row unique and defeat the check.
	                if not comment_text.strip() or comment_text in seen_texts:
	                    continue
	                seen_texts.add(comment_text)
	                rows.append({
	                    "User ID": len(rows) + 1,
	                    "Comment": comment_text,
	                    "comment_date": scraped_on,
	                })
	        except Exception as e:
	            st.error(f"Exception during comment extraction: {e}")
	    finally:
	        # Always release the browser process, even if something above raised.
	        driver.quit()
	    return rows


	def _classify_comments(comments, batch_size=32):
	    """Return one sentiment label per entry of *comments*.

	    Runs the module-level ``tokenizer`` and ``model`` over the texts in
	    batches of ``batch_size`` so memory use does not grow with the total
	    number of comments.

	    Args:
	        comments: List of comment strings.
	        batch_size: Number of comments tokenized and scored per forward pass.

	    Returns:
	        A list of label strings taken from ``model.config.id2label``, in the
	        same order as *comments*.
	    """
	    labels = []
	    for start in range(0, len(comments), batch_size):
	        batch = comments[start:start + batch_size]
	        inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True)
	        with torch.no_grad():
	            logits = model(**inputs).logits
	        # Softmax is monotonic, so the argmax of the logits is also the
	        # argmax of the class probabilities.
	        labels.extend(model.config.id2label[index] for index in logits.argmax(dim=-1).tolist())
	    return labels


	def _render_word_cloud(comments):
	    """Draw a word cloud of *comments* with English stop words removed.

	    Args:
	        comments: Iterable of comment strings.

	    Shows an info message instead of a figure when no words remain after
	    cleaning, because ``WordCloud.generate`` raises on empty text.
	    """
	    # Use a distinct local name: rebinding ``stopwords`` would hide the NLTK
	    # corpus reader imported at the top of the file.
	    english_stopwords = set(stopwords.words('english'))
	    letters_only = re.sub('[^A-Za-z]+', ' ', " ".join(comments))
	    clean_text = ' '.join(
	        word for word in letters_only.split() if word.lower() not in english_stopwords
	    )
	    if not clean_text:
	        st.info("Not enough text to build a word cloud.")
	        return

	    wc = WordCloud(width=800, height=400, background_color='white').generate(clean_text)
	    fig = plt.figure(figsize=(12, 6))
	    plt.imshow(wc, interpolation='bilinear')
	    plt.axis('off')
	    st.pyplot(fig)
	    # Free the figure; pyplot otherwise keeps every figure from every rerun.
	    plt.close(fig)


	# Main flow: scrape the comments, classify them, chart and export the results.
	if st.button("Sentiment Analysis", type="secondary"):
	    if st.session_state['url_count'] >= max_attempts:
	        st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
	    elif not url:
	        st.warning("Please enter a URL.")
	    else:
	        # Count the attempt once an analysis really starts; without this call
	        # the counter stays at 0 and the limit above is never reached.
	        update_url_count()
	        with st.spinner("Wait for it...", show_time=True):
	            data = _scrape_comments(url)
	            df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
	            st.dataframe(df)

	            if df.empty:
	                # Nothing to tokenize or plot; stop before the model is called.
	                st.warning("No comments were found for this URL.")
	            elif tokenizer is not None and model is not None:
	                sentiments = _classify_comments(df['Comment'].tolist())
	                sentiment_df = pd.DataFrame({
	                    'Review Number': list(range(1, len(sentiments) + 1)),
	                    'Sentiment': sentiments,
	                })
	                # Both frames share the default 0..n-1 index, so the columns
	                # line up row by row.
	                result = pd.concat([df, sentiment_df], axis=1)
	                final_df = (
	                    sentiment_df['Sentiment']
	                    .value_counts()
	                    .rename_axis('Sentiment')
	                    .reset_index(name='count')
	                )

	                tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
	                with tab1:
	                    fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
	                    fig1.update_traces(textposition='inside', textinfo='percent+label')
	                    st.plotly_chart(fig1)
	                with tab2:
	                    # NOTE(review): y is the scrape date string, identical for
	                    # every row — confirm this is the intended axis.
	                    fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
	                    st.plotly_chart(fig2)

	                st.dataframe(result)

	                _render_word_cloud(df['Comment'])

	                csv = result.to_csv(index=False)
	                st.download_button(
	                    label="Download data as CSV",
	                    data=csv,
	                    file_name='Summary of the results.csv',
	                    mime='text/csv',
	                )

	# Show the current value of the session's url_count counter, if it exists.
	counter_present = 'url_count' in st.session_state
	if counter_present:
	    st.write(f"URL pasted {st.session_state['url_count']} times.")