Spaces:

nlpblogs
/

youtube-sentiment-analysis-app

Running

App Files Files Community

youtube-sentiment-analysis-app / app.py

nlpblogs

Create app.py

ca12572 verified about 1 month ago

raw

history blame

5.3 kB

	import streamlit as st

	from selenium import webdriver
	from selenium.webdriver.common.by import By
	from selenium.webdriver.chrome.options import Options

	from selenium.webdriver.chrome.service import Service

	import pandas as pd

	from selenium.webdriver.common.keys import Keys

	from selenium.webdriver.support.ui import WebDriverWait
	from selenium.webdriver.support import expected_conditions as EC
	import time
	import sys
	from datetime import datetime


	from webdriver_manager.chrome import ChromeDriverManager
	from selenium.webdriver.chrome.service import Service as ChromeService

	from webdriver_manager.core.os_manager import ChromeType

	import re


	import transformers
	from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
	import io
	import plotly.express as px
	import zipfile
	import torch
	from streamlit_extras.stylable_container import stylable_container








	@st.cache_resource
	def _load_sentiment_model():
	    """Load the sentiment tokenizer and classifier once per server process.

	    Streamlit re-executes this script on every widget interaction; caching
	    the loaded objects avoids re-reading the model weights on each rerun.

	    Returns:
	        A ``(tokenizer, model)`` tuple for the
	        "tabularisai/robust-sentiment-analysis" checkpoint.
	    """
	    checkpoint = "tabularisai/robust-sentiment-analysis"
	    return (
	        transformers.DistilBertTokenizer.from_pretrained(checkpoint),
	        transformers.DistilBertForSequenceClassification.from_pretrained(checkpoint),
	    )


	tokenizer, model = _load_sentiment_model()

	def clear_question():
	    """Reset the URL text input by blanking its session-state entry.

	    Used as the ``on_click`` callback of the "Clear question" button.
	    """
	    st.session_state["youtube_video_url"] = ""

	# URL input; its value is stored in st.session_state under the widget key
	# "youtube_video_url", which is what clear_question() resets.
	youtube_video_url = st.text_input("Enter YouTube video URL:", key="youtube_video_url")
	st.button("Clear question", on_click=clear_question)

	# --- Scrape a video's comments and classify their sentiment -----------------
	# The helpers below are private to this section; the work itself only runs on
	# the script run triggered by the "Scrape Reviews" button.

	_MAX_SCROLLS = 150          # number of END key presses sent to the page
	_SCROLL_PAUSE_SECONDS = 3   # pause after each scroll so more comments can load
	_WAIT_TIMEOUT_SECONDS = 30  # timeout for Selenium explicit waits
	_INFERENCE_BATCH_SIZE = 32  # comments per forward pass; bounds peak memory


	def _build_driver():
	    """Return a headless Chromium WebDriver using a webdriver-manager binary."""
	    options = Options()
	    for flag in (
	        "--headless",
	        "--disable-gpu",
	        "--no-sandbox",
	        "--disable-dev-shm-usage",
	        "--start-maximized",
	    ):
	        options.add_argument(flag)
	    service = Service(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install())
	    return webdriver.Chrome(service=service, options=options)


	def _scroll_to_load_comments(driver_wait):
	    """Send END to the page body repeatedly, showing progress in the UI.

	    Stops early (after reporting the error) if the body cannot be located or
	    the key press fails.
	    """
	    placeholder = st.empty()       # dynamic status text
	    progress_bar = st.progress(0)

	    for item in range(_MAX_SCROLLS):
	        try:
	            body = driver_wait.until(
	                EC.visibility_of_element_located((By.TAG_NAME, "body"))
	            )
	            body.send_keys(Keys.END)
	        except Exception as e:
	            st.error(f"Exception during scrolling: {e}")
	            break
	        placeholder.text(f"Scrolled {item + 1} times")
	        progress_bar.progress((item + 1) / _MAX_SCROLLS)
	        time.sleep(_SCROLL_PAUSE_SECONDS)

	    placeholder.text("Scrolling complete.")
	    progress_bar.empty()


	def _extract_comments(driver, driver_wait):
	    """Return one row dict per comment element found on the loaded page.

	    Rows keep page order and get sequential "User ID" values starting at 1.
	    "comment_date" holds the date the scrape ran, not the date the comment
	    was posted. On a Selenium failure the error is shown and the rows
	    collected so far are returned.
	    """
	    rows = []
	    try:
	        driver_wait.until(
	            EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents"))
	        )
	        scraped_on = datetime.now().strftime("%Y-%m-%d")
	        comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
	        for user_id, comment in enumerate(comments, start=1):
	            rows.append(
	                {"User ID": user_id, "Comment": comment.text, "comment_date": scraped_on}
	            )
	    except Exception as e:
	        st.error(f"Exception during comment extraction: {e}")
	    return rows


	def _scrape_comments(url):
	    """Open ``url`` in a fresh browser, scroll it, and return the comment rows.

	    The browser is always shut down, even when navigation or scraping raises.
	    """
	    driver = _build_driver()
	    try:
	        driver_wait = WebDriverWait(driver, _WAIT_TIMEOUT_SECONDS)
	        driver.get(url)
	        _scroll_to_load_comments(driver_wait)
	        return _extract_comments(driver, driver_wait)
	    finally:
	        driver.quit()


	def _predict_sentiments(texts):
	    """Return the model's label for each text, in input order.

	    Texts are processed in fixed-size batches so memory use does not grow
	    with the total number of comments.
	    """
	    labels = []
	    with torch.no_grad():
	        for start in range(0, len(texts), _INFERENCE_BATCH_SIZE):
	            batch = texts[start:start + _INFERENCE_BATCH_SIZE]
	            inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True)
	            logits = model(**inputs).logits
	            # argmax over logits selects the same class as argmax over softmax.
	            labels.extend(
	                model.config.id2label[idx] for idx in logits.argmax(dim=-1).tolist()
	            )
	    return labels


	def _show_sentiment_report(df):
	    """Classify ``df['Comment']`` and render the charts, table and download."""
	    labels = _predict_sentiments(df["Comment"].tolist())
	    sentiment_df = pd.DataFrame(
	        [
	            {"Review Number": number, "Sentiment": label}
	            for number, label in enumerate(labels, start=1)
	        ]
	    )

	    # Share of each sentiment label as a pie chart.
	    final_df = (
	        sentiment_df["Sentiment"]
	        .value_counts()
	        .rename_axis("Sentiment")
	        .reset_index(name="count")
	    )
	    fig1 = px.pie(
	        final_df,
	        values="count",
	        names="Sentiment",
	        hover_data=["count"],
	        labels={"count": "count"},
	    )
	    fig1.update_traces(textposition="inside", textinfo="percent+label")
	    st.plotly_chart(fig1)

	    # Both frames use the default 0..n-1 index, so rows line up one-to-one.
	    result = pd.concat([df, sentiment_df], axis=1)
	    st.dataframe(result)

	    # NOTE(review): every row carries the scrape date, so this y axis has
	    # effectively one category - confirm the metric this chart should show.
	    fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
	    st.plotly_chart(fig2)

	    # Package the combined table as a CSV inside an in-memory zip archive.
	    buf = io.BytesIO()
	    with zipfile.ZipFile(buf, "w") as myzip:
	        myzip.writestr("Summary of the results.csv", result.to_csv(index=False))
	    with stylable_container(
	        key="download_button",
	        css_styles="""button { background-color: yellow; border: 1px solid black; padding: 5px; color: black; }""",
	    ):
	        st.download_button(
	            label="Download zip file",
	            data=buf.getvalue(),
	            file_name="zip_file.zip",
	            mime="application/zip",
	        )


	if st.button("Scrape Reviews"):
	    if not youtube_video_url.strip():
	        # Nothing to navigate to; avoid launching a browser for an empty URL.
	        st.warning("Please enter a YouTube video URL first.")
	    else:
	        df = pd.DataFrame(
	            _scrape_comments(youtube_video_url),
	            columns=["User ID", "Comment", "comment_date"],
	        )
	        st.dataframe(df)

	        if df.empty:
	            # The tokenizer cannot encode an empty batch, so stop here.
	            st.warning("No comments were found for this URL.")
	        elif tokenizer is not None and model is not None:
	            _show_sentiment_report(df)