|
import streamlit as st |
|
|
|
from selenium import webdriver |
|
from selenium.webdriver.common.by import By |
|
from selenium.webdriver.chrome.options import Options |
|
|
|
from selenium.webdriver.chrome.service import Service |
|
|
|
import pandas as pd |
|
|
|
from selenium.webdriver.common.keys import Keys |
|
|
|
from selenium.webdriver.support.ui import WebDriverWait |
|
from selenium.webdriver.support import expected_conditions as EC |
|
import time |
|
import sys |
|
from datetime import datetime |
|
|
|
|
|
from webdriver_manager.chrome import ChromeDriverManager |
|
from selenium.webdriver.chrome.service import Service as ChromeService |
|
|
|
from webdriver_manager.core.os_manager import ChromeType |
|
|
|
import re |
|
|
|
|
|
import transformers |
|
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification |
|
import io |
|
import plotly.express as px |
|
import zipfile |
|
import torch |
|
from streamlit_extras.stylable_container import stylable_container |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_SENTIMENT_MODEL_NAME = "tabularisai/robust-sentiment-analysis"


@st.cache_resource
def _load_sentiment_model():
    """Load the sentiment tokenizer and classifier once and cache them.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded objects avoids re-instantiating them on each rerun.

    Returns:
        A ``(tokenizer, model)`` tuple built from ``_SENTIMENT_MODEL_NAME``.
    """
    loaded_tokenizer = transformers.DistilBertTokenizer.from_pretrained(
        _SENTIMENT_MODEL_NAME
    )
    loaded_model = transformers.DistilBertForSequenceClassification.from_pretrained(
        _SENTIMENT_MODEL_NAME
    )
    return loaded_tokenizer, loaded_model


tokenizer, model = _load_sentiment_model()
|
|
|
def clear_question():
    """Reset the URL text input by blanking its session-state entry.

    Used as the ``on_click`` callback of the "Clear question" button.
    """
    st.session_state.youtube_video_url = ""
|
|
|
# URL of the video whose comments will be scraped. The widget is bound to
# session state under "youtube_video_url" so clear_question() can reset it.
youtube_video_url = st.text_input(
    "Enter YouTube video URL:",
    key="youtube_video_url",
)

# The callback runs before the next rerun, so the input is rendered empty.
st.button("Clear question", on_click=clear_question)
|
|
|
# Tunables for the scraping and inference steps.
_SCROLL_COUNT = 150            # how many times END is sent to the page
_SCROLL_PAUSE_SECONDS = 3      # pause after each scroll so content can load
_WAIT_TIMEOUT_SECONDS = 30     # explicit-wait timeout for Selenium lookups
_INFERENCE_BATCH_SIZE = 32     # comments classified per forward pass


def _build_driver():
    """Start a headless Chromium WebDriver session with this app's flags."""
    options = Options()
    for flag in (
        "--headless",
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--start-maximized",
    ):
        options.add_argument(flag)
    service = Service(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install())
    return webdriver.Chrome(service=service, options=options)


def _scrape_comments(url):
    """Open ``url``, scroll to load comments, and return one row per comment.

    Scrolling and extraction errors are reported with ``st.error`` and end
    that phase; rows collected so far are still returned. The browser is
    always closed, even if navigation itself raises.

    Args:
        url: Address of the page to scrape.

    Returns:
        A list of dicts with keys ``"User ID"``, ``"Comment"`` and
        ``"comment_date"``, in page order.
    """
    rows = []
    driver = _build_driver()
    try:
        wait = WebDriverWait(driver, _WAIT_TIMEOUT_SECONDS)
        driver.get(url)

        placeholder = st.empty()
        progress_bar = st.progress(0)
        for item in range(_SCROLL_COUNT):
            try:
                body = wait.until(
                    EC.visibility_of_element_located((By.TAG_NAME, "body"))
                )
                body.send_keys(Keys.END)
                placeholder.text(f"Scrolled {item + 1} times")
                progress_bar.progress((item + 1) / _SCROLL_COUNT)
                time.sleep(_SCROLL_PAUSE_SECONDS)
            except Exception as e:
                st.error(f"Exception during scrolling: {e}")
                break
        placeholder.text("Scrolling complete.")
        progress_bar.empty()

        try:
            wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents"))
            )
            comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
            # The scrape date stands in for the comment date; the page's own
            # timestamps are not read.
            timestamp = datetime.now().strftime("%Y-%m-%d")
            # find_elements yields each matching node once, in document
            # order, so rows are numbered as encountered and need no
            # de-duplication pass.
            for user_id, comment in enumerate(comments, start=1):
                rows.append(
                    {"User ID": user_id, "Comment": comment.text, "comment_date": timestamp}
                )
        except Exception as e:
            st.error(f"Exception during comment extraction: {e}")
    finally:
        # Always release the browser process, including on navigation errors.
        driver.quit()
    return rows


def _predict_sentiments(texts):
    """Return the classifier's label for each text, in input order.

    Texts are processed in batches of ``_INFERENCE_BATCH_SIZE`` so memory
    use does not grow with the total number of comments.

    Args:
        texts: List of comment strings.

    Returns:
        A list of label strings taken from ``model.config.id2label``.
    """
    labels = []
    with torch.no_grad():
        for start in range(0, len(texts), _INFERENCE_BATCH_SIZE):
            batch = texts[start:start + _INFERENCE_BATCH_SIZE]
            inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True)
            logits = model(**inputs).logits
            # Softmax is monotonic, so the arg-max of the logits is the
            # arg-max of the probabilities.
            labels.extend(
                model.config.id2label[idx] for idx in logits.argmax(dim=-1).tolist()
            )
    return labels


def _zip_csv(frame):
    """Return the bytes of a zip archive holding ``frame`` as a single CSV."""
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as myzip:
        myzip.writestr("Summary of the results.csv", frame.to_csv(index=False))
    return buf.getvalue()


# Main pipeline: scrape comments, classify them, chart the results and offer
# them as a zipped CSV.
# NOTE: st.button is True only on the rerun triggered by its own click, so
# any later interaction (including the download button) reruns the script
# without this branch.
if st.button("Scrape Reviews"):
    target_url = youtube_video_url.strip()
    if not target_url:
        st.warning("Please enter a URL before scraping.")
    else:
        data = _scrape_comments(target_url)
        df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
        st.dataframe(df)

        if df.empty:
            # The tokenizer cannot encode an empty batch; stop here instead.
            st.warning("No comments were found to analyse.")
        elif tokenizer is not None and model is not None:
            sentiments = _predict_sentiments(df["Comment"].tolist())
            sentiment_df = pd.DataFrame(
                {
                    "Review Number": range(1, len(sentiments) + 1),
                    "Sentiment": sentiments,
                }
            )

            # Share of each sentiment label across all comments.
            final_df = (
                sentiment_df["Sentiment"]
                .value_counts()
                .rename_axis("Sentiment")
                .reset_index(name="count")
            )
            fig1 = px.pie(
                final_df,
                values="count",
                names="Sentiment",
                hover_data=["count"],
                labels={"count": "count"},
            )
            fig1.update_traces(textposition="inside", textinfo="percent+label")
            st.plotly_chart(fig1)

            # Both frames share the default 0..n-1 index, so a column-wise
            # concat lines each comment up with its predicted label.
            result = pd.concat([df, sentiment_df], axis=1)
            st.dataframe(result)

            fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
            st.plotly_chart(fig2)

            with stylable_container(
                key="download_button",
                css_styles="""button { background-color: yellow; border: 1px solid black; padding: 5px; color: black; }""",
            ):
                st.download_button(
                    label="Download zip file",
                    data=_zip_csv(result),
                    file_name="zip_file.zip",
                    mime="application/zip",
                )
|
|
|
|
|
|
|
|
|
|
|
|
|
|