import io
import time
import zipfile
from datetime import datetime

import pandas as pd
import plotly.express as px
import streamlit as st
import torch
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from streamlit_extras.stylable_container import stylable_container
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.core.os_manager import ChromeType

# DistilBERT sentiment classifier and its tokenizer.
tokenizer = DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
model = DistilBertForSequenceClassification.from_pretrained("tabularisai/robust-sentiment-analysis")

def clear_question():
    # Reset the URL input box (bound to the "youtube_video_url" session-state key).
    st.session_state["youtube_video_url"] = ""

# The scraper below targets YouTube's comment section, so the expected input is a YouTube video URL.
youtube_video_url = st.text_input("Enter YouTube video URL:", key="youtube_video_url")
st.button("Clear question", on_click=clear_question)

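# When the user starts a scrape, launch a headless Chromium instance via webdriver-manager,
# open the given URL, and scroll repeatedly so the comments lazy-load.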
if st.button("Scrape Reviews"):
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--start-maximized")
    service = Service(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install())
    driver = webdriver.Chrome(service=service, options=options)
    wait = WebDriverWait(driver, 30) 
    driver.get(youtube_video_url)
    
   
    placeholder = st.empty()  # Create an empty placeholder for dynamic text
    progress_bar = st.progress(0)  # Create a progress bar

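    # Scroll to the bottom of the page 150 times so more comments lazy-load,
    # updating the status text and progress bar as we go.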
    for item in range(150):
        try:
            body = wait.until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
            body.send_keys(Keys.END)
            placeholder.text(f"Scrolled {item + 1} times")  # Update placeholder text
            progress_bar.progress((item + 1) / 150)  # Update progress bar
            time.sleep(3)  # Increased sleep time for better loading
        except Exception as e:
            st.error(f"Exception during scrolling: {e}")
            break

    placeholder.text("Scrolling complete.") #show completion message.
    progress_bar.empty() #remove progress bar.

   
    data = []
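    # Collect the comment elements that the scrolling loaded into the page.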
    try:
        # Wait until at least one comment body is present, then collect all of them.
        # These CSS selectors depend on YouTube's current comment markup and may need
        # updating if the page structure changes.
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#content-text")))
        comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")

        # De-duplicate on the comment text itself; the incrementing User ID would otherwise
        # make every row unique. comment_date records the scrape date, not the posting date.
        timestamp = datetime.now().strftime("%Y-%m-%d")
        seen = set()
        user_id = 1
        for comment in comments:
            text = comment.text
            if text not in seen:
                seen.add(text)
                data.append({"User ID": user_id, "Comment": text, "comment_date": timestamp})
                user_id += 1

    except Exception as e:
        st.error(f"Exception during comment extraction: {e}")

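    # Close the browser and show the scraped comments as a table.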
    driver.quit()
    df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
    st.dataframe(df)

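    # Classify each comment's sentiment with the DistilBERT model loaded above.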
    # Skip the analysis when no comments were scraped; an empty batch would fail here.
    if not df.empty:
        inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
        with torch.no_grad():
            logits = model(**inputs).logits
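            # Softmax converts logits to class probabilities; argmax picks the most likely
            # class per comment, and id2label maps the class index back to a sentiment name.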
            predicted_probabilities = torch.nn.functional.softmax(logits, dim=-1)
            predicted_labels = predicted_probabilities.argmax(dim=1)
            results = []
            for i, label in enumerate(predicted_labels):
                results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
            sentiment_df = pd.DataFrame(results)
            # Pie chart of the overall sentiment distribution.
            sentiment_counts = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
            fig1 = px.pie(sentiment_counts, values='count', names='Sentiment', hover_data=['count'])
            fig1.update_traces(textposition='inside', textinfo='percent+label')
            st.plotly_chart(fig1)
            

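            # Attach the per-comment sentiment labels to the scraped data and show the combined table.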
            result = pd.concat([df, sentiment_df], axis=1)
            st.dataframe(result)

            
            # Comment counts per sentiment, split by comment_date (every row currently carries
            # the scrape date, so a single date appears on the x-axis).
            fig2 = px.histogram(result, x="comment_date", color="Sentiment", barmode="group")
            st.plotly_chart(fig2)

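            # Package the combined results as a CSV inside a zip archive for download.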
            buf = io.BytesIO()
            with zipfile.ZipFile(buf, "w") as myzip:
                myzip.writestr("Summary of the results.csv", result.to_csv(index=False))
            with stylable_container(
                key="download_button",
                css_styles="""button { background-color: yellow; border: 1px solid black; padding: 5px; color: black; }""",
            ):
                st.download_button(
                    label="Download zip file",
                    data=buf.getvalue(),
                    file_name="zip_file.zip",
                    mime="application/zip",
                )