nlpblogs committed on
Commit
ca12572
·
verified ·
1 Parent(s): f028976

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -0
app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ from selenium import webdriver
4
+ from selenium.webdriver.common.by import By
5
+ from selenium.webdriver.chrome.options import Options
6
+
7
+ from selenium.webdriver.chrome.service import Service
8
+
9
+ import pandas as pd
10
+
11
+ from selenium.webdriver.common.keys import Keys
12
+
13
+ from selenium.webdriver.support.ui import WebDriverWait
14
+ from selenium.webdriver.support import expected_conditions as EC
15
+ import time
16
+ import sys
17
+ from datetime import datetime
18
+
19
+
20
+ from webdriver_manager.chrome import ChromeDriverManager
21
+ from selenium.webdriver.chrome.service import Service as ChromeService
22
+
23
+ from webdriver_manager.core.os_manager import ChromeType
24
+
25
+ import re
26
+
27
+
28
+ import transformers
29
+ from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
30
+ import io
31
+ import plotly.express as px
32
+ import zipfile
33
+ import torch
34
+ from streamlit_extras.stylable_container import stylable_container
35
+
36
+
37
+
38
+
39
+
40
+
41
+
42
+
43
@st.cache_resource
def _load_sentiment_model():
    """Load the sentiment tokenizer and classifier once per server process.

    Streamlit re-executes the whole script on every widget interaction, so
    loading the weights at module level would repeat the work on each
    rerun.  ``st.cache_resource`` keeps the loaded objects alive between
    reruns and shares them across sessions.

    Returns:
        A ``(tokenizer, model)`` tuple for the
        ``tabularisai/robust-sentiment-analysis`` checkpoint.
    """
    checkpoint = "tabularisai/robust-sentiment-analysis"
    loaded_tokenizer = transformers.DistilBertTokenizer.from_pretrained(checkpoint)
    loaded_model = transformers.DistilBertForSequenceClassification.from_pretrained(checkpoint)
    return loaded_tokenizer, loaded_model


# Module-level names are kept so the rest of the script can keep using them.
tokenizer, model = _load_sentiment_model()
45
+
46
def clear_question():
    """Reset the URL text input by blanking its entry in session state.

    Intended to be used as a widget ``on_click`` callback.
    """
    widget_key = "youtube_video_url"
    st.session_state[widget_key] = ""
48
+
49
# Number of END key presses sent to the page to trigger lazy loading.
SCROLL_COUNT = 150
# Pause after each scroll so newly requested content has time to render.
SCROLL_PAUSE_SECONDS = 3
# Upper bound on comments per forward pass, to keep memory use bounded.
SENTIMENT_BATCH_SIZE = 32


def _scrape_comments(url):
    """Open *url* in headless Chromium, scroll, and collect comment texts.

    Progress and errors are reported through Streamlit widgets.  The browser
    is always shut down, even when navigation or extraction fails.

    Args:
        url: Address of the page to scrape.

    Returns:
        A list of dicts with the keys ``"User ID"``, ``"Comment"`` and
        ``"comment_date"``, in page order.  The list is empty (or partial)
        when scraping fails.
    """
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--start-maximized")
    service = Service(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install())
    driver = webdriver.Chrome(service=service, options=options)

    rows = []
    try:
        wait = WebDriverWait(driver, 30)
        try:
            driver.get(url)
        except Exception as e:
            st.error(f"Exception while opening the page: {e}")
            return rows

        placeholder = st.empty()  # Placeholder for the dynamic status text
        progress_bar = st.progress(0)

        for item in range(SCROLL_COUNT):
            try:
                body = wait.until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
                body.send_keys(Keys.END)
                placeholder.text(f"Scrolled {item + 1} times")
                progress_bar.progress((item + 1) / SCROLL_COUNT)
                time.sleep(SCROLL_PAUSE_SECONDS)
            except Exception as e:
                st.error(f"Exception during scrolling: {e}")
                break

        placeholder.text("Scrolling complete.")
        progress_bar.empty()

        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
            comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")

            # NOTE: this is the date of the scrape, not the date the comment
            # was posted; the column name is kept for output compatibility.
            scrape_date = datetime.now().strftime("%Y-%m-%d")
            # Rows are appended one by one so that a failure part-way through
            # still keeps the comments read so far.  No set-based "dedup" is
            # applied: the sequential User ID makes every row unique, so it
            # removed nothing and only scrambled the row order.
            for user_id, comment in enumerate(comments, start=1):
                rows.append({"User ID": user_id, "Comment": comment.text, "comment_date": scrape_date})
        except Exception as e:
            st.error(f"Exception during comment extraction: {e}")
    finally:
        # Always release the browser process, whatever happened above.
        driver.quit()

    return rows


def _predict_sentiments(comments, batch_size=SENTIMENT_BATCH_SIZE):
    """Classify each comment and return the predicted label names.

    Args:
        comments: List of comment strings.
        batch_size: Maximum number of comments per forward pass.

    Returns:
        A list of label strings taken from ``model.config.id2label``, one per
        input comment and in the same order.
    """
    labels = []
    for start in range(0, len(comments), batch_size):
        batch = comments[start:start + batch_size]
        inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True)
        with torch.no_grad():
            logits = model(**inputs).logits
        # argmax over logits equals argmax over softmax probabilities.
        labels.extend(model.config.id2label[idx] for idx in logits.argmax(dim=-1).tolist())
    return labels


# The widget key and the comment selectors used when scraping indicate a
# YouTube video page, so the label says so (it previously said "Google Maps").
youtube_video_url = st.text_input("Enter YouTube video URL:", key="youtube_video_url")
st.button("Clear question", on_click=clear_question)

if st.button("Scrape Reviews"):
    video_url = youtube_video_url.strip()
    if not video_url:
        st.warning("Please enter a URL before scraping.")
    else:
        data = _scrape_comments(video_url)
        df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
        st.dataframe(df)

        if df.empty:
            # The tokenizer cannot be called with an empty list of texts.
            st.warning("No comments were found, so there is nothing to analyse.")
        elif tokenizer is not None and model is not None:
            sentiments = _predict_sentiments(df["Comment"].tolist())
            sentiment_df = pd.DataFrame(
                {
                    "Review Number": range(1, len(sentiments) + 1),
                    "Sentiment": sentiments,
                }
            )

            final_df = sentiment_df["Sentiment"].value_counts().rename_axis("Sentiment").reset_index(name="count")
            fig1 = px.pie(final_df, values="count", names="Sentiment", hover_data=["count"], labels={"count": "count"})
            fig1.update_traces(textposition="inside", textinfo="percent+label")
            st.plotly_chart(fig1)

            # Both frames share the default 0..n-1 index, so rows line up.
            result = pd.concat([df, sentiment_df], axis=1)
            st.dataframe(result)

            fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
            st.plotly_chart(fig2)

            buf = io.BytesIO()
            with zipfile.ZipFile(buf, "w") as myzip:
                myzip.writestr("Summary of the results.csv", result.to_csv(index=False))
            with stylable_container(
                key="download_button",
                css_styles="""button { background-color: yellow; border: 1px solid black; padding: 5px; color: black; }""",
            ):
                st.download_button(
                    label="Download zip file",
                    data=buf.getvalue(),
                    file_name="zip_file.zip",
                    mime="application/zip",
                )
140
+
141
+
142
+
143
+
144
+
145
+
146
+