Update app.py
Browse files
app.py
CHANGED
@@ -70,6 +70,9 @@ with st.sidebar:
|
|
70 |
''')
|
71 |
|
72 |
|
|
|
|
|
|
|
73 |
st.subheader("YouTube Comments Sentiment Analysis", divider="red")
|
74 |
tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
|
75 |
model = transformers.DistilBertForSequenceClassification.from_pretrained("tabularisai/robust-sentiment-analysis")
|
@@ -107,13 +110,12 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
107 |
placeholder = st.empty()
|
108 |
progress_bar = st.progress(0)
|
109 |
|
110 |
-
for item in range(
|
111 |
try:
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
time.sleep(0.5)
|
117 |
except Exception as e:
|
118 |
st.error(f"Exception during scrolling: {e}")
|
119 |
break
|
@@ -125,9 +127,16 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
125 |
try:
|
126 |
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
|
127 |
comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
|
|
|
|
|
128 |
user_id = 1
|
129 |
-
for comment in comments:
|
130 |
-
|
|
|
|
|
|
|
|
|
|
|
131 |
data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
|
132 |
user_id += 1
|
133 |
data = [dict(t) for t in {tuple(d.items()) for d in data}]
|
@@ -170,16 +179,14 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
170 |
file_name='Summary of the results.csv',
|
171 |
mime='text/csv',
|
172 |
)
|
173 |
-
|
174 |
else:
|
175 |
st.warning("Please enter a URL.")
|
176 |
else:
|
177 |
st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
|
178 |
|
179 |
-
if 'url_count' in st.session_state:
|
180 |
st.write(f"URL pasted {st.session_state['url_count']} times.")
|
181 |
|
182 |
-
|
183 |
|
184 |
|
185 |
|
|
|
70 |
''')
|
71 |
|
72 |
|
73 |
+
|
74 |
+
|
75 |
+
|
76 |
@st.cache_resource
def _load_sentiment_pipeline(model_name: str = "tabularisai/robust-sentiment-analysis"):
    """Return (tokenizer, model) for the sentiment checkpoint, cached per process.

    Streamlit re-executes the whole script on every widget interaction, so an
    uncached ``from_pretrained`` would reload the weights on each button press;
    ``st.cache_resource`` keeps one shared copy alive across reruns.
    """
    tok = transformers.DistilBertTokenizer.from_pretrained(model_name)
    mdl = transformers.DistilBertForSequenceClassification.from_pretrained(model_name)
    return tok, mdl

st.subheader("YouTube Comments Sentiment Analysis", divider="red")
# Module-level names kept identical so downstream inference code is unchanged.
tokenizer, model = _load_sentiment_pipeline()
|
|
|
110 |
placeholder = st.empty()
progress_bar = st.progress(0)

max_scrolls = 30  # bounded so very long comment threads cannot hang the app
for step in range(max_scrolls):
    try:
        # Each 500px scroll nudges YouTube into lazy-loading the next comment batch.
        driver.execute_script("window.scrollBy(0, 500);")
        # NOTE(review): presence_of_element_located is satisfied as soon as ONE
        # comment exists, so after the first batch this wait returns instantly;
        # the sleep below is what actually paces the lazy loader — confirm 0.5s
        # is enough on slow connections.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#content #content-text"))
        )
        time.sleep(0.5)  # give the newly revealed batch time to render
        progress_bar.progress((step + 1) / max_scrolls)  # bar was created but never advanced
    except Exception as e:
        # Broad on purpose: any WebDriver failure ends scrolling but keeps the app alive.
        st.error(f"Exception during scrolling: {e}")
        break
|
|
|
127 |
try:
|
128 |
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
|
129 |
comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
|
130 |
+
timestamps = driver.find_elements(By.CSS_SELECTOR, "#header-author > yt-formatted-string > a") #timestamp elements.
|
131 |
+
|
132 |
user_id = 1
|
133 |
+
for comment, timestamp_element in zip(comments, timestamps):
|
134 |
+
timestamp_text = timestamp_element.get_attribute('href')
|
135 |
+
timestamp_text = timestamp_text.split("lc=")[-1]
|
136 |
+
try:
|
137 |
+
timestamp = datetime.fromtimestamp(int(timestamp_text,36)/1000).strftime("%Y-%m-%d %H:%M:%S")
|
138 |
+
except:
|
139 |
+
timestamp = "Timestamp not found"
|
140 |
data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
|
141 |
user_id += 1
|
142 |
data = [dict(t) for t in {tuple(d.items()) for d in data}]
|
|
|
179 |
file_name='Summary of the results.csv',
|
180 |
mime='text/csv',
|
181 |
)
|
|
|
182 |
else:
|
183 |
st.warning("Please enter a URL.")
|
184 |
else:
|
185 |
st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
|
186 |
|
187 |
+
# Show how many URLs this session has submitted, but only once the counter exists.
if 'url_count' in st.session_state:
    pasted_total = st.session_state['url_count']
    st.write(f"URL pasted {pasted_total} times.")
|
189 |
|
|
|
190 |
|
191 |
|
192 |
|