Update app.py
Browse files
app.py
CHANGED
@@ -109,10 +109,11 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
109 |
|
110 |
for item in range(30):
|
111 |
try:
|
112 |
-
driver.
|
113 |
-
|
114 |
-
|
115 |
-
)
|
|
|
116 |
except Exception as e:
|
117 |
st.error(f"Exception during scrolling: {e}")
|
118 |
break
|
@@ -124,16 +125,9 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
124 |
try:
|
125 |
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
|
126 |
comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
|
127 |
-
timestamps = driver.find_elements(By.CSS_SELECTOR, "#header-author > yt-formatted-string > a")
|
128 |
-
|
129 |
user_id = 1
|
130 |
-
for comment
|
131 |
-
|
132 |
-
timestamp_text = timestamp_text.split("lc=")[-1]
|
133 |
-
try:
|
134 |
-
timestamp = datetime.fromtimestamp(int(timestamp_text,36)/1000).strftime("%Y-%m-%d %H:%M:%S")
|
135 |
-
except:
|
136 |
-
timestamp = "Timestamp not found"
|
137 |
data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
|
138 |
user_id += 1
|
139 |
data = [dict(t) for t in {tuple(d.items()) for d in data}]
|
@@ -143,7 +137,7 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
143 |
df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
|
144 |
st.dataframe(df)
|
145 |
|
146 |
-
if
|
147 |
inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
|
148 |
with torch.no_grad():
|
149 |
logits = model(**inputs).logits
|
@@ -176,21 +170,14 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
176 |
file_name='Summary of the results.csv',
|
177 |
mime='text/csv',
|
178 |
)
|
179 |
-
|
180 |
-
st.warning("No comments were scraped. Sentiment analysis could not be performed.")
|
181 |
-
|
182 |
else:
|
183 |
st.warning("Please enter a URL.")
|
184 |
else:
|
185 |
st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
|
186 |
|
187 |
-
if 'url_count' in st.session_state:
|
188 |
st.write(f"URL pasted {st.session_state['url_count']} times.")
|
189 |
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
|
195 |
|
196 |
-
|
|
|
109 |
|
110 |
for item in range(30):
|
111 |
try:
|
112 |
+
body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
|
113 |
+
body.send_keys(Keys.END)
|
114 |
+
placeholder.text(f"Scrolled {item + 1} times")
|
115 |
+
progress_bar.progress((item + 1) / 150)
|
116 |
+
time.sleep(0.5)
|
117 |
except Exception as e:
|
118 |
st.error(f"Exception during scrolling: {e}")
|
119 |
break
|
|
|
125 |
try:
|
126 |
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
|
127 |
comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
|
|
|
|
|
128 |
user_id = 1
|
129 |
+
for comment in comments:
|
130 |
+
timestamp = datetime.now().strftime("%Y-%m-%d")
|
|
|
|
|
|
|
|
|
|
|
131 |
data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
|
132 |
user_id += 1
|
133 |
data = [dict(t) for t in {tuple(d.items()) for d in data}]
|
|
|
137 |
df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
|
138 |
st.dataframe(df)
|
139 |
|
140 |
+
if tokenizer and model:
|
141 |
inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
|
142 |
with torch.no_grad():
|
143 |
logits = model(**inputs).logits
|
|
|
170 |
file_name='Summary of the results.csv',
|
171 |
mime='text/csv',
|
172 |
)
|
173 |
+
|
|
|
|
|
174 |
else:
|
175 |
st.warning("Please enter a URL.")
|
176 |
else:
|
177 |
st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
|
178 |
|
179 |
+
if 'url_count' in st.session_state: #added if statement.
|
180 |
st.write(f"URL pasted {st.session_state['url_count']} times.")
|
181 |
|
|
|
|
|
|
|
|
|
182 |
|
183 |
|
|