Update app.py
Browse files
app.py
CHANGED
@@ -109,11 +109,10 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
109 |
|
110 |
for item in range(30):
|
111 |
try:
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
time.sleep(0.5)
|
117 |
except Exception as e:
|
118 |
st.error(f"Exception during scrolling: {e}")
|
119 |
break
|
@@ -125,14 +124,14 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
125 |
try:
|
126 |
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
|
127 |
comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
|
128 |
-
timestamps = driver.find_elements(By.CSS_SELECTOR, "#header-author > yt-formatted-string > a")
|
129 |
|
130 |
user_id = 1
|
131 |
for comment, timestamp_element in zip(comments, timestamps):
|
132 |
timestamp_text = timestamp_element.get_attribute('href')
|
133 |
timestamp_text = timestamp_text.split("lc=")[-1]
|
134 |
try:
|
135 |
-
timestamp = datetime.fromtimestamp(int(timestamp_text,36)/1000).strftime("%Y-%m-%d")
|
136 |
except:
|
137 |
timestamp = "Timestamp not found"
|
138 |
data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
|
@@ -144,7 +143,7 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
144 |
df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
|
145 |
st.dataframe(df)
|
146 |
|
147 |
-
if
|
148 |
inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
|
149 |
with torch.no_grad():
|
150 |
logits = model(**inputs).logits
|
@@ -177,16 +176,17 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
177 |
file_name='Summary of the results.csv',
|
178 |
mime='text/csv',
|
179 |
)
|
180 |
-
|
|
|
|
|
181 |
else:
|
182 |
st.warning("Please enter a URL.")
|
183 |
else:
|
184 |
st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
|
185 |
|
186 |
-
if 'url_count' in st.session_state:
|
187 |
st.write(f"URL pasted {st.session_state['url_count']} times.")
|
188 |
|
189 |
-
|
190 |
|
191 |
|
192 |
|
|
|
109 |
|
110 |
for item in range(30):
|
111 |
try:
|
112 |
+
driver.execute_script("window.scrollBy(0, 500);")
|
113 |
+
WebDriverWait(driver, 10).until(
|
114 |
+
EC.presence_of_element_located((By.CSS_SELECTOR, "#content #content-text"))
|
115 |
+
)
|
|
|
116 |
except Exception as e:
|
117 |
st.error(f"Exception during scrolling: {e}")
|
118 |
break
|
|
|
124 |
try:
|
125 |
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
|
126 |
comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
|
127 |
+
timestamps = driver.find_elements(By.CSS_SELECTOR, "#header-author > yt-formatted-string > a")
|
128 |
|
129 |
user_id = 1
|
130 |
for comment, timestamp_element in zip(comments, timestamps):
|
131 |
timestamp_text = timestamp_element.get_attribute('href')
|
132 |
timestamp_text = timestamp_text.split("lc=")[-1]
|
133 |
try:
|
134 |
+
timestamp = datetime.fromtimestamp(int(timestamp_text,36)/1000).strftime("%Y-%m-%d %H:%M:%S")
|
135 |
except:
|
136 |
timestamp = "Timestamp not found"
|
137 |
data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
|
|
|
143 |
df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
|
144 |
st.dataframe(df)
|
145 |
|
146 |
+
if not df.empty and not df['Comment'].tolist() == []: #added second check
|
147 |
inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
|
148 |
with torch.no_grad():
|
149 |
logits = model(**inputs).logits
|
|
|
176 |
file_name='Summary of the results.csv',
|
177 |
mime='text/csv',
|
178 |
)
|
179 |
+
else:
|
180 |
+
st.warning("No comments were scraped. Sentiment analysis could not be performed.")
|
181 |
+
|
182 |
else:
|
183 |
st.warning("Please enter a URL.")
|
184 |
else:
|
185 |
st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
|
186 |
|
187 |
+
if 'url_count' in st.session_state:
|
188 |
st.write(f"URL pasted {st.session_state['url_count']} times.")
|
189 |
|
|
|
190 |
|
191 |
|
192 |
|