nlpblogs committed
Commit 8cb61a4 · verified · 1 Parent(s): 983408d

Update app.py

Files changed (1)
  1. app.py +11 -11
app.py CHANGED
@@ -109,11 +109,10 @@ if st.button("Sentiment Analysis", type="secondary"):
 
             for item in range(30):
                 try:
-                    body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
-                    body.send_keys(Keys.END)
-                    placeholder.text(f"Scrolled {item + 1} times")
-                    progress_bar.progress((item + 1) / 150)
-                    time.sleep(0.5)
+                    driver.execute_script("window.scrollBy(0, 500);")
+                    WebDriverWait(driver, 10).until(
+                        EC.presence_of_element_located((By.CSS_SELECTOR, "#content #content-text"))
+                    )
                 except Exception as e:
                     st.error(f"Exception during scrolling: {e}")
                     break
@@ -125,14 +124,14 @@ if st.button("Sentiment Analysis", type="secondary"):
             try:
                 wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
                 comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
-                timestamps = driver.find_elements(By.CSS_SELECTOR, "#header-author > yt-formatted-string > a") #timestamp elements.
+                timestamps = driver.find_elements(By.CSS_SELECTOR, "#header-author > yt-formatted-string > a")
 
                 user_id = 1
                 for comment, timestamp_element in zip(comments, timestamps):
                     timestamp_text = timestamp_element.get_attribute('href')
                     timestamp_text = timestamp_text.split("lc=")[-1]
                     try:
-                        timestamp = datetime.fromtimestamp(int(timestamp_text,36)/1000).strftime("%Y-%m-%d")
+                        timestamp = datetime.fromtimestamp(int(timestamp_text,36)/1000).strftime("%Y-%m-%d %H:%M:%S")
                     except:
                         timestamp = "Timestamp not found"
                     data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
@@ -144,7 +143,7 @@ if st.button("Sentiment Analysis", type="secondary"):
                 df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
                 st.dataframe(df)
 
-                if tokenizer and model:
+                if not df.empty and not df['Comment'].tolist() == []: #added second check
                     inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
                     with torch.no_grad():
                         logits = model(**inputs).logits
@@ -177,16 +176,17 @@ if st.button("Sentiment Analysis", type="secondary"):
                         file_name='Summary of the results.csv',
                         mime='text/csv',
                     )
-
+                else:
+                    st.warning("No comments were scraped. Sentiment analysis could not be performed.")
+
         else:
             st.warning("Please enter a URL.")
     else:
         st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
 
-    if 'url_count' in st.session_state: #added if statement.
+    if 'url_count' in st.session_state:
        st.write(f"URL pasted {st.session_state['url_count']} times.")
 
-
 
 
 
 
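The only change to the timestamp logic is the wider format string (`%Y-%m-%d %H:%M:%S` instead of `%Y-%m-%d`). Pulled out as a standalone helper, the decoding step reads like the sketch below; `decode_lc_timestamp` is a hypothetical name, and interpreting the `lc=` value as a base-36 integer of epoch milliseconds simply mirrors what app.py already does rather than any documented YouTube format.

```python
from datetime import datetime


def decode_lc_timestamp(href: str) -> str:
    """Hypothetical helper mirroring app.py's timestamp handling: take the
    comment permalink href, keep whatever follows 'lc=', read it as a base-36
    integer of milliseconds since the epoch, and format it."""
    lc_value = href.split("lc=")[-1]
    try:
        return datetime.fromtimestamp(int(lc_value, 36) / 1000).strftime("%Y-%m-%d %H:%M:%S")
    except Exception:  # app.py uses a bare except; any parse failure falls back
        return "Timestamp not found"
```

Values that `int()` cannot parse as base 36 fall through to the fallback string, which matches the behaviour of the code in the diff.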
 
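The guard in front of the model call now checks the scraped data instead of the model objects, and a new `else:` branch surfaces a warning when nothing was collected. Here is a minimal end-to-end sketch of that branch, with an assumed checkpoint name and toy comments standing in for the scraped DataFrame; app.py's actual model setup and downstream summary are outside the hunks shown here.

```python
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Assumption: any sentiment-classification checkpoint; app.py's model is not shown in this diff.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

# Toy stand-in for the scraped comments.
df = pd.DataFrame({"Comment": ["Great video!", "This explanation made no sense."]})

# Mirror the new guard: run inference only when comments were actually scraped.
if not df.empty and not df["Comment"].tolist() == []:
    inputs = tokenizer(df["Comment"].tolist(), return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    df["Sentiment"] = [model.config.id2label[i] for i in logits.argmax(dim=-1).tolist()]
    print(df)
else:
    print("No comments were scraped. Sentiment analysis could not be performed.")
```

Because the DataFrame is always built with a `Comment` column, `not df.empty` already implies a non-empty `df['Comment'].tolist()`, so the second condition is effectively redundant; the sketch keeps both to stay close to the commit.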