nlpblogs committed (verified)
Commit 4307f05 · 1 Parent(s): 8cb61a4

Update app.py

Files changed (1): app.py (+10, -23)
app.py CHANGED
@@ -109,10 +109,11 @@ if st.button("Sentiment Analysis", type="secondary"):
 
             for item in range(30):
                 try:
-                    driver.execute_script("window.scrollBy(0, 500);")
-                    WebDriverWait(driver, 10).until(
-                        EC.presence_of_element_located((By.CSS_SELECTOR, "#content #content-text"))
-                    )
+                    body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
+                    body.send_keys(Keys.END)
+                    placeholder.text(f"Scrolled {item + 1} times")
+                    progress_bar.progress((item + 1) / 150)
+                    time.sleep(0.5)
                 except Exception as e:
                     st.error(f"Exception during scrolling: {e}")
                     break
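Note on this hunk: the rewrite stops waiting for comment nodes after each window.scrollBy and instead sends an END keystroke to the page body, which makes YouTube's lazy loader fetch the next batch of comments while progress is reported in the Streamlit UI. The new lines also require Keys and time to be imported and assume placeholder and progress_bar already exist; none of that is visible in this hunk. A minimal standalone sketch of the same pattern, with those pieces filled in as assumptions and a hypothetical video URL:

import time

import streamlit as st
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

driver = webdriver.Chrome()  # assumes a local Chrome install
driver.get("https://www.youtube.com/watch?v=EXAMPLE")  # hypothetical URL

placeholder = st.empty()       # assumption: app.py creates these earlier
progress_bar = st.progress(0)

for item in range(30):
    try:
        # Wait until the <body> is visible, then jump to the bottom of the
        # page; each END keystroke lazy-loads another batch of comments.
        body = WebDriverWait(driver, 30).until(
            EC.visibility_of_element_located((By.TAG_NAME, "body"))
        )
        body.send_keys(Keys.END)
        placeholder.text(f"Scrolled {item + 1} times")
        # The divisor is 150 but the loop runs 30 times, so the bar stops
        # at 20%; (item + 1) / 30 would fill it completely.
        progress_bar.progress((item + 1) / 150)
        time.sleep(0.5)  # give the lazy loader time to respond
    except Exception as e:
        st.error(f"Exception during scrolling: {e}")
        break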
@@ -124,16 +125,9 @@ if st.button("Sentiment Analysis", type="secondary"):
             try:
                 wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
                 comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
-                timestamps = driver.find_elements(By.CSS_SELECTOR, "#header-author > yt-formatted-string > a")
-
                 user_id = 1
-                for comment, timestamp_element in zip(comments, timestamps):
-                    timestamp_text = timestamp_element.get_attribute('href')
-                    timestamp_text = timestamp_text.split("lc=")[-1]
-                    try:
-                        timestamp = datetime.fromtimestamp(int(timestamp_text,36)/1000).strftime("%Y-%m-%d %H:%M:%S")
-                    except:
-                        timestamp = "Timestamp not found"
+                for comment in comments:
+                    timestamp = datetime.now().strftime("%Y-%m-%d")
                     data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
                     user_id += 1
                 data = [dict(t) for t in {tuple(d.items()) for d in data}]
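Note on this hunk: the old loop decoded each comment permalink's lc= parameter as a base-36 value to recover a posting timestamp; the rewrite drops that and stamps every row with the scrape date, so comment_date no longer reflects when a comment was written. The deduplication line kept by both versions round-trips each dict through a hashable tuple; a short sketch of that pattern with made-up rows:

data = [
    {"User ID": 1, "Comment": "great video", "comment_date": "2025-01-01"},
    {"User ID": 1, "Comment": "great video", "comment_date": "2025-01-01"},  # exact duplicate
    {"User ID": 2, "Comment": "first!", "comment_date": "2025-01-01"},
]

# dicts are unhashable, so convert each row to a tuple of (key, value)
# pairs, deduplicate through a set, then rebuild the dicts. Set iteration
# order is arbitrary, so row order is not preserved.
data = [dict(t) for t in {tuple(d.items()) for d in data}]
print(len(data))  # 2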
@@ -143,7 +137,7 @@ if st.button("Sentiment Analysis", type="secondary"):
                 df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
                 st.dataframe(df)
 
-                if not df.empty and not df['Comment'].tolist() == []: #added second check
+                if tokenizer and model:
                     inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
                     with torch.no_grad():
                         logits = model(**inputs).logits
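Note on this hunk: the guard now checks that the tokenizer and model loaded rather than checking for an empty DataFrame, so an empty comment list can now reach the tokenizer unchecked; keeping the emptiness check alongside the new one would be safer. For reference, a sketch of the batch-inference pattern inside the guarded block; the checkpoint name is an assumption, since the model app.py actually loads is not shown in this diff:

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Assumption: a generic sentiment checkpoint stands in for whatever app.py loads.
name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForSequenceClassification.from_pretrained(name)

comments = ["loved this video", "terrible audio quality"]
inputs = tokenizer(comments, return_tensors="pt", padding=True, truncation=True)
with torch.no_grad():  # inference only; skip gradient bookkeeping
    logits = model(**inputs).logits
labels = [model.config.id2label[i] for i in logits.argmax(dim=-1).tolist()]
print(labels)  # e.g. ['POSITIVE', 'NEGATIVE']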
@@ -176,21 +170,14 @@ if st.button("Sentiment Analysis", type="secondary"):
                         file_name='Summary of the results.csv',
                         mime='text/csv',
                     )
-                else:
-                    st.warning("No comments were scraped. Sentiment analysis could not be performed.")
-
+
         else:
             st.warning("Please enter a URL.")
     else:
         st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
 
-    if 'url_count' in st.session_state:
+    if 'url_count' in st.session_state: #added if statement.
         st.write(f"URL pasted {st.session_state['url_count']} times.")
 
-
-
-
-
 
 
-
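Note on this hunk: wrapping the counter display in if 'url_count' in st.session_state: avoids a KeyError on a fresh session where no URL has been pasted yet. A minimal sketch of the pattern, with the increment (done elsewhere in app.py) stubbed in as an assumption:

import streamlit as st

url = st.text_input("Paste a YouTube URL")  # hypothetical widget
if url:
    # Assumption: app.py increments the counter wherever a URL is accepted.
    st.session_state["url_count"] = st.session_state.get("url_count", 0) + 1

# The guarded display from this commit: only read the key once it exists.
if "url_count" in st.session_state:
    st.write(f"URL pasted {st.session_state['url_count']} times.")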