nlpblogs committed (verified)
Commit 4307f05 · 1 Parent(s): 8cb61a4

Update app.py

Files changed (1): app.py (+10, -23)
app.py CHANGED
@@ -109,10 +109,11 @@ if st.button("Sentiment Analysis", type="secondary"):
 
             for item in range(30):
                 try:
-                    driver.execute_script("window.scrollBy(0, 500);")
-                    WebDriverWait(driver, 10).until(
-                        EC.presence_of_element_located((By.CSS_SELECTOR, "#content #content-text"))
-                    )
+                    body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
+                    body.send_keys(Keys.END)
+                    placeholder.text(f"Scrolled {item + 1} times")
+                    progress_bar.progress((item + 1) / 150)
+                    time.sleep(0.5)
                 except Exception as e:
                     st.error(f"Exception during scrolling: {e}")
                     break
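Note on this hunk: the rewrite stops waiting for comment nodes after each window.scrollBy and instead sends an END keystroke to the page body, which makes YouTube's lazy loader fetch the next batch of comments while progress is reported in the Streamlit UI. The new lines also require Keys and time to be imported and assume placeholder and progress_bar already exist; none of that is visible in this hunk. A minimal standalone sketch of the same pattern, with those pieces filled in as assumptions and a hypothetical video URL:

import time

import streamlit as st
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

driver = webdriver.Chrome()  # assumes a local Chrome install
driver.get("https://www.youtube.com/watch?v=EXAMPLE")  # hypothetical URL

placeholder = st.empty()       # assumption: app.py creates these earlier
progress_bar = st.progress(0)

for item in range(30):
    try:
        # Wait until the <body> is visible, then jump to the bottom of the
        # page; each END keystroke lazy-loads another batch of comments.
        body = WebDriverWait(driver, 30).until(
            EC.visibility_of_element_located((By.TAG_NAME, "body"))
        )
        body.send_keys(Keys.END)
        placeholder.text(f"Scrolled {item + 1} times")
        # The divisor is 150 but the loop runs 30 times, so the bar stops
        # at 20%; (item + 1) / 30 would fill it completely.
        progress_bar.progress((item + 1) / 150)
        time.sleep(0.5)  # give the lazy loader time to respond
    except Exception as e:
        st.error(f"Exception during scrolling: {e}")
        break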
@@ -124,16 +125,9 @@ if st.button("Sentiment Analysis", type="secondary"):
             try:
                 wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
                 comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
-                timestamps = driver.find_elements(By.CSS_SELECTOR, "#header-author > yt-formatted-string > a")
-
                 user_id = 1
-                for comment, timestamp_element in zip(comments, timestamps):
-                    timestamp_text = timestamp_element.get_attribute('href')
-                    timestamp_text = timestamp_text.split("lc=")[-1]
-                    try:
-                        timestamp = datetime.fromtimestamp(int(timestamp_text,36)/1000).strftime("%Y-%m-%d %H:%M:%S")
-                    except:
-                        timestamp = "Timestamp not found"
+                for comment in comments:
+                    timestamp = datetime.now().strftime("%Y-%m-%d")
                     data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
                     user_id += 1
                 data = [dict(t) for t in {tuple(d.items()) for d in data}]
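Note on this hunk: the old loop decoded each comment permalink's lc= parameter as a base-36 value to recover a posting timestamp; the rewrite drops that and stamps every row with the scrape date, so comment_date no longer reflects when a comment was written. The deduplication line kept by both versions round-trips each dict through a hashable tuple; a short sketch of that pattern with made-up rows:

data = [
    {"User ID": 1, "Comment": "great video", "comment_date": "2025-01-01"},
    {"User ID": 1, "Comment": "great video", "comment_date": "2025-01-01"},  # exact duplicate
    {"User ID": 2, "Comment": "first!", "comment_date": "2025-01-01"},
]

# dicts are unhashable, so convert each row to a tuple of (key, value)
# pairs, deduplicate through a set, then rebuild the dicts. Set iteration
# order is arbitrary, so row order is not preserved.
data = [dict(t) for t in {tuple(d.items()) for d in data}]
print(len(data))  # 2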
@@ -143,7 +137,7 @@ if st.button("Sentiment Analysis", type="secondary"):
                 df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
                 st.dataframe(df)
 
-                if not df.empty and not df['Comment'].tolist() == []: #added second check
+                if tokenizer and model:
                     inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
                     with torch.no_grad():
                         logits = model(**inputs).logits
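Note on this hunk: the guard now checks that the tokenizer and model loaded rather than checking for an empty DataFrame, so an empty comment list can now reach the tokenizer unchecked; keeping the emptiness check alongside the new one would be safer. For reference, a sketch of the batch-inference pattern inside the guarded block; the checkpoint name is an assumption, since the model app.py actually loads is not shown in this diff:

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Assumption: a generic sentiment checkpoint stands in for whatever app.py loads.
name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForSequenceClassification.from_pretrained(name)

comments = ["loved this video", "terrible audio quality"]
inputs = tokenizer(comments, return_tensors="pt", padding=True, truncation=True)
with torch.no_grad():  # inference only; skip gradient bookkeeping
    logits = model(**inputs).logits
labels = [model.config.id2label[i] for i in logits.argmax(dim=-1).tolist()]
print(labels)  # e.g. ['POSITIVE', 'NEGATIVE']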
@@ -176,21 +170,14 @@ if st.button("Sentiment Analysis", type="secondary"):
                         file_name='Summary of the results.csv',
                         mime='text/csv',
                     )
-                else:
-                    st.warning("No comments were scraped. Sentiment analysis could not be performed.")
-
+
         else:
             st.warning("Please enter a URL.")
     else:
         st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
 
-    if 'url_count' in st.session_state:
+    if 'url_count' in st.session_state: #added if statement.
         st.write(f"URL pasted {st.session_state['url_count']} times.")
 
-
-
-
-
 
 
-
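Note on this hunk: wrapping the counter display in if 'url_count' in st.session_state: avoids a KeyError on a fresh session where no URL has been pasted yet. A minimal sketch of the pattern, with the increment (done elsewhere in app.py) stubbed in as an assumption:

import streamlit as st

url = st.text_input("Paste a YouTube URL")  # hypothetical widget
if url:
    # Assumption: app.py increments the counter wherever a URL is accepted.
    st.session_state["url_count"] = st.session_state.get("url_count", 0) + 1

# The guarded display from this commit: only read the key once it exists.
if "url_count" in st.session_state:
    st.write(f"URL pasted {st.session_state['url_count']} times.")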