nlpblogs committed
Commit 8cb61a4 · verified · 1 Parent(s): 983408d

Update app.py

Files changed (1)
  1. app.py +11 -11
app.py CHANGED
@@ -109,11 +109,10 @@ if st.button("Sentiment Analysis", type="secondary"):
 
             for item in range(30):
                 try:
-                    body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
-                    body.send_keys(Keys.END)
-                    placeholder.text(f"Scrolled {item + 1} times")
-                    progress_bar.progress((item + 1) / 150)
-                    time.sleep(0.5)
+                    driver.execute_script("window.scrollBy(0, 500);")
+                    WebDriverWait(driver, 10).until(
+                        EC.presence_of_element_located((By.CSS_SELECTOR, "#content #content-text"))
+                    )
                 except Exception as e:
                     st.error(f"Exception during scrolling: {e}")
                     break
@@ -125,14 +124,14 @@ if st.button("Sentiment Analysis", type="secondary"):
             try:
                 wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
                 comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
-                timestamps = driver.find_elements(By.CSS_SELECTOR, "#header-author > yt-formatted-string > a") #timestamp elements.
+                timestamps = driver.find_elements(By.CSS_SELECTOR, "#header-author > yt-formatted-string > a")
 
                 user_id = 1
                 for comment, timestamp_element in zip(comments, timestamps):
                     timestamp_text = timestamp_element.get_attribute('href')
                     timestamp_text = timestamp_text.split("lc=")[-1]
                     try:
-                        timestamp = datetime.fromtimestamp(int(timestamp_text,36)/1000).strftime("%Y-%m-%d")
+                        timestamp = datetime.fromtimestamp(int(timestamp_text,36)/1000).strftime("%Y-%m-%d %H:%M:%S")
                     except:
                         timestamp = "Timestamp not found"
                     data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
@@ -144,7 +143,7 @@ if st.button("Sentiment Analysis", type="secondary"):
                 df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
                 st.dataframe(df)
 
-                if tokenizer and model:
+                if not df.empty and not df['Comment'].tolist() == []: #added second check
                     inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
                     with torch.no_grad():
                         logits = model(**inputs).logits
@@ -177,16 +176,17 @@ if st.button("Sentiment Analysis", type="secondary"):
                         file_name='Summary of the results.csv',
                         mime='text/csv',
                     )
-
+                else:
+                    st.warning("No comments were scraped. Sentiment analysis could not be performed.")
+
         else:
             st.warning("Please enter a URL.")
     else:
         st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
 
-    if 'url_count' in st.session_state: #added if statement.
+    if 'url_count' in st.session_state:
        st.write(f"URL pasted {st.session_state['url_count']} times.")
 
-
 
 
 
 
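The only change to the timestamp logic is the wider format string (`%Y-%m-%d %H:%M:%S` instead of `%Y-%m-%d`). Pulled out as a standalone helper, the decoding step reads like the sketch below; `decode_lc_timestamp` is a hypothetical name, and interpreting the `lc=` value as a base-36 integer of epoch milliseconds simply mirrors what app.py already does rather than any documented YouTube format.

```python
from datetime import datetime


def decode_lc_timestamp(href: str) -> str:
    """Hypothetical helper mirroring app.py's timestamp handling: take the
    comment permalink href, keep whatever follows 'lc=', read it as a base-36
    integer of milliseconds since the epoch, and format it."""
    lc_value = href.split("lc=")[-1]
    try:
        return datetime.fromtimestamp(int(lc_value, 36) / 1000).strftime("%Y-%m-%d %H:%M:%S")
    except Exception:  # app.py uses a bare except; any parse failure falls back
        return "Timestamp not found"
```

Values that `int()` cannot parse as base 36 fall through to the fallback string, which matches the behaviour of the code in the diff.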
 
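The guard in front of the model call now checks the scraped data instead of the model objects, and a new `else:` branch surfaces a warning when nothing was collected. Here is a minimal end-to-end sketch of that branch, with an assumed checkpoint name and toy comments standing in for the scraped DataFrame; app.py's actual model setup and downstream summary are outside the hunks shown here.

```python
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Assumption: any sentiment-classification checkpoint; app.py's model is not shown in this diff.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

# Toy stand-in for the scraped comments.
df = pd.DataFrame({"Comment": ["Great video!", "This explanation made no sense."]})

# Mirror the new guard: run inference only when comments were actually scraped.
if not df.empty and not df["Comment"].tolist() == []:
    inputs = tokenizer(df["Comment"].tolist(), return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    df["Sentiment"] = [model.config.id2label[i] for i in logits.argmax(dim=-1).tolist()]
    print(df)
else:
    print("No comments were scraped. Sentiment analysis could not be performed.")
```

Because the DataFrame is always built with a `Comment` column, `not df.empty` already implies a non-empty `df['Comment'].tolist()`, so the second condition is effectively redundant; the sketch keeps both to stay close to the commit.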