Update app.py
Browse files
app.py
CHANGED
@@ -70,6 +70,9 @@ with st.sidebar:
|
|
70 |
''')
|
71 |
|
72 |
|
|
|
|
|
|
|
73 |
st.subheader("YouTube Comments Sentiment Analysis", divider="red")
|
74 |
tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
|
75 |
model = transformers.DistilBertForSequenceClassification.from_pretrained("tabularisai/robust-sentiment-analysis")
|
@@ -107,13 +110,12 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
107 |
placeholder = st.empty()
|
108 |
progress_bar = st.progress(0)
|
109 |
|
110 |
-
for item in range(
|
111 |
try:
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
time.sleep(0.5)
|
117 |
except Exception as e:
|
118 |
st.error(f"Exception during scrolling: {e}")
|
119 |
break
|
@@ -125,9 +127,16 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
125 |
try:
|
126 |
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
|
127 |
comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
|
|
|
|
|
128 |
user_id = 1
|
129 |
-
for comment in comments:
|
130 |
-
|
|
|
|
|
|
|
|
|
|
|
131 |
data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
|
132 |
user_id += 1
|
133 |
data = [dict(t) for t in {tuple(d.items()) for d in data}]
|
@@ -170,16 +179,14 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
170 |
file_name='Summary of the results.csv',
|
171 |
mime='text/csv',
|
172 |
)
|
173 |
-
|
174 |
else:
|
175 |
st.warning("Please enter a URL.")
|
176 |
else:
|
177 |
st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
|
178 |
|
179 |
-
if 'url_count' in st.session_state:
|
180 |
st.write(f"URL pasted {st.session_state['url_count']} times.")
|
181 |
|
182 |
-
|
183 |
|
184 |
|
185 |
|
|
|
70 |
''')
|
71 |
|
72 |
|
73 |
+
|
74 |
+
|
75 |
+
|
76 |
@st.cache_resource
def _load_sentiment_pipeline(model_name: str = "tabularisai/robust-sentiment-analysis"):
    """Return (tokenizer, model) for the sentiment checkpoint, cached per process.

    Streamlit re-executes the whole script on every widget interaction, so an
    uncached ``from_pretrained`` would reload the weights on each button press;
    ``st.cache_resource`` keeps one shared copy alive across reruns.
    """
    tok = transformers.DistilBertTokenizer.from_pretrained(model_name)
    mdl = transformers.DistilBertForSequenceClassification.from_pretrained(model_name)
    return tok, mdl

st.subheader("YouTube Comments Sentiment Analysis", divider="red")
# Module-level names kept identical so downstream inference code is unchanged.
tokenizer, model = _load_sentiment_pipeline()
|
|
|
110 |
placeholder = st.empty()
progress_bar = st.progress(0)

max_scrolls = 30  # bounded so very long comment threads cannot hang the app
for step in range(max_scrolls):
    try:
        # Each 500px scroll nudges YouTube into lazy-loading the next comment batch.
        driver.execute_script("window.scrollBy(0, 500);")
        # NOTE(review): presence_of_element_located is satisfied as soon as ONE
        # comment exists, so after the first batch this wait returns instantly;
        # the sleep below is what actually paces the lazy loader — confirm 0.5s
        # is enough on slow connections.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#content #content-text"))
        )
        time.sleep(0.5)  # give the newly revealed batch time to render
        progress_bar.progress((step + 1) / max_scrolls)  # bar was created but never advanced
    except Exception as e:
        # Broad on purpose: any WebDriver failure ends scrolling but keeps the app alive.
        st.error(f"Exception during scrolling: {e}")
        break
|
|
|
127 |
try:
|
128 |
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
|
129 |
comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
|
130 |
+
timestamps = driver.find_elements(By.CSS_SELECTOR, "#header-author > yt-formatted-string > a") #timestamp elements.
|
131 |
+
|
132 |
user_id = 1
|
133 |
+
for comment, timestamp_element in zip(comments, timestamps):
|
134 |
+
timestamp_text = timestamp_element.get_attribute('href')
|
135 |
+
timestamp_text = timestamp_text.split("lc=")[-1]
|
136 |
+
try:
|
137 |
+
timestamp = datetime.fromtimestamp(int(timestamp_text,36)/1000).strftime("%Y-%m-%d %H:%M:%S")
|
138 |
+
except:
|
139 |
+
timestamp = "Timestamp not found"
|
140 |
data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
|
141 |
user_id += 1
|
142 |
data = [dict(t) for t in {tuple(d.items()) for d in data}]
|
|
|
179 |
file_name='Summary of the results.csv',
|
180 |
mime='text/csv',
|
181 |
)
|
|
|
182 |
else:
|
183 |
st.warning("Please enter a URL.")
|
184 |
else:
|
185 |
st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
|
186 |
|
187 |
+
# Show how many URLs this session has submitted, but only once the counter exists.
if 'url_count' in st.session_state:
    pasted_total = st.session_state['url_count']
    st.write(f"URL pasted {pasted_total} times.")
|
189 |
|
|
|
190 |
|
191 |
|
192 |
|