nlpblogs committed on
Commit
31af44b
·
verified ·
1 Parent(s): add0c31

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -88
app.py CHANGED
@@ -1,82 +1,26 @@
1
- import streamlit as st
2
-
 
 
3
  from selenium import webdriver
4
- from selenium.webdriver.common.by import By
5
- from selenium.webdriver.chrome.options import Options
6
-
7
  from selenium.webdriver.chrome.service import Service
8
-
9
- import pandas as pd
10
-
11
- from selenium.webdriver.common.keys import Keys
12
-
13
  from selenium.webdriver.support.ui import WebDriverWait
14
  from selenium.webdriver.support import expected_conditions as EC
 
 
15
  import time
16
- import sys
17
- from datetime import datetime
18
-
19
-
20
- from webdriver_manager.chrome import ChromeDriverManager
21
- from selenium.webdriver.chrome.service import Service as ChromeService
22
-
23
- from webdriver_manager.core.os_manager import ChromeType
24
-
25
- import re
26
-
27
-
28
- import transformers
29
- from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
30
- import io
31
  import plotly.express as px
32
- import zipfile
33
- import torch
34
-
35
- import matplotlib.pyplot as plt
36
  from wordcloud import WordCloud
 
 
37
  from nltk.corpus import stopwords
 
38
 
39
- import nltk
40
  nltk.download('stopwords')
41
 
42
-
43
-
44
- with st.sidebar:
45
- st.button("DEMO APP", type="primary")
46
-
47
-
48
- expander = st.expander("**Important notes on the YouTube Comments Sentiment Analysis App**")
49
- expander.write('''
50
-
51
-
52
- **How to Use**
53
- This app works with a YouTube URL. Paste the URL and press the 'Sentiment Analysis' button to perform sentiment analysis on your YouTube Comments.
54
-
55
-
56
- **Usage Limits**
57
- You can perform sentiment analysis on YouTube Comments up to 5 times.
58
-
59
-
60
- **Subscription Management**
61
- This demo app offers a one-day subscription, expiring after 24 hours. If you are interested in building your own YouTube Comments Sentiment Analysis Web App, we invite you to explore our NLP Web App Store on our website. You can select your desired features, place your order, and we will deliver your custom app in five business days. If you wish to delete your Account with us, please contact us at [email protected]
62
-
63
-
64
- **Customization**
65
- To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.
66
-
67
-
68
- **Charts**
69
- Hover to interact with and download the charts.
70
-
71
-
72
- **File Handling and Errors**
73
- For any errors or inquiries, please contact us at [email protected]
74
-
75
-
76
-
77
- ''')
78
-
79
-
80
  st.subheader("YouTube Comments Sentiment Analysis", divider="red")
81
  tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
82
  model = transformers.DistilBertForSequenceClassification.from_pretrained("tabularisai/robust-sentiment-analysis")
@@ -98,6 +42,7 @@ st.button("Clear question", on_click=clear_question)
98
  if st.button("Sentiment Analysis", type="secondary"):
99
  if st.session_state['url_count'] < max_attempts:
100
  if url:
 
101
  with st.spinner("Wait for it...", show_time=True):
102
  options = Options()
103
  options.add_argument("--headless")
@@ -110,31 +55,26 @@ if st.button("Sentiment Analysis", type="secondary"):
110
  data = []
111
  wait = WebDriverWait(driver, 30)
112
  driver.get(url)
113
-
114
  placeholder = st.empty()
115
  progress_bar = st.progress(0)
116
-
117
  for item in range(30):
118
  try:
119
  body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
120
  body.send_keys(Keys.END)
121
  placeholder.text(f"Scrolled {item + 1} times")
122
- progress_bar.progress((item + 1) / 150)
123
  time.sleep(0.5)
124
  except Exception as e:
125
  st.error(f"Exception during scrolling: {e}")
126
  break
127
-
128
  placeholder.text("Scrolling complete.")
129
  progress_bar.empty()
130
-
131
  data = []
132
  try:
133
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
134
  comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
135
  user_id = 1
136
  for comment in comments:
137
-
138
  data.append({"Comment": comment.text})
139
  user_id += 1
140
  data = [dict(t) for t in {tuple(d.items()) for d in data}]
@@ -143,7 +83,6 @@ if st.button("Sentiment Analysis", type="secondary"):
143
  driver.quit()
144
  df = pd.DataFrame(data, columns=["Comment"])
145
  st.dataframe(df)
146
-
147
  if tokenizer and model:
148
  inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
149
  with torch.no_grad():
@@ -154,7 +93,6 @@ if st.button("Sentiment Analysis", type="secondary"):
154
  for i, label in enumerate(predicted_labels):
155
  results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
156
  sentiment_df = pd.DataFrame(results)
157
-
158
  value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
159
  final_df = value_counts1
160
  tab1, tab2 = st.tabs(["Pie Chart", "Word Cloud"])
@@ -162,13 +100,10 @@ if st.button("Sentiment Analysis", type="secondary"):
162
  fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
163
  fig1.update_traces(textposition='inside', textinfo='percent+label')
164
  st.plotly_chart(fig1)
165
-
166
  result = pd.concat([df, sentiment_df], axis=1)
167
-
168
-
169
  with tab2:
170
  text = " ".join(comment for comment in df['Comment'])
171
- stopwords_set = set(stopwords.words('english'))
172
  text = re.sub('[^A-Za-z]+', ' ', text)
173
  words = text.split()
174
  clean_text = [word for word in words if word.lower() not in stopwords_set]
@@ -178,8 +113,6 @@ if st.button("Sentiment Analysis", type="secondary"):
178
  plt.imshow(wc)
179
  plt.axis('off')
180
  st.pyplot(fig)
181
-
182
-
183
  csv = result.to_csv(index=False)
184
  st.download_button(
185
  label="Download data as CSV",
@@ -187,14 +120,10 @@ if st.button("Sentiment Analysis", type="secondary"):
187
  file_name='Summary of the results.csv',
188
  mime='text/csv',
189
  )
190
-
191
  else:
192
  st.warning("Please enter a URL.")
193
  else:
194
  st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
195
 
196
- if 'url_count' in st.session_state: #added if statement.
197
- st.write(f"URL pasted {st.session_state['url_count']} times.")
198
-
199
-
200
-
 
1
+ import streamlit as st
2
+ import transformers
3
+ import pandas as pd
4
+ import torch
5
  from selenium import webdriver
 
 
 
6
  from selenium.webdriver.chrome.service import Service
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+ from webdriver_manager.core.os_manager import ChromeType
9
+ from selenium.webdriver.common.by import By
 
 
10
  from selenium.webdriver.support.ui import WebDriverWait
11
  from selenium.webdriver.support import expected_conditions as EC
12
+ from selenium.webdriver.common.keys import Keys
13
+ from selenium.webdriver.chrome.options import Options
14
  import time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  import plotly.express as px
 
 
 
 
16
  from wordcloud import WordCloud
17
+ import matplotlib.pyplot as plt
18
+ import nltk
19
  from nltk.corpus import stopwords
20
+ import re
21
 
 
22
  nltk.download('stopwords')
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  st.subheader("YouTube Comments Sentiment Analysis", divider="red")
25
  tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
26
  model = transformers.DistilBertForSequenceClassification.from_pretrained("tabularisai/robust-sentiment-analysis")
 
42
  if st.button("Sentiment Analysis", type="secondary"):
43
  if st.session_state['url_count'] < max_attempts:
44
  if url:
45
+ update_url_count() # Increment count only when the button is pressed and URL is valid.
46
  with st.spinner("Wait for it...", show_time=True):
47
  options = Options()
48
  options.add_argument("--headless")
 
55
  data = []
56
  wait = WebDriverWait(driver, 30)
57
  driver.get(url)
 
58
  placeholder = st.empty()
59
  progress_bar = st.progress(0)
 
60
  for item in range(30):
61
  try:
62
  body = WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
63
  body.send_keys(Keys.END)
64
  placeholder.text(f"Scrolled {item + 1} times")
65
+ progress_bar.progress((item + 1) / 30) # Corrected progress bar update
66
  time.sleep(0.5)
67
  except Exception as e:
68
  st.error(f"Exception during scrolling: {e}")
69
  break
 
70
  placeholder.text("Scrolling complete.")
71
  progress_bar.empty()
 
72
  data = []
73
  try:
74
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
75
  comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
76
  user_id = 1
77
  for comment in comments:
 
78
  data.append({"Comment": comment.text})
79
  user_id += 1
80
  data = [dict(t) for t in {tuple(d.items()) for d in data}]
 
83
  driver.quit()
84
  df = pd.DataFrame(data, columns=["Comment"])
85
  st.dataframe(df)
 
86
  if tokenizer and model:
87
  inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
88
  with torch.no_grad():
 
93
  for i, label in enumerate(predicted_labels):
94
  results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
95
  sentiment_df = pd.DataFrame(results)
 
96
  value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
97
  final_df = value_counts1
98
  tab1, tab2 = st.tabs(["Pie Chart", "Word Cloud"])
 
100
  fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
101
  fig1.update_traces(textposition='inside', textinfo='percent+label')
102
  st.plotly_chart(fig1)
 
103
  result = pd.concat([df, sentiment_df], axis=1)
 
 
104
  with tab2:
105
  text = " ".join(comment for comment in df['Comment'])
106
+ stopwords_set = set(stopwords.words('english'))
107
  text = re.sub('[^A-Za-z]+', ' ', text)
108
  words = text.split()
109
  clean_text = [word for word in words if word.lower() not in stopwords_set]
 
113
  plt.imshow(wc)
114
  plt.axis('off')
115
  st.pyplot(fig)
 
 
116
  csv = result.to_csv(index=False)
117
  st.download_button(
118
  label="Download data as CSV",
 
120
  file_name='Summary of the results.csv',
121
  mime='text/csv',
122
  )
 
123
  else:
124
  st.warning("Please enter a URL.")
125
  else:
126
  st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
127
 
128
+ if 'url_count' in st.session_state:
129
+ st.write(f"URL pasted {st.session_state['url_count']} times.")