Spaces:

nlpblogs
/

youtube-sentiment-analysis-app

Running

File size: 7,898 Bytes

31af44b
 
 
 
ca12572
 
31af44b
 
 
ca12572
 
31af44b
 
ca12572
5a1ddac
add0c31
31af44b
 
add0c31
31af44b
add0c31
 
 
d50002e
 
 
 
 
 
 
 
 
cc688c2
d50002e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea96c2b
 
 
ca12572
ea96c2b
 
eae5fab
2e14077
ea96c2b
 
 
eae5fab
ca12572
eae5fab
ca12572
eae5fab
ca12572
 
eae5fab
 
 
31af44b
eae5fab
 
 
 
 
 
 
 
 
 
 
 
30bf2ff
 
5b710b5
30bf2ff
2096041
 
4307f05
75798a6
7a8dd31
30bf2ff
 
 
 
 
feca4a5
 
 
 
 
 
712bcb0
feca4a5
 
 
 
 
712bcb0
49549b5
d50002e
712bcb0
 
 
 
 
 
 
 
 
9ea338b
 
d50002e
 
 
712bcb0
 
 
 
 
 
 
49549b5
712bcb0
 
31af44b
712bcb0
 
 
 
 
 
 
 
 
9ea338b
c6b1123
 
9ea338b
712bcb0
 
 
 
 
 
 
 
 
 
 
c6b1123
 
31af44b

import streamlit as st
import transformers
import pandas as pd
import torch
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.chrome import ChromeType
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
import time
import plotly.express as px
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import nltk
from nltk.corpus import stopwords
import re

# Streamlit re-executes this script on every interaction, so only reach out to
# the NLTK download server when the stopword corpus is not already available
# locally. nltk.data.find raises LookupError when the resource is missing.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

# Sidebar contents: a primary-styled "DEMO APP" button followed by an
# expander that holds the usage notes for the app.
st.sidebar.button("DEMO APP", type="primary")

expander = st.sidebar.expander("**Important notes on the YouTube Comments Sentiment Analysis App**")
expander.write('''
    
    
    **How to Use**
    This app works with a YouTube URL.  Paste the URL and press the 'Sentiment Analysis' button to perform sentiment analysis on the YouTube Comments.
    
    
    **Usage Limits**
    You can perform sentiment analysis on YouTube Comments up to 5 times.
    
    
    **Subscription Management**
    This demo app offers a one-day subscription, expiring after 24 hours. If you are interested in building your own YouTube Comments Sentiment Analysis Web App, we invite you to explore our NLP Web App Store on our website. You can select your desired features, place your order, and we will deliver your custom app in five business days. If you wish to delete your Account with us, please contact us at [email protected]
    
    
    **Customization**
    To change the app's background color to white or black, click the three-dot menu on the right-hand side of your app, go to Settings and then Choose app theme, colors and fonts.
    
    
    **Charts**
    Hover to interact with and download the charts.
    
    
    **File Handling and Errors**
    For any errors or inquiries, please contact us at [email protected]
   
    
    
''')



st.subheader("YouTube Comments Sentiment Analysis", divider="red")


@st.cache_resource
def _load_sentiment_model():
    """Load the sentiment tokenizer and classifier once per server process.

    Streamlit re-runs the whole script on every widget interaction; without
    caching, both ``from_pretrained`` calls would be repeated on each rerun.

    Returns:
        A ``(tokenizer, model)`` tuple for the
        ``tabularisai/robust-sentiment-analysis`` checkpoint.
    """
    checkpoint = "tabularisai/robust-sentiment-analysis"
    loaded_tokenizer = transformers.DistilBertTokenizer.from_pretrained(checkpoint)
    loaded_model = transformers.DistilBertForSequenceClassification.from_pretrained(checkpoint)
    return loaded_tokenizer, loaded_model


# Module-level names kept so the rest of the script can keep using them.
tokenizer, model = _load_sentiment_model()

# Maximum number of analyses a single session may start.
max_attempts = 5

# Create the per-session attempt counter the first time the script runs;
# later reruns leave the stored value untouched.
if 'url_count' not in st.session_state:
    st.session_state.url_count = 0

def update_url_count():
    """Add one to the 'url_count' entry kept in the Streamlit session state."""
    attempts_so_far = st.session_state['url_count']
    st.session_state['url_count'] = attempts_so_far + 1

def clear_question():
    """Blank the session-state value stored under the "url" key."""
    st.session_state.url = ""

# Text box bound to the "url" session-state key, plus a button whose click
# callback empties that key before the next rerun.
url = st.text_input(label="Enter YouTube URL:", key="url")
st.button(label="Clear question", on_click=clear_question)

def _is_youtube_url(candidate):
    """Return True when *candidate* is an http(s) URL on a YouTube host.

    The URL is typed by the user and then opened by a browser running on the
    server, so anything that is not a YouTube address is rejected up front.

    Args:
        candidate: Raw text from the URL input box.

    Returns:
        True for ``youtu.be``, ``youtube.com`` and its sub-domains over
        http/https; False otherwise (including unparsable input).
    """
    from urllib.parse import urlparse  # local import keeps this block self-contained

    try:
        parsed = urlparse(candidate.strip())
        host = (parsed.hostname or "").lower()
    except ValueError:
        return False
    on_youtube = host in ("youtu.be", "youtube.com") or host.endswith(".youtube.com")
    return parsed.scheme in ("http", "https") and on_youtube


def _scrape_comments(video_url, scroll_rounds=30):
    """Open *video_url* in headless Chromium and collect the comment texts.

    The page is scrolled ``scroll_rounds`` times (pressing END on the body) so
    that more comments get loaded, with progress reported in the Streamlit UI.

    Args:
        video_url: Address of the page to open.
        scroll_rounds: How many times to press END; also the progress-bar total.

    Returns:
        Unique, non-blank comment strings in the order they appear on the page.
        An empty list when extraction fails (the error is shown via st.error).
    """
    options = Options()
    for flag in (
        "--headless",
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--start-maximized",
    ):
        options.add_argument(flag)
    service = Service(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install())
    driver = webdriver.Chrome(service=service, options=options)

    comments = []
    try:
        wait = WebDriverWait(driver, 30)
        driver.get(video_url)
        placeholder = st.empty()
        progress_bar = st.progress(0)
        for round_no in range(1, scroll_rounds + 1):
            # Best-effort scrolling: any failure is reported and scrolling
            # stops, but whatever has loaded so far is still extracted below.
            try:
                body = wait.until(EC.visibility_of_element_located((By.TAG_NAME, "body")))
                body.send_keys(Keys.END)
                placeholder.text(f"Scrolled {round_no} times")
                # Fraction of the planned rounds, so the bar ends at 100%.
                progress_bar.progress(round_no / scroll_rounds)
                time.sleep(0.5)
            except Exception as e:
                st.error(f"Exception during scrolling: {e}")
                break
        placeholder.text("Scrolling complete.")
        progress_bar.empty()

        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
            elements = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
            texts = [element.text for element in elements]
            # dict.fromkeys de-duplicates while keeping page order; blank
            # texts carry nothing to classify and are dropped.
            comments = list(dict.fromkeys(text for text in texts if text.strip()))
        except Exception as e:
            st.error(f"Exception during comment extraction: {e}")
    finally:
        # Always shut the browser down, even if loading the page raised.
        driver.quit()
    return comments


def _classify_comments(comments, batch_size=32):
    """Predict a sentiment label for every comment.

    Comments are fed to the model in slices of ``batch_size`` so memory use
    does not grow with the total number of comments.

    Args:
        comments: List of comment strings.
        batch_size: Number of comments per forward pass.

    Returns:
        List of label names (from ``model.config.id2label``), one per comment.
    """
    labels = []
    with torch.no_grad():
        for start in range(0, len(comments), batch_size):
            batch = comments[start:start + batch_size]
            inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True)
            logits = model(**inputs).logits
            # softmax is monotonic, so the arg-max of the logits is the
            # arg-max of the probabilities.
            predicted = logits.argmax(dim=-1).tolist()
            labels.extend(model.config.id2label[index] for index in predicted)
    return labels


def _render_word_cloud(comments):
    """Draw a word cloud of the comments' non-stopword Latin-letter words."""
    stopwords_set = set(stopwords.words('english'))
    letters_only = re.sub(r'[^A-Za-z]+', ' ', " ".join(comments))
    kept_words = [word for word in letters_only.split() if word.lower() not in stopwords_set]
    if not kept_words:
        # WordCloud.generate cannot work without at least one word.
        st.info("Not enough words to build a word cloud.")
        return
    wc = WordCloud(
        width=3000,
        height=2000,
        background_color='black',
        colormap='Pastel1',
        collocations=False,
    ).generate(' '.join(kept_words))
    fig, ax = plt.subplots(figsize=(40, 30))
    try:
        ax.imshow(wc)
        ax.axis('off')
        st.pyplot(fig)
    finally:
        # Release the figure so repeated runs do not accumulate open figures.
        plt.close(fig)


def _run_analysis(video_url):
    """Scrape, classify and visualise the comments of *video_url*.

    Shows the scraped comments, then a sentiment pie chart, a word cloud and
    a CSV download of the per-comment results.
    """
    comments = _scrape_comments(video_url)
    df = pd.DataFrame({"Comment": comments})
    st.dataframe(df)

    if df.empty:
        # Nothing to tokenise or plot.
        st.warning("No comments were found for this video.")
        return
    if tokenizer is None or model is None:
        return

    result = df.assign(Sentiment=_classify_comments(comments))
    final_df = (
        result['Sentiment']
        .value_counts()
        .rename_axis('Sentiment')
        .reset_index(name='count')
    )

    tab1, tab2 = st.tabs(["Pie Chart", "Word Cloud"])
    with tab1:
        fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
        fig1.update_traces(textposition='inside', textinfo='percent+label')
        st.plotly_chart(fig1)
    with tab2:
        _render_word_cloud(comments)

    # NOTE(review): this button lives inside the "Sentiment Analysis" branch,
    # which only runs on the rerun triggered by that click.
    st.download_button(
        label="Download data as CSV",
        data=result.to_csv(index=False),
        file_name='Summary of the results.csv',
        mime='text/csv',
    )


# Entry point: runs on the rerun triggered by pressing "Sentiment Analysis".
if st.button("Sentiment Analysis", type="secondary"):
    if st.session_state['url_count'] >= max_attempts:
        st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
    elif not url:
        st.warning("Please enter a URL.")
    elif not _is_youtube_url(url):
        st.warning("Please enter a valid YouTube URL.")
    else:
        update_url_count()  # Count only presses that carry a valid URL.
        with st.spinner("Wait for it...", show_time=True):
            _run_analysis(url.strip())

st.divider()

# Footer: report the counter value stored in the session state.
if 'url_count' in st.session_state:
    attempts_used = st.session_state['url_count']
    st.write(f"URL pasted {attempts_used} times.")