nlpblogs committed
Commit feca4a5 · verified · Parent: 54fb744

Update app.py

Files changed (1): app.py (+79 -11)
app.py CHANGED
@@ -32,6 +32,13 @@ import plotly.express as px
 import zipfile
 import torch
 
+import matplotlib.pyplot as plt
+from wordcloud import WordCloud
+from nltk.corpus import stopwords
+
+import nltk
+nltk.download('stopwords')
+
 
 
 with st.sidebar:
@@ -120,15 +127,76 @@ if st.button("Sentiment Analysis", type="secondary"):
 
             placeholder.text("Scrolling complete.")
             progress_bar.empty()
-            dates = []
-            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
-            comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
-            st.write(comments)
-            for comment in comments:
-                comment_text = comment.text  # Extract the text from the WebElement
-                date_match = re.search(r'(\d+ (day|week|month|year)s? ago)|(\d{4}-\d{2}-\d{2})', comment_text)
-                if date_match:
-                    date_string = date_match.group(0)  # Get the matched date string.
-                    st.write(date_string)  # for debug
+
+            data = []
+            try:
+                wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
+                comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
+                user_id = 1
+                for comment in comments:
+                    timestamp = datetime.now().strftime("%Y-%m-%d")
+                    data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
+                    user_id += 1
+                data = [dict(t) for t in {tuple(d.items()) for d in data}]
+            except Exception as e:
+                st.error(f"Exception during comment extraction: {e}")
+            driver.quit()
+            df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
+            st.dataframe(df)
+
+            if tokenizer and model:
+                inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
+                with torch.no_grad():
+                    logits = model(**inputs).logits
+                predicted_probabilities = torch.nn.functional.softmax(logits, dim=-1)
+                predicted_labels = predicted_probabilities.argmax(dim=1)
+                results = []
+                for i, label in enumerate(predicted_labels):
+                    results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
+                sentiment_df = pd.DataFrame(results)
+
+                value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
+                final_df = value_counts1
+                tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
+                with tab1:
+                    fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
+                    fig1.update_traces(textposition='inside', textinfo='percent+label')
+                    st.plotly_chart(fig1)
+
+                result = pd.concat([df, sentiment_df], axis=1)
+                st.dataframe(result)
+
+                with tab2:
+                    fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
+                    st.plotly_chart(fig2)
+
+                text = " ".join(review for review in df['Comment'])
+                stop_words = set(stopwords.words('english'))
+                text = re.sub('[^A-Za-z]+', ' ', text)
+                words = text.split()
+                clean_text = [word for word in words if word.lower() not in stop_words]
+                clean_text = ' '.join(clean_text)
+                stop_words = set(stopwords.words('english'))
+                wc = WordCloud(width=800, height=400, background_color='white').generate(clean_text)
+                fig = plt.figure(figsize=(12,6))
+                plt.imshow(wc, interpolation='bilinear')
+                plt.axis('off')
+
+                st.pyplot(fig)
+
+
+                csv = result.to_csv(index=False)
+                st.download_button(
+                    label="Download data as CSV",
+                    data=csv,
+                    file_name='Summary of the results.csv',
+                    mime='text/csv',
+                )
 
-
+        else:
+            st.warning("Please enter a URL.")
+    else:
+        st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
+
+if 'url_count' in st.session_state:  # added if statement.
+    st.write(f"URL pasted {st.session_state['url_count']} times.")