Spaces:

nlpblogs
/

youtube-sentiment-analysis-app

Running

App Files Community

nlpblogs committed 25 days ago

Commit

712bcb0

verified ·

1 Parent(s): 8ae1eff

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -6

app.py CHANGED Viewed

@@ -134,17 +134,68 @@ if st.button("Sentiment Analysis", type="secondary"):
                     comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
                     user_id = 1
                     for comment in comments:
-                        timestamp = datetime.now().strftime("%Y-%m-%d")
-                        data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
                         user_id += 1
                     data = [dict(t) for t in {tuple(d.items()) for d in data}]
                 except Exception as e:
                     st.error(f"Exception during comment extraction: {e}")
                 driver.quit()
-                df1 = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
-                st.dataframe(df1)
-                df = df1.dropna(subset=['Comment'])
-                st.dataframe(df)

                     comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
                     user_id = 1
                     for comment in comments:
+                        data.append({"Comment": comment.text})
                         user_id += 1
                     data = [dict(t) for t in {tuple(d.items()) for d in data}]
                 except Exception as e:
                     st.error(f"Exception during comment extraction: {e}")
                 driver.quit()
+                df = pd.DataFrame(data, columns=["Comment"])
+                if tokenizer and model:
+                    inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
+                    with torch.no_grad():
+                        logits = model(**inputs).logits
+                        predicted_probabilities = torch.nn.functional.softmax(logits, dim=-1)
+                        predicted_labels = predicted_probabilities.argmax(dim=1)
+                        results = []
+                        for i, label in enumerate(predicted_labels):
+                            results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
+                        sentiment_df = pd.DataFrame(results)
+                    value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
+                    final_df = value_counts1
+                    tab1, tab2 = st.tabs(["Pie Chart", "Word Cloud"])
+                    with tab1:
+                        fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
+                        fig1.update_traces(textposition='inside', textinfo='percent+label')
+                        st.plotly_chart(fig1)
+                    with tab2:
+                        text = " ".join(comment for comment in df['Comment'])
+                        stopwords_set = set(stopwords.words('english')) # Correct import and usage
+                        text = re.sub('[^A-Za-z]+', ' ', text)
+                        words = text.split()
+                        clean_text = [word for word in words if word.lower() not in stopwords_set]
+                        clean_text = ' '.join(clean_text)
+                        wc = WordCloud(width=3000, height=2000, background_color='black', colormap='Pastel1', collocations=False).generate(clean_text)
+                        fig = plt.figure(figsize=(40, 30))
+                        plt.imshow(wc)
+                        plt.axis('off')
+                        st.pyplot(fig)
+                    csv = result.to_csv(index=False)
+                    st.download_button(
+                        label="Download data as CSV",
+                        data=csv,
+                        file_name='Summary of the results.csv',
+                        mime='text/csv',
+                    )
+        else:
+            st.warning("Please enter a URL.")
+    else:
+        st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
+if 'url_count' in st.session_state:
+    st.write(f"URL pasted {st.session_state['url_count']} times.")