nlpblogs committed
Commit 16adfc0 · verified · 1 Parent(s): 76a4123

Update app.py

Files changed (1):
  1. app.py +24 -68
app.py CHANGED
@@ -70,17 +70,8 @@ with st.sidebar:
     ''')


-    st.subheader("YouTube Comments Sentiment Analysis", divider="red")
-    tokenizer = transformers.DistilBertTokenizer.from_pretrained("tabularisai/robust-sentiment-analysis")
-    model = transformers.DistilBertForSequenceClassification.from_pretrained("tabularisai/robust-sentiment-analysis")

-    if 'url_count' not in st.session_state:
-        st.session_state['url_count'] = 0

-    max_attempts = 2
-
-    def update_url_count():
-        st.session_state['url_count'] += 1

     def clear_question():
         st.session_state["url"] = ""
@@ -121,67 +112,32 @@ if st.button("Sentiment Analysis", type="secondary"):
             placeholder.text("Scrolling complete.")
             progress_bar.empty()

-            data = []
-            try:
-                wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
-                comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
-
-                user_id = 1
-                for comment in comments:
-                    views = comment.find_element_by_xpath(by=By.XPATH, './/*[@id="metadata-line"]/span[1]').text
-                    timestamp = comment.find_element_by_xpath(by=By.XPATH, './/*[@id="metadata-line"]/span[2]').text
-
-                    data.append({"User ID": user_id, "Comment": comment.text, "comment_date": timestamp})
-                    user_id += 1
-                data = [dict(t) for t in {tuple(d.items()) for d in data}]
-            except Exception as e:
-                st.error(f"Exception during comment extraction: {e}")
-            driver.quit()
-            df = pd.DataFrame(data, columns=["User ID", "Comment", "comment_date"])
-            st.dataframe(df)
-
-            if tokenizer and model:
-                inputs = tokenizer(df['Comment'].tolist(), return_tensors="pt", padding=True, truncation=True)
-                with torch.no_grad():
-                    logits = model(**inputs).logits
-                predicted_probabilities = torch.nn.functional.softmax(logits, dim=-1)
-                predicted_labels = predicted_probabilities.argmax(dim=1)
-                results = []
-                for i, label in enumerate(predicted_labels):
-                    results.append({'Review Number': i + 1, 'Sentiment': model.config.id2label[label.item()]})
-                sentiment_df = pd.DataFrame(results)
-
-                value_counts1 = sentiment_df['Sentiment'].value_counts().rename_axis('Sentiment').reset_index(name='count')
-                final_df = value_counts1
-                tab1, tab2 = st.tabs(["Pie Chart", "Bar Chart"])
-                with tab1:
-                    fig1 = px.pie(final_df, values='count', names='Sentiment', hover_data=['count'], labels={'count': 'count'})
-                    fig1.update_traces(textposition='inside', textinfo='percent+label')
-                    st.plotly_chart(fig1)
-
-                    result = pd.concat([df, sentiment_df], axis=1)
-                    st.dataframe(result)
-
-                with tab2:
-                    fig2 = px.bar(result, x="Sentiment", y="comment_date", color="Sentiment")
-                    st.plotly_chart(fig2)
-
-                csv = result.to_csv(index=False)
-                st.download_button(
-                    label="Download data as CSV",
-                    data=csv,
-                    file_name='Summary of the results.csv',
-                    mime='text/csv',
-                )
-
-        else:
-            st.warning("Please enter a URL.")
-    else:
-        st.warning(f"You have reached the maximum URL attempts ({max_attempts}).")
-
-if 'url_count' in st.session_state:  # added if statement.
-    st.write(f"URL pasted {st.session_state['url_count']} times.")
+            videos = driver.find_elements_by_class_name('style-scope ytd-grid-video-renderer')
+            youtube_videos = []
+            for video in videos:
+                link = url
+                title = video.find_element_by_xpath('.//*[@id="video-title"]').text
+                views = video.find_element_by_xpath('.//*[@id="metadata-line"]/span[1]').text
+                date = video.find_element_by_xpath('.//*[@id="metadata-line"]/span[2]').text
+                vid_items = {
+                    'Title': title,
+                    'Views': views,
+                    'Posted': date,
+                    'Likes': likes,
+                    'link': link
+                }
+                youtube_videos.append(vid_items)
+            df = pd.DataFrame(youtube_videos)
+            st.dataframe(df)
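
Note on the added block: find_elements_by_class_name and find_element_by_xpath are Selenium 3 helpers that were removed in Selenium 4, and likes is never assigned, so building vid_items raises NameError on the first iteration. A minimal Selenium 4 sketch of the same extraction, with the unassigned 'Likes' field dropped; driver and url are assumed to come from earlier in app.py:

    from selenium.webdriver.common.by import By
    import pandas as pd
    import streamlit as st

    # Selenium 4 style: driver.find_elements(By..., ...). Locating by the
    # ytd-grid-video-renderer tag avoids By.CLASS_NAME, which rejects the
    # compound class string 'style-scope ytd-grid-video-renderer'.
    videos = driver.find_elements(By.CSS_SELECTOR, "ytd-grid-video-renderer")

    youtube_videos = []
    for video in videos:
        title = video.find_element(By.XPATH, './/*[@id="video-title"]').text
        views = video.find_element(By.XPATH, './/*[@id="metadata-line"]/span[1]').text
        date = video.find_element(By.XPATH, './/*[@id="metadata-line"]/span[2]').text
        youtube_videos.append({
            'Title': title,
            'Views': views,
            'Posted': date,
            'link': url,  # the URL pasted by the user earlier in app.py
        })

    df = pd.DataFrame(youtube_videos)
    st.dataframe(df)
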