is445_final

Sleeping

App Files Files Community

jwu249 committed on Dec 15, 2024

Commit

555bebb

verified ·

1 Parent(s): fc67db7

Upload 2 files

Browse files

Files changed (2) hide show

amazon.jpeg +0 -0
app.py +44 -5

amazon.jpeg ADDED Viewed

app.py CHANGED Viewed

@@ -11,7 +11,7 @@ from scipy.special import softmax
 # pip install torch torchvision torchaudio
 # pip install transformers
-st.title("Final Project Part 2 - jwu249 | Expert Visualizations")
 url = "https://www.kaggle.com/datasets/rahulgoel1106/xenophobia-on-twitter-during-covid19"
 st.write("Dataset Link to Download -> [Kaggle Covid-19 Xenophobic Datatset](%s)" % url)
@@ -209,20 +209,50 @@ filtered_summary = (
 )
-# Display the scatter plot
 scatter_plot = alt.Chart(filtered_df).mark_circle(size=60).encode(
     x=alt.X('index:Q', title='Index'),
     y=alt.Y('highest_score:Q', title='Highest Sentiment Score'),
     color=alt.Color('sentiment_type:N', title='Sentiment Type', scale=alt.Scale(scheme='tableau20')),
     tooltip=['index', 'sentiment_type', 'highest_score', 'cleaned_text', 'text']
 ).properties(
     width=800,
     height=400,
-    title="Scatter Plot of Sentiment Scores (Filtered)"
 ).interactive()
-# Display the scatter plot
-st.altair_chart(scatter_plot, use_container_width=True)
 # Display the filtered DataFrame and counts
 st.write(filtered_summary)
@@ -230,6 +260,15 @@ st.dataframe(
     filtered_df[['sentiment_type', 'cleaned_text', 'highest_score', 'text']]
 )
 st.header('''Write Up''')
 multi2 = '''Mentioned in the beginning, as a Asian American I wanted to highlight the xenophobic tweets during
 Covid-19 and using a trained sentiment analysis model to analyze and visualize the tweets was a instant idea

 # pip install torch torchvision torchaudio
 # pip install transformers
+st.title("Final Project Part 2 - Jason Wu | Expert Visualizations")
 url = "https://www.kaggle.com/datasets/rahulgoel1106/xenophobia-on-twitter-during-covid19"
 st.write("Dataset Link to Download -> [Kaggle Covid-19 Xenophobic Datatset](%s)" % url)
 )
+# Create a brush to link scatter plot and bar chart
+brush = alt.selection_interval(encodings=['x', 'y'])
+# Scatter plot with brush
 scatter_plot = alt.Chart(filtered_df).mark_circle(size=60).encode(
     x=alt.X('index:Q', title='Index'),
     y=alt.Y('highest_score:Q', title='Highest Sentiment Score'),
     color=alt.Color('sentiment_type:N', title='Sentiment Type', scale=alt.Scale(scheme='tableau20')),
     tooltip=['index', 'sentiment_type', 'highest_score', 'cleaned_text', 'text']
+).add_params(
+    brush
 ).properties(
     width=800,
     height=400,
+    title="Scatter Plot of Sentiment Scores (Filtered) - Brush Feature to show Bar Chart"
 ).interactive()
+# Bar chart linked to scatter plot
+bar_chart = alt.Chart(filtered_df).transform_filter(
+    brush
+).transform_filter(
+    alt.FieldOneOfPredicate(field='sentiment_type', oneOf=sentiment_filter)  # Apply multiselect filter
+).transform_aggregate(
+    total_score='sum(highest_score)',  # Aggregate the highest_score
+    groupby=['sentiment_type']         # Group by sentiment type
+).mark_bar().encode(
+    x=alt.X('sentiment_type:N', title='Sentiment Type'),
+    y=alt.Y('total_score:Q', title='Sum of Highest Scores'),
+    color=alt.Color('sentiment_type:N', scale=alt.Scale(scheme='tableau20'))
+).properties(
+    width=800,
+    height=200,
+    title="Bar Chart of Sentiment Sums (Linked to Scatter Plot)"
+)
+# Combine scatter and bar charts
+combined_chart = alt.vconcat(
+    scatter_plot,
+    bar_chart
+)
+# Display the combined chart
+st.altair_chart(combined_chart, use_container_width=True)
 # Display the filtered DataFrame and counts
 st.write(filtered_summary)
     filtered_df[['sentiment_type', 'cleaned_text', 'highest_score', 'text']]
 )
+st.header('''Contextual Dataset''')
+st.image("amazon.jpeg")
+url1 = "https://www.kaggle.com/datasets/kritanjalijain/amazon-reviews"
+st.write("Dataset Link to Download -> [Kaggle Amazon Reviews Dataset](%s)" % url1)
+multi4 = '''The dataset I chose as a contextual dataset is an the Amazon Reviews dataset on Kaggle. I chose this dataset because of it similar nature in reviews of products instead. The dataset provides more sentiment for products but could
+be used to train the model to be more efficient in determining Xenophobic tweets. In the future, I want to be able to train the model inorder to do more better analysis on the Twitter dataset.'''
+st.markdown(multi4)
 st.header('''Write Up''')
 multi2 = '''Mentioned in the beginning, as a Asian American I wanted to highlight the xenophobic tweets during
 Covid-19 and using a trained sentiment analysis model to analyze and visualize the tweets was a instant idea