Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files
- amazon.jpeg +0 -0
- app.py +44 -5
amazon.jpeg
ADDED
![]() |
app.py
CHANGED
@@ -11,7 +11,7 @@ from scipy.special import softmax
|
|
11 |
# pip install torch torchvision torchaudio
|
12 |
# pip install transformers
|
13 |
|
14 |
-
st.title("Final Project Part 2 -
|
15 |
|
16 |
url = "https://www.kaggle.com/datasets/rahulgoel1106/xenophobia-on-twitter-during-covid19"
|
17 |
st.write("Dataset Link to Download -> [Kaggle Covid-19 Xenophobic Datatset](%s)" % url)
|
@@ -209,20 +209,50 @@ filtered_summary = (
|
|
209 |
)
|
210 |
|
211 |
|
212 |
-
#
|
|
|
|
|
|
|
213 |
scatter_plot = alt.Chart(filtered_df).mark_circle(size=60).encode(
|
214 |
x=alt.X('index:Q', title='Index'),
|
215 |
y=alt.Y('highest_score:Q', title='Highest Sentiment Score'),
|
216 |
color=alt.Color('sentiment_type:N', title='Sentiment Type', scale=alt.Scale(scheme='tableau20')),
|
217 |
tooltip=['index', 'sentiment_type', 'highest_score', 'cleaned_text', 'text']
|
|
|
|
|
218 |
).properties(
|
219 |
width=800,
|
220 |
height=400,
|
221 |
-
title="Scatter Plot of Sentiment Scores (Filtered)"
|
222 |
).interactive()
|
223 |
|
224 |
-
#
|
225 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
226 |
|
227 |
# Display the filtered DataFrame and counts
|
228 |
st.write(filtered_summary)
|
@@ -230,6 +260,15 @@ st.dataframe(
|
|
230 |
filtered_df[['sentiment_type', 'cleaned_text', 'highest_score', 'text']]
|
231 |
)
|
232 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
233 |
st.header('''Write Up''')
|
234 |
multi2 = '''Mentioned in the beginning, as a Asian American I wanted to highlight the xenophobic tweets during
|
235 |
Covid-19 and using a trained sentiment analysis model to analyze and visualize the tweets was a instant idea
|
|
|
11 |
# pip install torch torchvision torchaudio
|
12 |
# pip install transformers
|
13 |
|
14 |
+
st.title("Final Project Part 2 - Jason Wu | Expert Visualizations")
|
15 |
|
16 |
url = "https://www.kaggle.com/datasets/rahulgoel1106/xenophobia-on-twitter-during-covid19"
|
17 |
st.write("Dataset Link to Download -> [Kaggle Covid-19 Xenophobic Datatset](%s)" % url)
|
|
|
209 |
)
|
210 |
|
211 |
|
212 |
+
# Create a brush to link scatter plot and bar chart
|
213 |
+
brush = alt.selection_interval(encodings=['x', 'y'])
|
214 |
+
|
215 |
+
# Scatter plot with brush
|
216 |
scatter_plot = alt.Chart(filtered_df).mark_circle(size=60).encode(
|
217 |
x=alt.X('index:Q', title='Index'),
|
218 |
y=alt.Y('highest_score:Q', title='Highest Sentiment Score'),
|
219 |
color=alt.Color('sentiment_type:N', title='Sentiment Type', scale=alt.Scale(scheme='tableau20')),
|
220 |
tooltip=['index', 'sentiment_type', 'highest_score', 'cleaned_text', 'text']
|
221 |
+
).add_params(
|
222 |
+
brush
|
223 |
).properties(
|
224 |
width=800,
|
225 |
height=400,
|
226 |
+
title="Scatter Plot of Sentiment Scores (Filtered) - Brush Feature to show Bar Chart"
|
227 |
).interactive()
|
228 |
|
229 |
+
# Bar chart linked to scatter plot
|
230 |
+
bar_chart = alt.Chart(filtered_df).transform_filter(
|
231 |
+
brush
|
232 |
+
).transform_filter(
|
233 |
+
alt.FieldOneOfPredicate(field='sentiment_type', oneOf=sentiment_filter) # Apply multiselect filter
|
234 |
+
).transform_aggregate(
|
235 |
+
total_score='sum(highest_score)', # Aggregate the highest_score
|
236 |
+
groupby=['sentiment_type'] # Group by sentiment type
|
237 |
+
).mark_bar().encode(
|
238 |
+
x=alt.X('sentiment_type:N', title='Sentiment Type'),
|
239 |
+
y=alt.Y('total_score:Q', title='Sum of Highest Scores'),
|
240 |
+
color=alt.Color('sentiment_type:N', scale=alt.Scale(scheme='tableau20'))
|
241 |
+
|
242 |
+
).properties(
|
243 |
+
width=800,
|
244 |
+
height=200,
|
245 |
+
title="Bar Chart of Sentiment Sums (Linked to Scatter Plot)"
|
246 |
+
)
|
247 |
+
|
248 |
+
# Combine scatter and bar charts
|
249 |
+
combined_chart = alt.vconcat(
|
250 |
+
scatter_plot,
|
251 |
+
bar_chart
|
252 |
+
)
|
253 |
+
|
254 |
+
# Display the combined chart
|
255 |
+
st.altair_chart(combined_chart, use_container_width=True)
|
256 |
|
257 |
# Display the filtered DataFrame and counts
|
258 |
st.write(filtered_summary)
|
|
|
260 |
filtered_df[['sentiment_type', 'cleaned_text', 'highest_score', 'text']]
|
261 |
)
|
262 |
|
263 |
+
st.header('''Contextual Dataset''')
|
264 |
+
st.image("amazon.jpeg")
|
265 |
+
url1 = "https://www.kaggle.com/datasets/kritanjalijain/amazon-reviews"
|
266 |
+
st.write("Dataset Link to Download -> [Kaggle Amazon Reviews Dataset](%s)" % url1)
|
267 |
+
|
268 |
+
multi4 = '''The dataset I chose as a contextual dataset is an the Amazon Reviews dataset on Kaggle. I chose this dataset because of it similar nature in reviews of products instead. The dataset provides more sentiment for products but could
|
269 |
+
be used to train the model to be more efficient in determining Xenophobic tweets. In the future, I want to be able to train the model inorder to do more better analysis on the Twitter dataset.'''
|
270 |
+
|
271 |
+
st.markdown(multi4)
|
272 |
st.header('''Write Up''')
|
273 |
multi2 = '''Mentioned in the beginning, as a Asian American I wanted to highlight the xenophobic tweets during
|
274 |
Covid-19 and using a trained sentiment analysis model to analyze and visualize the tweets was a instant idea
|