jwu249 committed on
Commit
555bebb
·
verified ·
1 Parent(s): fc67db7

Upload 2 files

Browse files
Files changed (2) hide show
  1. amazon.jpeg +0 -0
  2. app.py +44 -5
amazon.jpeg ADDED
app.py CHANGED
@@ -11,7 +11,7 @@ from scipy.special import softmax
11
  # pip install torch torchvision torchaudio
12
  # pip install transformers
13
 
14
- st.title("Final Project Part 2 - jwu249 | Expert Visualizations")
15
 
16
  url = "https://www.kaggle.com/datasets/rahulgoel1106/xenophobia-on-twitter-during-covid19"
17
  st.write("Dataset Link to Download -> [Kaggle Covid-19 Xenophobic Dataset](%s)" % url)
@@ -209,20 +209,50 @@ filtered_summary = (
209
  )
210
 
211
 
212
- # Display the scatter plot
 
 
 
213
  scatter_plot = alt.Chart(filtered_df).mark_circle(size=60).encode(
214
  x=alt.X('index:Q', title='Index'),
215
  y=alt.Y('highest_score:Q', title='Highest Sentiment Score'),
216
  color=alt.Color('sentiment_type:N', title='Sentiment Type', scale=alt.Scale(scheme='tableau20')),
217
  tooltip=['index', 'sentiment_type', 'highest_score', 'cleaned_text', 'text']
 
 
218
  ).properties(
219
  width=800,
220
  height=400,
221
- title="Scatter Plot of Sentiment Scores (Filtered)"
222
  ).interactive()
223
 
224
- # Display the scatter plot
225
- st.altair_chart(scatter_plot, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
 
227
  # Display the filtered DataFrame and counts
228
  st.write(filtered_summary)
@@ -230,6 +260,15 @@ st.dataframe(
230
  filtered_df[['sentiment_type', 'cleaned_text', 'highest_score', 'text']]
231
  )
232
 
 
 
 
 
 
 
 
 
 
233
  st.header('''Write Up''')
234
  multi2 = '''Mentioned in the beginning, as a Asian American I wanted to highlight the xenophobic tweets during
235
  Covid-19 and using a trained sentiment analysis model to analyze and visualize the tweets was a instant idea
 
11
  # pip install torch torchvision torchaudio
12
  # pip install transformers
13
 
14
+ st.title("Final Project Part 2 - Jason Wu | Expert Visualizations")
15
 
16
  url = "https://www.kaggle.com/datasets/rahulgoel1106/xenophobia-on-twitter-during-covid19"
17
  st.write("Dataset Link to Download -> [Kaggle Covid-19 Xenophobic Dataset](%s)" % url)
 
209
  )
210
 
211
 
212
+ # Create a brush to link scatter plot and bar chart
213
+ brush = alt.selection_interval(encodings=['x', 'y'])
214
+
215
+ # Scatter plot with brush
216
  scatter_plot = alt.Chart(filtered_df).mark_circle(size=60).encode(
217
  x=alt.X('index:Q', title='Index'),
218
  y=alt.Y('highest_score:Q', title='Highest Sentiment Score'),
219
  color=alt.Color('sentiment_type:N', title='Sentiment Type', scale=alt.Scale(scheme='tableau20')),
220
  tooltip=['index', 'sentiment_type', 'highest_score', 'cleaned_text', 'text']
221
+ ).add_params(
222
+ brush
223
  ).properties(
224
  width=800,
225
  height=400,
226
+ title="Scatter Plot of Sentiment Scores (Filtered) - Brush Feature to show Bar Chart"
227
  ).interactive()
228
 
229
+ # Bar chart linked to scatter plot
230
+ bar_chart = alt.Chart(filtered_df).transform_filter(
231
+ brush
232
+ ).transform_filter(
233
+ alt.FieldOneOfPredicate(field='sentiment_type', oneOf=sentiment_filter) # Apply multiselect filter
234
+ ).transform_aggregate(
235
+ total_score='sum(highest_score)', # Aggregate the highest_score
236
+ groupby=['sentiment_type'] # Group by sentiment type
237
+ ).mark_bar().encode(
238
+ x=alt.X('sentiment_type:N', title='Sentiment Type'),
239
+ y=alt.Y('total_score:Q', title='Sum of Highest Scores'),
240
+ color=alt.Color('sentiment_type:N', scale=alt.Scale(scheme='tableau20'))
241
+
242
+ ).properties(
243
+ width=800,
244
+ height=200,
245
+ title="Bar Chart of Sentiment Sums (Linked to Scatter Plot)"
246
+ )
247
+
248
+ # Combine scatter and bar charts
249
+ combined_chart = alt.vconcat(
250
+ scatter_plot,
251
+ bar_chart
252
+ )
253
+
254
+ # Display the combined chart
255
+ st.altair_chart(combined_chart, use_container_width=True)
256
 
257
  # Display the filtered DataFrame and counts
258
  st.write(filtered_summary)
 
260
  filtered_df[['sentiment_type', 'cleaned_text', 'highest_score', 'text']]
261
  )
262
 
263
+ st.header('''Contextual Dataset''')
264
+ st.image("amazon.jpeg")
265
+ url1 = "https://www.kaggle.com/datasets/kritanjalijain/amazon-reviews"
266
+ st.write("Dataset Link to Download -> [Kaggle Amazon Reviews Dataset](%s)" % url1)
267
+
268
+ multi4 = '''The dataset I chose as a contextual dataset is the Amazon Reviews dataset on Kaggle. I chose this dataset because of its similar nature, although it contains reviews of products instead of tweets. The dataset provides more sentiment for products but could
269
+ be used to train the model to be more efficient in determining Xenophobic tweets. In the future, I want to be able to train the model in order to do better analysis on the Twitter dataset.'''
270
+
271
+ st.markdown(multi4)
272
  st.header('''Write Up''')
273
  multi2 = '''Mentioned in the beginning, as a Asian American I wanted to highlight the xenophobic tweets during
274
  Covid-19 and using a trained sentiment analysis model to analyze and visualize the tweets was a instant idea