RatanPrakash committed on
Commit
137c7fb
·
1 Parent(s): 148f79b

Refactor code to replace 'preprocessor' with 'helper' for improved organization and clarity

Browse files
Files changed (3) hide show
  1. app.py +20 -13
  2. preprocessor.py → helper.py +2 -0
  3. main.ipynb +1 -1
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import streamlit as st
2
- import preprocessor
3
  import matplotlib.pyplot as plt
4
 
5
 
@@ -10,7 +10,7 @@ uploaded_file = st.sidebar.file_uploader('Choose a file')
10
 
11
  if uploaded_file is not None:
12
  chat_data = uploaded_file.getvalue().decode('utf-8')
13
- df = preprocessor.preprocess_data(chat_data)
14
  # st.write(df)
15
 
16
  #unique users dropdown menu to select user
@@ -22,7 +22,7 @@ if uploaded_file is not None:
22
  st.sidebar.write(selected_user)
23
 
24
  #fetch stats
25
- user_df, total_messages, media_messages, links, emojis, total_words = preprocessor.fetch_stats(selected_user, df)
26
  st.title(f'User: {selected_user}')
27
  # st.write(user_df)
28
  #display below data side by side
@@ -39,43 +39,50 @@ if uploaded_file is not None:
39
  #busiest users (users with most messages)
40
  with st.expander('Busiest users'):
41
  if selected_user == 'All':
42
- busiest_users, plot = preprocessor.busiest_users(df)
43
  st.write('Busiest users:')
44
  st.write(busiest_users)
45
  st.pyplot(plot)
46
 
47
  with st.expander('View word cloud and most common words'):
48
- st.pyplot(preprocessor.word_cloud(df, selected_user))
49
  st.write('Most common words')
50
- temp_df, plot = preprocessor.most_common_words(selected_user, df)
51
  st.write(temp_df)
52
  st.pyplot(plot)
53
  st.write("Most common emojis")
54
- temp_df = preprocessor.most_common_emojis(selected_user, df)
55
  st.write(temp_df)
56
 
57
  with st.expander("Activity over time"):
58
  col1, col2 = st.columns(2)
59
  with col1: #left
60
- _, plot = preprocessor.daily_timeline(selected_user, df)
61
  st.pyplot(plot)
62
- _, plot = preprocessor.weekday_activity_map(selected_user, df)
63
  st.pyplot(plot)
64
  with col2: #right
65
- _, plot = preprocessor.monthly_timeline(selected_user, df)
66
  st.pyplot(plot)
67
- _, plot = preprocessor.month_activity_map(selected_user, df)
68
  st.pyplot(plot)
69
 
70
  st.write("Messages sent by hour")
71
- _, plot = preprocessor.hour_activity_map(selected_user, df)
72
  st.pyplot(plot)
73
 
74
  st.write("weekly heatmap")
75
- plot = preprocessor.activity_heatmap(selected_user, df)
76
  st.pyplot(plot)
77
 
 
 
 
78
 
 
 
 
 
79
 
80
 
81
 
 
1
  import streamlit as st
2
+ import helper
3
  import matplotlib.pyplot as plt
4
 
5
 
 
10
 
11
  if uploaded_file is not None:
12
  chat_data = uploaded_file.getvalue().decode('utf-8')
13
+ df = helper.preprocess_data(chat_data)
14
  # st.write(df)
15
 
16
  #unique users dropdown menu to select user
 
22
  st.sidebar.write(selected_user)
23
 
24
  #fetch stats
25
+ user_df, total_messages, media_messages, links, emojis, total_words = helper.fetch_stats(selected_user, df)
26
  st.title(f'User: {selected_user}')
27
  # st.write(user_df)
28
  #display below data side by side
 
39
  #busiest users (users with most messages)
40
  with st.expander('Busiest users'):
41
  if selected_user == 'All':
42
+ busiest_users, plot = helper.busiest_users(df)
43
  st.write('Busiest users:')
44
  st.write(busiest_users)
45
  st.pyplot(plot)
46
 
47
  with st.expander('View word cloud and most common words'):
48
+ st.pyplot(helper.word_cloud(df, selected_user))
49
  st.write('Most common words')
50
+ temp_df, plot = helper.most_common_words(selected_user, df)
51
  st.write(temp_df)
52
  st.pyplot(plot)
53
  st.write("Most common emojis")
54
+ temp_df = helper.most_common_emojis(selected_user, df)
55
  st.write(temp_df)
56
 
57
  with st.expander("Activity over time"):
58
  col1, col2 = st.columns(2)
59
  with col1: #left
60
+ _, plot = helper.daily_timeline(selected_user, df)
61
  st.pyplot(plot)
62
+ _, plot = helper.weekday_activity_map(selected_user, df)
63
  st.pyplot(plot)
64
  with col2: #right
65
+ _, plot = helper.monthly_timeline(selected_user, df)
66
  st.pyplot(plot)
67
+ _, plot = helper.month_activity_map(selected_user, df)
68
  st.pyplot(plot)
69
 
70
  st.write("Messages sent by hour")
71
+ _, plot = helper.hour_activity_map(selected_user, df)
72
  st.pyplot(plot)
73
 
74
  st.write("weekly heatmap")
75
+ plot = helper.activity_heatmap(selected_user, df)
76
  st.pyplot(plot)
77
 
78
+ with st.expander("Links shared"):
79
+ temp_df = helper.extract_links(df)
80
+ st.write(temp_df)
81
 
82
+ common_domains, plot = helper.plot_common_domains(df)
83
+ st.pyplot(plot)
84
+ st.write("Most common domains")
85
+ st.write(common_domains)
86
 
87
 
88
 
preprocessor.py → helper.py RENAMED
@@ -228,3 +228,5 @@ def activity_heatmap(selected_user, df):
228
  plt.xlabel('Hour of Day')
229
  plt.ylabel('Day of Week')
230
  return plt
 
 
 
228
  plt.xlabel('Hour of Day')
229
  plt.ylabel('Day of Week')
230
  return plt
231
+
232
+
main.ipynb CHANGED
@@ -11227,7 +11227,7 @@
11227
  "outputs": [],
11228
  "source": [
11229
  "import emoji\n",
11230
- "from preprocessor import *\n",
11231
  "from collections import Counter"
11232
  ]
11233
  },
 
11227
  "outputs": [],
11228
  "source": [
11229
  "import emoji\n",
11230
+ "from helper import *\n",
11231
  "from collections import Counter"
11232
  ]
11233
  },