rafaldembski committed on
Commit
82d13f9
verified
1 Parent(s): 87c8540

Update pages/Statistics.py

Browse files
Files changed (1) hide show
  1. pages/Statistics.py +44 -79
pages/Statistics.py CHANGED
@@ -29,11 +29,12 @@ page_translations = {
29
  'frauds_over_time': "Liczba wykrytych oszustw w czasie",
30
  'risk_distribution': "Rozkład ocen ryzyka oszustwa",
31
  'fraud_country_distribution': "Rozkład oszustw według krajów",
 
 
 
32
  'heatmap_title': "Mapa ciepła oszustw w czasie",
33
  'fraud_vs_nonfraud': "Procentowy podział: Oszustwa vs Bezpieczne",
34
  'no_data': "Brak dostępnych danych do wyświetlenia.",
35
- 'search_placeholder': "Wyszukaj numer telefonu lub treść wiadomości",
36
- 'date_filter_label': "Filtruj po dacie",
37
  'download_button': "📥 Pobierz dane jako CSV"
38
  },
39
  'German': {
@@ -48,11 +49,12 @@ page_translations = {
48
  'frauds_over_time': "Anzahl der erkannten Betrügereien im Laufe der Zeit",
49
  'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
50
  'fraud_country_distribution': "Betrug nach Ländern",
 
 
 
51
  'heatmap_title': "Heatmap der Betrügereien im Laufe der Zeit",
52
  'fraud_vs_nonfraud': "Prozentanteil: Betrug vs Sichere Nachrichten",
53
  'no_data': "Keine Daten zur Anzeige verfügbar.",
54
- 'search_placeholder': "Telefonnummer oder Nachrichtentext suchen",
55
- 'date_filter_label': "Nach Datum filtern",
56
  'download_button': "📥 Daten als CSV herunterladen"
57
  },
58
  'English': {
@@ -67,11 +69,12 @@ page_translations = {
67
  'frauds_over_time': "Number of Detected Frauds Over Time",
68
  'risk_distribution': "Distribution of Fraud Risk Scores",
69
  'fraud_country_distribution': "Fraud Distribution by Countries",
 
 
 
70
  'heatmap_title': "Fraud Heatmap Over Time",
71
  'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
72
  'no_data': "No data available to display.",
73
- 'search_placeholder': "Search phone number or message content",
74
- 'date_filter_label': "Filter by Date",
75
  'download_button': "📥 Download data as CSV"
76
  }
77
  }
@@ -114,11 +117,6 @@ def main(language):
114
  if history:
115
  st.markdown(f"### {translations['history_title']}")
116
 
117
- # Pole wyszukiwania
118
- search_query = st.text_input(translations['search_placeholder'], '')
119
-
120
- # Dodanie filtrów daty
121
- st.markdown(f"### {translations['date_filter_label']}")
122
  df_history = pd.DataFrame(history)
123
 
124
  # Sprawdzenie, czy 'timestamp' istnieje
@@ -132,103 +130,70 @@ def main(language):
132
  # Dodanie kolumny 'date' dla wizualizacji
133
  df_history['date'] = df_history['timestamp'].dt.date
134
 
135
- # Wybór zakresu dat
136
- if language == 'Polish':
137
- start_label = "Data początkowa"
138
- end_label = "Data końcowa"
139
- elif language == 'German':
140
- start_label = "Startdatum"
141
- end_label = "Enddatum"
142
- else:
143
- start_label = "Start Date"
144
- end_label = "End Date"
145
-
146
- start_date = st.date_input(start_label, df_history['date'].min())
147
- end_date = st.date_input(end_label, df_history['date'].max())
148
-
149
- # Filtrowanie danych po dacie
150
- df_filtered = df_history[
151
- (df_history['date'] >= start_date) &
152
- (df_history['date'] <= end_date)
153
- ]
154
-
155
- # Dodatkowe filtrowanie na podstawie zapytania wyszukiwania
156
- if search_query:
157
- df_filtered = df_filtered[
158
- df_filtered['phone_number'].str.contains(search_query, case=False, na=False) |
159
- df_filtered['message'].str.contains(search_query, case=False, na=False)
160
- ]
161
 
162
  # Wyświetlenie tabeli historii
163
- st.dataframe(df_filtered[['timestamp', 'phone_number', 'risk_assessment']], height=300)
164
 
165
  # Opcjonalnie: Dodanie możliwości eksportu danych
166
- if not df_filtered.empty:
167
- if language == 'Polish':
168
- download_label = "📥 Pobierz dane jako CSV"
169
- elif language == 'German':
170
- download_label = "📥 Daten als CSV herunterladen"
171
- else:
172
- download_label = "📥 Download data as CSV"
173
-
174
- csv = df_filtered.to_csv(index=False).encode('utf-8')
175
  st.download_button(
176
- label=download_label,
177
  data=csv,
178
  file_name='analysis_history.csv',
179
  mime='text/csv',
180
  )
181
 
182
- # Wykres kołowy dla ocen ryzyka
183
- st.markdown(f"### {translations['risk_distribution']}")
 
 
 
 
184
 
185
- # Wyodrębnienie ocen ryzyka
 
186
  def extract_risk_score(risk_assessment):
187
  match = re.search(r'(\d+)/10', risk_assessment)
188
  return int(match.group(1)) if match else 0
189
 
190
- df_filtered['risk_score'] = df_filtered['risk_assessment'].apply(extract_risk_score)
191
- risk_data = df_filtered['risk_score'].value_counts().sort_index()
192
- risk_labels = [f'Risk {i}/10' for i in risk_data.index]
193
- fig_risk_pie = go.Figure(data=[go.Pie(labels=risk_labels, values=risk_data, hole=.3, marker_colors=px.colors.sequential.RdBu)])
194
- fig_risk_pie.update_layout(title_text=translations['risk_distribution'])
195
- st.plotly_chart(fig_risk_pie, use_container_width=True)
196
-
197
- # Wizualizacja rozkładu oszustw według krajów
198
- st.markdown(f"### {translations['fraud_country_distribution']}")
199
 
200
- # Dodanie informacji o kraju do historii
 
201
  def add_country_info(row):
202
  country, _ = get_phone_info(row['phone_number'])
203
  return country
204
 
205
- df_filtered['country'] = df_filtered.apply(add_country_info, axis=1)
206
-
207
- if df_filtered['country'].notnull().any():
208
- # Przygotowanie danych geograficznych
209
- country_counts = df_filtered['country'].value_counts().reset_index()
210
- country_counts.columns = ['country', 'counts']
211
 
212
- # Dodanie kolumny z kodem kraju (ISO Alpha-3)
213
- def get_country_code(name):
214
- try:
215
- return pycountry.countries.lookup(name).alpha_3
216
- except:
217
- return None
218
 
219
- country_counts['iso_alpha'] = country_counts['country'].apply(get_country_code)
220
- country_counts = country_counts.dropna(subset=['iso_alpha'])
221
 
222
- # Tworzenie mapy choropleth bez użycia Mapbox
223
  fig_map = px.choropleth(
224
- country_counts,
225
  locations='iso_alpha',
226
  color='counts',
227
  hover_name='country',
228
  color_continuous_scale=px.colors.sequential.Plasma,
229
- title=translations['fraud_country_distribution']
230
  )
231
- fig_map.update_geos(showcountries=True, showcoastlines=True) # Poprawka: usunięto powtórzenie 'showcountries'
232
  st.plotly_chart(fig_map, use_container_width=True)
233
  else:
234
  st.info(translations['no_data'])
 
29
  'frauds_over_time': "Liczba wykrytych oszustw w czasie",
30
  'risk_distribution': "Rozkład ocen ryzyka oszustwa",
31
  'fraud_country_distribution': "Rozkład oszustw według krajów",
32
+ 'fraud_trend_title': "Trendy oszustw w czasie",
33
+ 'risk_distribution_title': "Rozkład ocen ryzyka oszustwa",
34
+ 'fraud_country_distribution_title': "Rozkład oszustw według krajów",
35
  'heatmap_title': "Mapa ciepła oszustw w czasie",
36
  'fraud_vs_nonfraud': "Procentowy podział: Oszustwa vs Bezpieczne",
37
  'no_data': "Brak dostępnych danych do wyświetlenia.",
 
 
38
  'download_button': "📥 Pobierz dane jako CSV"
39
  },
40
  'German': {
 
49
  'frauds_over_time': "Anzahl der erkannten Betrügereien im Laufe der Zeit",
50
  'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
51
  'fraud_country_distribution': "Betrug nach Ländern",
52
+ 'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
53
+ 'risk_distribution_title': "Verteilung der Betrugsrisikobewertungen",
54
+ 'fraud_country_distribution_title': "Betrug nach Ländern",
55
  'heatmap_title': "Heatmap der Betrügereien im Laufe der Zeit",
56
  'fraud_vs_nonfraud': "Prozentanteil: Betrug vs Sichere Nachrichten",
57
  'no_data': "Keine Daten zur Anzeige verfügbar.",
 
 
58
  'download_button': "📥 Daten als CSV herunterladen"
59
  },
60
  'English': {
 
69
  'frauds_over_time': "Number of Detected Frauds Over Time",
70
  'risk_distribution': "Distribution of Fraud Risk Scores",
71
  'fraud_country_distribution': "Fraud Distribution by Countries",
72
+ 'fraud_trend_title': "Fraud Trends Over Time",
73
+ 'risk_distribution_title': "Distribution of Fraud Risk Scores",
74
+ 'fraud_country_distribution_title': "Fraud Distribution by Countries",
75
  'heatmap_title': "Fraud Heatmap Over Time",
76
  'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
77
  'no_data': "No data available to display.",
 
 
78
  'download_button': "📥 Download data as CSV"
79
  }
80
  }
 
117
  if history:
118
  st.markdown(f"### {translations['history_title']}")
119
 
 
 
 
 
 
120
  df_history = pd.DataFrame(history)
121
 
122
  # Sprawdzenie, czy 'timestamp' istnieje
 
130
  # Dodanie kolumny 'date' dla wizualizacji
131
  df_history['date'] = df_history['timestamp'].dt.date
132
 
133
+ # Usunięcie sekcji wyszukiwania i filtrowania po dacie
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
  # Wyświetlenie tabeli historii
136
+ st.dataframe(df_history[['timestamp', 'phone_number', 'risk_assessment']], height=300)
137
 
138
  # Opcjonalnie: Dodanie możliwości eksportu danych
139
+ if not df_history.empty:
140
+ csv = df_history.to_csv(index=False).encode('utf-8')
 
 
 
 
 
 
 
141
  st.download_button(
142
+ label=translations['download_button'],
143
  data=csv,
144
  file_name='analysis_history.csv',
145
  mime='text/csv',
146
  )
147
 
148
+ # Trend oszustw w czasie
149
+ st.markdown(f"### {translations['fraud_trend_title']}")
150
+ fraud_over_time = df_history.groupby(df_history['timestamp'].dt.date)['phone_number'].count().reset_index()
151
+ fraud_over_time.rename(columns={'phone_number': 'frauds_detected'}, inplace=True)
152
+ fig_trend = px.line(fraud_over_time, x='timestamp', y='frauds_detected', title=translations['frauds_over_time'])
153
+ st.plotly_chart(fig_trend, use_container_width=True)
154
 
155
+ # Rozkład ocen ryzyka
156
+ st.markdown(f"### {translations['risk_distribution_title']}")
157
  def extract_risk_score(risk_assessment):
158
  match = re.search(r'(\d+)/10', risk_assessment)
159
  return int(match.group(1)) if match else 0
160
 
161
+ df_history['risk_score'] = df_history['risk_assessment'].apply(extract_risk_score)
162
+ risk_distribution = df_history['risk_score'].value_counts().sort_index().reset_index()
163
+ risk_distribution.columns = ['risk_score', 'count']
164
+ fig_risk = px.bar(risk_distribution, x='risk_score', y='count', title=translations['risk_distribution'], labels={'risk_score': 'Risk Score', 'count': 'Number of Messages'}, color='risk_score', color_continuous_scale=px.colors.sequential.RdBu)
165
+ st.plotly_chart(fig_risk, use_container_width=True)
 
 
 
 
166
 
167
+ # Rozkład oszustw według krajów
168
+ st.markdown(f"### {translations['fraud_country_distribution_title']}")
169
  def add_country_info(row):
170
  country, _ = get_phone_info(row['phone_number'])
171
  return country
172
 
173
+ df_history['country'] = df_history.apply(add_country_info, axis=1)
174
+ fraud_countries = df_history['country'].value_counts().reset_index()
175
+ fraud_countries.columns = ['country', 'counts']
 
 
 
176
 
177
+ # Dodanie kodów krajów
178
+ def get_country_code(name):
179
+ try:
180
+ return pycountry.countries.lookup(name).alpha_3
181
+ except:
182
+ return None
183
 
184
+ fraud_countries['iso_alpha'] = fraud_countries['country'].apply(get_country_code)
185
+ fraud_countries = fraud_countries.dropna(subset=['iso_alpha'])
186
 
187
+ if not fraud_countries.empty:
188
  fig_map = px.choropleth(
189
+ fraud_countries,
190
  locations='iso_alpha',
191
  color='counts',
192
  hover_name='country',
193
  color_continuous_scale=px.colors.sequential.Plasma,
194
+ title=translations['fraud_country_distribution_title']
195
  )
196
+ fig_map.update_geos(showcountries=True, showcoastlines=True)
197
  st.plotly_chart(fig_map, use_container_width=True)
198
  else:
199
  st.info(translations['no_data'])