rafaldembski committed on
Commit
27f4c94
verified
1 Parent(s): 004670a

Update pages/Statistics.py

Browse files
Files changed (1) hide show
  1. pages/Statistics.py +77 -89
pages/Statistics.py CHANGED
@@ -9,10 +9,14 @@ import os
9
  import re
10
  from datetime import datetime, timedelta
11
  import pycountry
 
 
 
 
12
 
13
  # Opcjonalne: Importowanie dodatkowych komponentów
14
- # from streamlit_extras.metric_cards import style_metric_cards
15
- # from streamlit_elements import elements, mui, html
16
 
17
  # Definiowanie ścieżek do plików JSON
18
  FAKE_NUMBERS_FILE = os.path.join('data', 'fake_numbers.json')
@@ -36,7 +40,7 @@ page_translations = {
36
  'fraud_trend_title': "Trendy Oszustw w Czasie",
37
  'risk_distribution': "Rozkład Ocen Ryzyka Oszustwa",
38
  'fraud_country_distribution': "Rozkład Oszustw Według Krajów",
39
- 'heatmap_title': "Heatmapa Oszustw",
40
  'fraud_vs_nonfraud': "Procentowy Podział: Oszustwa vs Bezpieczne",
41
  'no_data': "Brak dostępnych danych do wyświetlenia.",
42
  'download_button': "📥 Pobierz dane jako CSV",
@@ -58,7 +62,7 @@ page_translations = {
58
  'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
59
  'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
60
  'fraud_country_distribution': "Betrug nach Ländern",
61
- 'heatmap_title': "Heatmap der Betrügereien",
62
  'fraud_vs_nonfraud': "Prozentanteil: Betrug vs Sichere Nachrichten",
63
  'no_data': "Keine Daten zur Anzeige verfügbar.",
64
  'download_button': "📥 Daten als CSV herunterladen",
@@ -80,7 +84,7 @@ page_translations = {
80
  'fraud_trend_title': "Fraud Trends Over Time",
81
  'risk_distribution': "Distribution of Fraud Risk Scores",
82
  'fraud_country_distribution': "Fraud Distribution by Countries",
83
- 'heatmap_title': "Fraud Heatmap",
84
  'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
85
  'no_data': "No data available to display.",
86
  'download_button': "📥 Download data as CSV",
@@ -89,67 +93,22 @@ page_translations = {
89
  }
90
  }
91
 
92
- def load_json(file_path):
93
- """Ładuje dane z pliku JSON."""
94
- if not os.path.exists(file_path):
95
- if file_path.endswith('stats.json'):
96
- return {"total_analyses": 0, "total_frauds_detected": 0}
97
- else:
98
- return []
99
- with open(file_path, 'r', encoding='utf-8') as file:
100
- try:
101
- data = json.load(file)
102
- return data
103
- except json.JSONDecodeError:
104
- st.error(f"Nie można załadować danych z {file_path}. Plik jest uszkodzony.")
105
- if file_path.endswith('stats.json'):
106
- return {"total_analyses": 0, "total_frauds_detected": 0}
107
- return []
108
-
109
- def save_json(file_path, data):
110
- """Zapisuje dane do pliku JSON."""
111
- with open(file_path, 'w', encoding='utf-8') as file:
112
- json.dump(data, file, ensure_ascii=False, indent=4)
113
- st.success(f"Dane zostały zapisane do {file_path}.")
114
-
115
- def get_stats_from_json():
116
- """Pobiera statystyki z pliku stats.json."""
117
- stats = load_json(STATS_FILE)
118
- return stats
119
-
120
- def get_history_from_json():
121
- """Pobiera historię analiz z pliku history.json."""
122
- return load_json(HISTORY_FILE)
123
-
124
- def get_fake_numbers_from_json():
125
- """Pobiera fałszywe numery z pliku fake_numbers.json."""
126
- return load_json(FAKE_NUMBERS_FILE)
127
-
128
- def get_country_code(name):
129
- """Zwraca kod ISO-3 kraju na podstawie jego nazwy."""
130
- try:
131
- country = pycountry.countries.lookup(name)
132
- return country.alpha_3
133
- except LookupError:
134
- return None
135
-
136
  def main(language):
137
  translations = page_translations.get(language, page_translations['English'])
138
 
139
- # Wyświetlenie nagłówka
140
  st.title(translations['header'])
141
  st.markdown(translations['description'])
142
 
143
  # Pobieranie danych z plików JSON
144
- stats = get_stats_from_json()
145
- history = get_history_from_json()
146
 
147
  # Kluczowe metryki
148
  total_analyses = stats.get("total_analyses", 0)
149
  total_frauds_detected = stats.get("total_frauds_detected", 0)
150
 
151
  # Stylizacja kart metryk
152
- # style_metric_cards() # Uncomment if using metric_cards
153
 
154
  # Wyświetlenie metryk
155
  col1, col2, col3 = st.columns(3)
@@ -161,21 +120,21 @@ def main(language):
161
  fraud_percentage = 0 # Ustawienie na 0% w przypadku braku analiz
162
  col3.metric(label=translations['fraud_percentage'], value=f"{fraud_percentage:.2f}%")
163
 
164
- # Dodanie interaktywnego filtra daty
165
- st.markdown("### " + translations['select_date_range'])
166
- today = datetime.now().date()
167
- recent_days = translations['recent_days']
168
- start_date = st.date_input(
169
  "Start Date",
170
- value=today - timedelta(days=recent_days),
171
- min_value=today - timedelta(days=365),
172
- max_value=today
173
  )
174
- end_date = st.date_input(
175
  "End Date",
176
- value=today,
177
  min_value=start_date,
178
- max_value=today
179
  )
180
 
181
  # Filtracja historii na podstawie daty
@@ -187,9 +146,27 @@ def main(language):
187
  mask = (df_history['timestamp'].dt.date >= start_date) & (df_history['timestamp'].dt.date <= end_date)
188
  df_filtered = df_history.loc[mask]
189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  # Aktualizacja statystyk na podstawie filtrowanej historii
191
  total_filtered = df_filtered.shape[0]
192
- frauds_filtered = df_filtered[df_filtered['risk_assessment'].str.contains(r'\d+/10')]['risk_assessment'].apply(lambda x: int(re.search(r'(\d+)/10', x).group(1)) >= 7).sum()
193
  fraud_percentage_filtered = (frauds_filtered / total_filtered) * 100 if total_filtered > 0 else 0
194
 
195
  # Wyświetlenie metryk dla filtrowanej historii
@@ -222,7 +199,7 @@ def main(language):
222
  def extract_risk_score(risk_assessment):
223
  match = re.search(r'(\d+)/10', risk_assessment)
224
  return int(match.group(1)) if match else 0
225
-
226
  df_filtered['risk_score'] = df_filtered['risk_assessment'].apply(extract_risk_score)
227
  risk_distribution = df_filtered['risk_score'].value_counts().sort_index().reset_index()
228
  risk_distribution.columns = ['risk_score', 'count']
@@ -236,15 +213,15 @@ def main(language):
236
  def get_country(row):
237
  country, _ = get_phone_info(row['phone_number'])
238
  return country if country else "Unknown"
239
-
240
  df_filtered['country'] = df_filtered.apply(get_country, axis=1)
241
  fraud_countries = df_filtered['country'].value_counts().reset_index()
242
  fraud_countries.columns = ['country', 'counts']
243
-
244
  # Dodanie kodów krajów
245
- fraud_countries['iso_alpha'] = fraud_countries['country'].apply(get_country_code)
246
  fraud_countries = fraud_countries.dropna(subset=['iso_alpha'])
247
-
248
  if not fraud_countries.empty:
249
  fig_map = px.choropleth(
250
  fraud_countries,
@@ -254,30 +231,42 @@ def main(language):
254
  color_continuous_scale=px.colors.sequential.Plasma,
255
  title=translations['fraud_country_distribution_title']
256
  )
257
- fig_map.update_geos(showcountries=True, showcoastlines=True) # Usunięto powtarzający się 'showcountries'
258
  st.plotly_chart(fig_map, use_container_width=True)
259
  else:
260
  st.info(translations['no_data'])
261
-
 
 
262
  # Dodatkowe Wizualizacje
263
  st.markdown("### " + translations['heatmap_title'])
264
  # Heatmapa oszustw na podstawie lokalizacji
265
  if not fraud_countries.empty:
266
  # Przygotowanie danych geograficznych
267
- # Użyjemy szerokości i długości geograficznej krajów z pycountry lub manualnie
268
- country_coords = {
269
- 'Poland': (52.237049, 21.017532),
270
- 'Germany': (51.165691, 10.451526),
271
- 'Unknown': (20.0, 0.0) # Centrum świata
272
- # Dodaj inne kraje w razie potrzeby
273
- }
274
-
275
- def get_lat_lon(country_name):
276
- return country_coords.get(country_name, (20.0, 0.0))
277
-
278
- fraud_countries['lat'] = fraud_countries['country'].apply(lambda x: get_lat_lon(x)[0])
279
- fraud_countries['lon'] = fraud_countries['country'].apply(lambda x: get_lat_lon(x)[1])
280
-
 
 
 
 
 
 
 
 
 
 
281
  # Tworzenie Heatmapy
282
  fig_heatmap = px.density_mapbox(
283
  fraud_countries,
@@ -290,11 +279,11 @@ def main(language):
290
  mapbox_style="stamen-terrain",
291
  title=translations['heatmap_title']
292
  )
293
- fig_heatmap.update_geos(showcountries=True, showcoastlines=True) # Naprawiony argument
294
  st.plotly_chart(fig_heatmap, use_container_width=True)
295
  else:
296
  st.info(translations['no_data'])
297
-
298
  # Gauge Chart - Procentowy udział oszustw
299
  st.markdown("### " + translations['fraud_percentage'])
300
  fig_gauge = go.Figure(go.Indicator(
@@ -319,4 +308,3 @@ def main(language):
319
  }
320
  ))
321
  st.plotly_chart(fig_gauge, use_container_width=True)
322
-
 
9
  import re
10
  from datetime import datetime, timedelta
11
  import pycountry
12
+ import requests
13
+
14
+ # Importowanie funkcji z utils/functions.py
15
+ from utils.functions import get_phone_info, get_stats, get_history, get_fake_numbers
16
 
17
  # Opcjonalne: Importowanie dodatkowych komponentów
18
+ from streamlit_extras.metric_cards import style_metric_cards
19
+ from streamlit_elements import elements, mui, html
20
 
21
  # Definiowanie ścieżek do plików JSON
22
  FAKE_NUMBERS_FILE = os.path.join('data', 'fake_numbers.json')
 
40
  'fraud_trend_title': "Trendy Oszustw w Czasie",
41
  'risk_distribution': "Rozkład Ocen Ryzyka Oszustwa",
42
  'fraud_country_distribution': "Rozkład Oszustw Według Krajów",
43
+ 'heatmap_title': "Heatmapa Oszustw w Czasie",
44
  'fraud_vs_nonfraud': "Procentowy Podział: Oszustwa vs Bezpieczne",
45
  'no_data': "Brak dostępnych danych do wyświetlenia.",
46
  'download_button': "📥 Pobierz dane jako CSV",
 
62
  'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
63
  'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
64
  'fraud_country_distribution': "Betrug nach Ländern",
65
+ 'heatmap_title': "Heatmap der Betrügereien im Laufe der Zeit",
66
  'fraud_vs_nonfraud': "Prozentanteil: Betrug vs Sichere Nachrichten",
67
  'no_data': "Keine Daten zur Anzeige verfügbar.",
68
  'download_button': "📥 Daten als CSV herunterladen",
 
84
  'fraud_trend_title': "Fraud Trends Over Time",
85
  'risk_distribution': "Distribution of Fraud Risk Scores",
86
  'fraud_country_distribution': "Fraud Distribution by Countries",
87
+ 'heatmap_title': "Fraud Heatmap Over Time",
88
  'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
89
  'no_data': "No data available to display.",
90
  'download_button': "📥 Download data as CSV",
 
93
  }
94
  }
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  def main(language):
97
  translations = page_translations.get(language, page_translations['English'])
98
 
 
99
  st.title(translations['header'])
100
  st.markdown(translations['description'])
101
 
102
  # Pobieranie danych z plików JSON
103
+ stats = get_stats()
104
+ history = get_history()
105
 
106
  # Kluczowe metryki
107
  total_analyses = stats.get("total_analyses", 0)
108
  total_frauds_detected = stats.get("total_frauds_detected", 0)
109
 
110
  # Stylizacja kart metryk
111
+ style_metric_cards()
112
 
113
  # Wyświetlenie metryk
114
  col1, col2, col3 = st.columns(3)
 
120
  fraud_percentage = 0 # Ustawienie na 0% w przypadku braku analiz
121
  col3.metric(label=translations['fraud_percentage'], value=f"{fraud_percentage:.2f}%")
122
 
123
+ st.markdown("---")
124
+
125
+ # Dodanie interaktywnego filtra daty w bocznym panelu
126
+ st.sidebar.header(translations['select_date_range'])
127
+ start_date = st.sidebar.date_input(
128
  "Start Date",
129
+ value=datetime.now().date() - timedelta(days=translations['recent_days']),
130
+ min_value=datetime.now().date() - timedelta(days=365),
131
+ max_value=datetime.now().date()
132
  )
133
+ end_date = st.sidebar.date_input(
134
  "End Date",
135
+ value=datetime.now().date(),
136
  min_value=start_date,
137
+ max_value=datetime.now().date()
138
  )
139
 
140
  # Filtracja historii na podstawie daty
 
146
  mask = (df_history['timestamp'].dt.date >= start_date) & (df_history['timestamp'].dt.date <= end_date)
147
  df_filtered = df_history.loc[mask]
148
 
149
+ # Wyświetlenie tabeli historii analiz
150
+ st.markdown(f"### {translations['history_title']}")
151
+ if not df_filtered.empty:
152
+ st.dataframe(df_filtered[['timestamp', 'phone_number', 'risk_assessment']], height=300)
153
+
154
+ # Opcjonalnie: Dodanie możliwości eksportu danych
155
+ csv = df_filtered.to_csv(index=False).encode('utf-8')
156
+ st.download_button(
157
+ label=translations['download_button'],
158
+ data=csv,
159
+ file_name='analysis_history.csv',
160
+ mime='text/csv',
161
+ )
162
+ else:
163
+ st.info(translations['no_data'])
164
+
165
+ st.markdown("---")
166
+
167
  # Aktualizacja statystyk na podstawie filtrowanej historii
168
  total_filtered = df_filtered.shape[0]
169
+ frauds_filtered = df_filtered['risk_assessment'].apply(lambda x: int(re.search(r'(\d+)/10', x).group(1)) >= 7 if re.search(r'(\d+)/10', x) else False).sum()
170
  fraud_percentage_filtered = (frauds_filtered / total_filtered) * 100 if total_filtered > 0 else 0
171
 
172
  # Wyświetlenie metryk dla filtrowanej historii
 
199
  def extract_risk_score(risk_assessment):
200
  match = re.search(r'(\d+)/10', risk_assessment)
201
  return int(match.group(1)) if match else 0
202
+
203
  df_filtered['risk_score'] = df_filtered['risk_assessment'].apply(extract_risk_score)
204
  risk_distribution = df_filtered['risk_score'].value_counts().sort_index().reset_index()
205
  risk_distribution.columns = ['risk_score', 'count']
 
213
  def get_country(row):
214
  country, _ = get_phone_info(row['phone_number'])
215
  return country if country else "Unknown"
216
+
217
  df_filtered['country'] = df_filtered.apply(get_country, axis=1)
218
  fraud_countries = df_filtered['country'].value_counts().reset_index()
219
  fraud_countries.columns = ['country', 'counts']
220
+
221
  # Dodanie kodów krajów
222
+ fraud_countries['iso_alpha'] = fraud_countries['country'].apply(lambda x: pycountry.countries.lookup(x).alpha_3 if x != "Unknown" else None)
223
  fraud_countries = fraud_countries.dropna(subset=['iso_alpha'])
224
+
225
  if not fraud_countries.empty:
226
  fig_map = px.choropleth(
227
  fraud_countries,
 
231
  color_continuous_scale=px.colors.sequential.Plasma,
232
  title=translations['fraud_country_distribution_title']
233
  )
234
+ fig_map.update_geos(showcountries=True, showcoastlines=True)
235
  st.plotly_chart(fig_map, use_container_width=True)
236
  else:
237
  st.info(translations['no_data'])
238
+
239
+ st.markdown("---")
240
+
241
  # Dodatkowe Wizualizacje
242
  st.markdown("### " + translations['heatmap_title'])
243
  # Heatmapa oszustw na podstawie lokalizacji
244
  if not fraud_countries.empty:
245
  # Przygotowanie danych geograficznych
246
+ # Użyjemy szerokości i długości geograficznej krajów
247
+ country_coords = {}
248
+ for country in fraud_countries['country'].unique():
249
+ if country == "Unknown":
250
+ country_coords[country] = (0, 0) # Centrum świata
251
+ else:
252
+ try:
253
+ country_obj = pycountry.countries.lookup(country)
254
+ # Użyjemy średnich szerokości i długości geograficznej
255
+ geocode_url = f"https://restcountries.com/v3.1/name/{country}"
256
+ response = requests.get(geocode_url)
257
+ if response.status_code == 200:
258
+ data = response.json()
259
+ lat = data[0]['latlng'][0]
260
+ lon = data[0]['latlng'][1]
261
+ country_coords[country] = (lat, lon)
262
+ else:
263
+ country_coords[country] = (0, 0)
264
+ except:
265
+ country_coords[country] = (0, 0)
266
+
267
+ fraud_countries['lat'] = fraud_countries['country'].apply(lambda x: country_coords.get(x, (0,0))[0])
268
+ fraud_countries['lon'] = fraud_countries['country'].apply(lambda x: country_coords.get(x, (0,0))[1])
269
+
270
  # Tworzenie Heatmapy
271
  fig_heatmap = px.density_mapbox(
272
  fraud_countries,
 
279
  mapbox_style="stamen-terrain",
280
  title=translations['heatmap_title']
281
  )
282
+ fig_heatmap.update_geos(showcountries=True, showcoastlines=True)
283
  st.plotly_chart(fig_heatmap, use_container_width=True)
284
  else:
285
  st.info(translations['no_data'])
286
+
287
  # Gauge Chart - Procentowy udział oszustw
288
  st.markdown("### " + translations['fraud_percentage'])
289
  fig_gauge = go.Figure(go.Indicator(
 
308
  }
309
  ))
310
  st.plotly_chart(fig_gauge, use_container_width=True)