Spaces:
Running
Running
Update pages/Statistics.py
Browse files- pages/Statistics.py +77 -89
pages/Statistics.py
CHANGED
@@ -9,10 +9,14 @@ import os
|
|
9 |
import re
|
10 |
from datetime import datetime, timedelta
|
11 |
import pycountry
|
|
|
|
|
|
|
|
|
12 |
|
13 |
# Opcjonalne: Importowanie dodatkowych komponentów
|
14 |
-
|
15 |
-
|
16 |
|
17 |
# Definiowanie ścieżek do plików JSON
|
18 |
FAKE_NUMBERS_FILE = os.path.join('data', 'fake_numbers.json')
|
@@ -36,7 +40,7 @@ page_translations = {
|
|
36 |
'fraud_trend_title': "Trendy Oszustw w Czasie",
|
37 |
'risk_distribution': "Rozkład Ocen Ryzyka Oszustwa",
|
38 |
'fraud_country_distribution': "Rozkład Oszustw Według Krajów",
|
39 |
-
'heatmap_title': "Heatmapa Oszustw",
|
40 |
'fraud_vs_nonfraud': "Procentowy Podział: Oszustwa vs Bezpieczne",
|
41 |
'no_data': "Brak dostępnych danych do wyświetlenia.",
|
42 |
'download_button': "📥 Pobierz dane jako CSV",
|
@@ -58,7 +62,7 @@ page_translations = {
|
|
58 |
'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
|
59 |
'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
|
60 |
'fraud_country_distribution': "Betrug nach Ländern",
|
61 |
-
'heatmap_title': "Heatmap der Betrügereien",
|
62 |
'fraud_vs_nonfraud': "Prozentanteil: Betrug vs Sichere Nachrichten",
|
63 |
'no_data': "Keine Daten zur Anzeige verfügbar.",
|
64 |
'download_button': "📥 Daten als CSV herunterladen",
|
@@ -80,7 +84,7 @@ page_translations = {
|
|
80 |
'fraud_trend_title': "Fraud Trends Over Time",
|
81 |
'risk_distribution': "Distribution of Fraud Risk Scores",
|
82 |
'fraud_country_distribution': "Fraud Distribution by Countries",
|
83 |
-
'heatmap_title': "Fraud Heatmap",
|
84 |
'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
|
85 |
'no_data': "No data available to display.",
|
86 |
'download_button': "📥 Download data as CSV",
|
@@ -89,67 +93,22 @@ page_translations = {
|
|
89 |
}
|
90 |
}
|
91 |
|
92 |
-
def load_json(file_path):
|
93 |
-
"""Ładuje dane z pliku JSON."""
|
94 |
-
if not os.path.exists(file_path):
|
95 |
-
if file_path.endswith('stats.json'):
|
96 |
-
return {"total_analyses": 0, "total_frauds_detected": 0}
|
97 |
-
else:
|
98 |
-
return []
|
99 |
-
with open(file_path, 'r', encoding='utf-8') as file:
|
100 |
-
try:
|
101 |
-
data = json.load(file)
|
102 |
-
return data
|
103 |
-
except json.JSONDecodeError:
|
104 |
-
st.error(f"Nie można załadować danych z {file_path}. Plik jest uszkodzony.")
|
105 |
-
if file_path.endswith('stats.json'):
|
106 |
-
return {"total_analyses": 0, "total_frauds_detected": 0}
|
107 |
-
return []
|
108 |
-
|
109 |
-
def save_json(file_path, data):
|
110 |
-
"""Zapisuje dane do pliku JSON."""
|
111 |
-
with open(file_path, 'w', encoding='utf-8') as file:
|
112 |
-
json.dump(data, file, ensure_ascii=False, indent=4)
|
113 |
-
st.success(f"Dane zostały zapisane do {file_path}.")
|
114 |
-
|
115 |
-
def get_stats_from_json():
|
116 |
-
"""Pobiera statystyki z pliku stats.json."""
|
117 |
-
stats = load_json(STATS_FILE)
|
118 |
-
return stats
|
119 |
-
|
120 |
-
def get_history_from_json():
|
121 |
-
"""Pobiera historię analiz z pliku history.json."""
|
122 |
-
return load_json(HISTORY_FILE)
|
123 |
-
|
124 |
-
def get_fake_numbers_from_json():
|
125 |
-
"""Pobiera fałszywe numery z pliku fake_numbers.json."""
|
126 |
-
return load_json(FAKE_NUMBERS_FILE)
|
127 |
-
|
128 |
-
def get_country_code(name):
|
129 |
-
"""Zwraca kod ISO-3 kraju na podstawie jego nazwy."""
|
130 |
-
try:
|
131 |
-
country = pycountry.countries.lookup(name)
|
132 |
-
return country.alpha_3
|
133 |
-
except LookupError:
|
134 |
-
return None
|
135 |
-
|
136 |
def main(language):
|
137 |
translations = page_translations.get(language, page_translations['English'])
|
138 |
|
139 |
-
# Wyświetlenie nagłówka
|
140 |
st.title(translations['header'])
|
141 |
st.markdown(translations['description'])
|
142 |
|
143 |
# Pobieranie danych z plików JSON
|
144 |
-
stats =
|
145 |
-
history =
|
146 |
|
147 |
# Kluczowe metryki
|
148 |
total_analyses = stats.get("total_analyses", 0)
|
149 |
total_frauds_detected = stats.get("total_frauds_detected", 0)
|
150 |
|
151 |
# Stylizacja kart metryk
|
152 |
-
|
153 |
|
154 |
# Wyświetlenie metryk
|
155 |
col1, col2, col3 = st.columns(3)
|
@@ -161,21 +120,21 @@ def main(language):
|
|
161 |
fraud_percentage = 0 # Ustawienie na 0% w przypadku braku analiz
|
162 |
col3.metric(label=translations['fraud_percentage'], value=f"{fraud_percentage:.2f}%")
|
163 |
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
start_date = st.date_input(
|
169 |
"Start Date",
|
170 |
-
value=
|
171 |
-
min_value=
|
172 |
-
max_value=
|
173 |
)
|
174 |
-
end_date = st.date_input(
|
175 |
"End Date",
|
176 |
-
value=
|
177 |
min_value=start_date,
|
178 |
-
max_value=
|
179 |
)
|
180 |
|
181 |
# Filtracja historii na podstawie daty
|
@@ -187,9 +146,27 @@ def main(language):
|
|
187 |
mask = (df_history['timestamp'].dt.date >= start_date) & (df_history['timestamp'].dt.date <= end_date)
|
188 |
df_filtered = df_history.loc[mask]
|
189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
# Aktualizacja statystyk na podstawie filtrowanej historii
|
191 |
total_filtered = df_filtered.shape[0]
|
192 |
-
frauds_filtered = df_filtered[
|
193 |
fraud_percentage_filtered = (frauds_filtered / total_filtered) * 100 if total_filtered > 0 else 0
|
194 |
|
195 |
# Wyświetlenie metryk dla filtrowanej historii
|
@@ -222,7 +199,7 @@ def main(language):
|
|
222 |
def extract_risk_score(risk_assessment):
|
223 |
match = re.search(r'(\d+)/10', risk_assessment)
|
224 |
return int(match.group(1)) if match else 0
|
225 |
-
|
226 |
df_filtered['risk_score'] = df_filtered['risk_assessment'].apply(extract_risk_score)
|
227 |
risk_distribution = df_filtered['risk_score'].value_counts().sort_index().reset_index()
|
228 |
risk_distribution.columns = ['risk_score', 'count']
|
@@ -236,15 +213,15 @@ def main(language):
|
|
236 |
def get_country(row):
|
237 |
country, _ = get_phone_info(row['phone_number'])
|
238 |
return country if country else "Unknown"
|
239 |
-
|
240 |
df_filtered['country'] = df_filtered.apply(get_country, axis=1)
|
241 |
fraud_countries = df_filtered['country'].value_counts().reset_index()
|
242 |
fraud_countries.columns = ['country', 'counts']
|
243 |
-
|
244 |
# Dodanie kodów krajów
|
245 |
-
fraud_countries['iso_alpha'] = fraud_countries['country'].apply(
|
246 |
fraud_countries = fraud_countries.dropna(subset=['iso_alpha'])
|
247 |
-
|
248 |
if not fraud_countries.empty:
|
249 |
fig_map = px.choropleth(
|
250 |
fraud_countries,
|
@@ -254,30 +231,42 @@ def main(language):
|
|
254 |
color_continuous_scale=px.colors.sequential.Plasma,
|
255 |
title=translations['fraud_country_distribution_title']
|
256 |
)
|
257 |
-
fig_map.update_geos(showcountries=True, showcoastlines=True)
|
258 |
st.plotly_chart(fig_map, use_container_width=True)
|
259 |
else:
|
260 |
st.info(translations['no_data'])
|
261 |
-
|
|
|
|
|
262 |
# Dodatkowe Wizualizacje
|
263 |
st.markdown("### " + translations['heatmap_title'])
|
264 |
# Heatmapa oszustw na podstawie lokalizacji
|
265 |
if not fraud_countries.empty:
|
266 |
# Przygotowanie danych geograficznych
|
267 |
-
# Użyjemy szerokości i długości geograficznej krajów
|
268 |
-
country_coords = {
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
281 |
# Tworzenie Heatmapy
|
282 |
fig_heatmap = px.density_mapbox(
|
283 |
fraud_countries,
|
@@ -290,11 +279,11 @@ def main(language):
|
|
290 |
mapbox_style="stamen-terrain",
|
291 |
title=translations['heatmap_title']
|
292 |
)
|
293 |
-
fig_heatmap.update_geos(showcountries=True, showcoastlines=True)
|
294 |
st.plotly_chart(fig_heatmap, use_container_width=True)
|
295 |
else:
|
296 |
st.info(translations['no_data'])
|
297 |
-
|
298 |
# Gauge Chart - Procentowy udział oszustw
|
299 |
st.markdown("### " + translations['fraud_percentage'])
|
300 |
fig_gauge = go.Figure(go.Indicator(
|
@@ -319,4 +308,3 @@ def main(language):
|
|
319 |
}
|
320 |
))
|
321 |
st.plotly_chart(fig_gauge, use_container_width=True)
|
322 |
-
|
|
|
9 |
import re
|
10 |
from datetime import datetime, timedelta
|
11 |
import pycountry
|
12 |
+
import requests
|
13 |
+
|
14 |
+
# Importowanie funkcji z utils/functions.py
|
15 |
+
from utils.functions import get_phone_info, get_stats, get_history, get_fake_numbers
|
16 |
|
17 |
# Opcjonalne: Importowanie dodatkowych komponentów
|
18 |
+
from streamlit_extras.metric_cards import style_metric_cards
|
19 |
+
from streamlit_elements import elements, mui, html
|
20 |
|
21 |
# Definiowanie ścieżek do plików JSON
|
22 |
FAKE_NUMBERS_FILE = os.path.join('data', 'fake_numbers.json')
|
|
|
40 |
'fraud_trend_title': "Trendy Oszustw w Czasie",
|
41 |
'risk_distribution': "Rozkład Ocen Ryzyka Oszustwa",
|
42 |
'fraud_country_distribution': "Rozkład Oszustw Według Krajów",
|
43 |
+
'heatmap_title': "Heatmapa Oszustw w Czasie",
|
44 |
'fraud_vs_nonfraud': "Procentowy Podział: Oszustwa vs Bezpieczne",
|
45 |
'no_data': "Brak dostępnych danych do wyświetlenia.",
|
46 |
'download_button': "📥 Pobierz dane jako CSV",
|
|
|
62 |
'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
|
63 |
'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
|
64 |
'fraud_country_distribution': "Betrug nach Ländern",
|
65 |
+
'heatmap_title': "Heatmap der Betrügereien im Laufe der Zeit",
|
66 |
'fraud_vs_nonfraud': "Prozentanteil: Betrug vs Sichere Nachrichten",
|
67 |
'no_data': "Keine Daten zur Anzeige verfügbar.",
|
68 |
'download_button': "📥 Daten als CSV herunterladen",
|
|
|
84 |
'fraud_trend_title': "Fraud Trends Over Time",
|
85 |
'risk_distribution': "Distribution of Fraud Risk Scores",
|
86 |
'fraud_country_distribution': "Fraud Distribution by Countries",
|
87 |
+
'heatmap_title': "Fraud Heatmap Over Time",
|
88 |
'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
|
89 |
'no_data': "No data available to display.",
|
90 |
'download_button': "📥 Download data as CSV",
|
|
|
93 |
}
|
94 |
}
|
95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
def main(language):
|
97 |
translations = page_translations.get(language, page_translations['English'])
|
98 |
|
|
|
99 |
st.title(translations['header'])
|
100 |
st.markdown(translations['description'])
|
101 |
|
102 |
# Pobieranie danych z plików JSON
|
103 |
+
stats = get_stats()
|
104 |
+
history = get_history()
|
105 |
|
106 |
# Kluczowe metryki
|
107 |
total_analyses = stats.get("total_analyses", 0)
|
108 |
total_frauds_detected = stats.get("total_frauds_detected", 0)
|
109 |
|
110 |
# Stylizacja kart metryk
|
111 |
+
style_metric_cards()
|
112 |
|
113 |
# Wyświetlenie metryk
|
114 |
col1, col2, col3 = st.columns(3)
|
|
|
120 |
fraud_percentage = 0 # Ustawienie na 0% w przypadku braku analiz
|
121 |
col3.metric(label=translations['fraud_percentage'], value=f"{fraud_percentage:.2f}%")
|
122 |
|
123 |
+
st.markdown("---")
|
124 |
+
|
125 |
+
# Dodanie interaktywnego filtra daty w bocznym panelu
|
126 |
+
st.sidebar.header(translations['select_date_range'])
|
127 |
+
start_date = st.sidebar.date_input(
|
128 |
"Start Date",
|
129 |
+
value=datetime.now().date() - timedelta(days=translations['recent_days']),
|
130 |
+
min_value=datetime.now().date() - timedelta(days=365),
|
131 |
+
max_value=datetime.now().date()
|
132 |
)
|
133 |
+
end_date = st.sidebar.date_input(
|
134 |
"End Date",
|
135 |
+
value=datetime.now().date(),
|
136 |
min_value=start_date,
|
137 |
+
max_value=datetime.now().date()
|
138 |
)
|
139 |
|
140 |
# Filtracja historii na podstawie daty
|
|
|
146 |
mask = (df_history['timestamp'].dt.date >= start_date) & (df_history['timestamp'].dt.date <= end_date)
|
147 |
df_filtered = df_history.loc[mask]
|
148 |
|
149 |
+
# Wyświetlenie tabeli historii analiz
|
150 |
+
st.markdown(f"### {translations['history_title']}")
|
151 |
+
if not df_filtered.empty:
|
152 |
+
st.dataframe(df_filtered[['timestamp', 'phone_number', 'risk_assessment']], height=300)
|
153 |
+
|
154 |
+
# Opcjonalnie: Dodanie możliwości eksportu danych
|
155 |
+
csv = df_filtered.to_csv(index=False).encode('utf-8')
|
156 |
+
st.download_button(
|
157 |
+
label=translations['download_button'],
|
158 |
+
data=csv,
|
159 |
+
file_name='analysis_history.csv',
|
160 |
+
mime='text/csv',
|
161 |
+
)
|
162 |
+
else:
|
163 |
+
st.info(translations['no_data'])
|
164 |
+
|
165 |
+
st.markdown("---")
|
166 |
+
|
167 |
# Aktualizacja statystyk na podstawie filtrowanej historii
|
168 |
total_filtered = df_filtered.shape[0]
|
169 |
+
frauds_filtered = df_filtered['risk_assessment'].apply(lambda x: int(re.search(r'(\d+)/10', x).group(1)) >= 7 if re.search(r'(\d+)/10', x) else False).sum()
|
170 |
fraud_percentage_filtered = (frauds_filtered / total_filtered) * 100 if total_filtered > 0 else 0
|
171 |
|
172 |
# Wyświetlenie metryk dla filtrowanej historii
|
|
|
199 |
def extract_risk_score(risk_assessment):
|
200 |
match = re.search(r'(\d+)/10', risk_assessment)
|
201 |
return int(match.group(1)) if match else 0
|
202 |
+
|
203 |
df_filtered['risk_score'] = df_filtered['risk_assessment'].apply(extract_risk_score)
|
204 |
risk_distribution = df_filtered['risk_score'].value_counts().sort_index().reset_index()
|
205 |
risk_distribution.columns = ['risk_score', 'count']
|
|
|
213 |
def get_country(row):
|
214 |
country, _ = get_phone_info(row['phone_number'])
|
215 |
return country if country else "Unknown"
|
216 |
+
|
217 |
df_filtered['country'] = df_filtered.apply(get_country, axis=1)
|
218 |
fraud_countries = df_filtered['country'].value_counts().reset_index()
|
219 |
fraud_countries.columns = ['country', 'counts']
|
220 |
+
|
221 |
# Dodanie kodów krajów
|
222 |
+
fraud_countries['iso_alpha'] = fraud_countries['country'].apply(lambda x: pycountry.countries.lookup(x).alpha_3 if x != "Unknown" else None)
|
223 |
fraud_countries = fraud_countries.dropna(subset=['iso_alpha'])
|
224 |
+
|
225 |
if not fraud_countries.empty:
|
226 |
fig_map = px.choropleth(
|
227 |
fraud_countries,
|
|
|
231 |
color_continuous_scale=px.colors.sequential.Plasma,
|
232 |
title=translations['fraud_country_distribution_title']
|
233 |
)
|
234 |
+
fig_map.update_geos(showcountries=True, showcoastlines=True)
|
235 |
st.plotly_chart(fig_map, use_container_width=True)
|
236 |
else:
|
237 |
st.info(translations['no_data'])
|
238 |
+
|
239 |
+
st.markdown("---")
|
240 |
+
|
241 |
# Dodatkowe Wizualizacje
|
242 |
st.markdown("### " + translations['heatmap_title'])
|
243 |
# Heatmapa oszustw na podstawie lokalizacji
|
244 |
if not fraud_countries.empty:
|
245 |
# Przygotowanie danych geograficznych
|
246 |
+
# Użyjemy szerokości i długości geograficznej krajów
|
247 |
+
country_coords = {}
|
248 |
+
for country in fraud_countries['country'].unique():
|
249 |
+
if country == "Unknown":
|
250 |
+
country_coords[country] = (0, 0) # Centrum świata
|
251 |
+
else:
|
252 |
+
try:
|
253 |
+
country_obj = pycountry.countries.lookup(country)
|
254 |
+
# Użyjemy średnich szerokości i długości geograficznej
|
255 |
+
geocode_url = f"https://restcountries.com/v3.1/name/{country}"
|
256 |
+
response = requests.get(geocode_url)
|
257 |
+
if response.status_code == 200:
|
258 |
+
data = response.json()
|
259 |
+
lat = data[0]['latlng'][0]
|
260 |
+
lon = data[0]['latlng'][1]
|
261 |
+
country_coords[country] = (lat, lon)
|
262 |
+
else:
|
263 |
+
country_coords[country] = (0, 0)
|
264 |
+
except:
|
265 |
+
country_coords[country] = (0, 0)
|
266 |
+
|
267 |
+
fraud_countries['lat'] = fraud_countries['country'].apply(lambda x: country_coords.get(x, (0,0))[0])
|
268 |
+
fraud_countries['lon'] = fraud_countries['country'].apply(lambda x: country_coords.get(x, (0,0))[1])
|
269 |
+
|
270 |
# Tworzenie Heatmapy
|
271 |
fig_heatmap = px.density_mapbox(
|
272 |
fraud_countries,
|
|
|
279 |
mapbox_style="stamen-terrain",
|
280 |
title=translations['heatmap_title']
|
281 |
)
|
282 |
+
fig_heatmap.update_geos(showcountries=True, showcoastlines=True)
|
283 |
st.plotly_chart(fig_heatmap, use_container_width=True)
|
284 |
else:
|
285 |
st.info(translations['no_data'])
|
286 |
+
|
287 |
# Gauge Chart - Procentowy udział oszustw
|
288 |
st.markdown("### " + translations['fraud_percentage'])
|
289 |
fig_gauge = go.Figure(go.Indicator(
|
|
|
308 |
}
|
309 |
))
|
310 |
st.plotly_chart(fig_gauge, use_container_width=True)
|
|