Spaces:
Running
Running
Update pages/Statistics.py
Browse files- pages/Statistics.py +51 -40
pages/Statistics.py
CHANGED
@@ -4,7 +4,6 @@ import streamlit as st
|
|
4 |
import pandas as pd
|
5 |
import plotly.express as px
|
6 |
import plotly.graph_objects as go
|
7 |
-
import json
|
8 |
import os
|
9 |
import re
|
10 |
from datetime import datetime, timedelta
|
@@ -14,8 +13,8 @@ import requests
|
|
14 |
# Importowanie funkcji z utils/functions.py
|
15 |
from utils.functions import get_phone_info, get_stats, get_history, get_fake_numbers
|
16 |
|
17 |
-
# Importowanie
|
18 |
-
from streamlit_extras.
|
19 |
|
20 |
# Definiowanie tłumaczeń dla zakładki "Statystyki"
|
21 |
page_translations = {
|
@@ -34,7 +33,6 @@ page_translations = {
|
|
34 |
'fraud_trend_title': "Trendy Oszustw w Czasie",
|
35 |
'risk_distribution': "Rozkład Ocen Ryzyka Oszustwa",
|
36 |
'fraud_country_distribution': "Rozkład Oszustw Według Krajów",
|
37 |
-
'heatmap_title': "Heatmapa Oszustw w Czasie",
|
38 |
'fraud_vs_nonfraud': "Procentowy Podział: Oszustwa vs Bezpieczne",
|
39 |
'no_data': "Brak dostępnych danych do wyświetlenia.",
|
40 |
'download_button': "📥 Pobierz dane jako CSV",
|
@@ -48,20 +46,19 @@ page_translations = {
|
|
48 |
'header': "📊 Anwendungsstatistiken",
|
49 |
'description': "Nachfolgend finden Sie die Statistiken zur Nachrichtenanalyse in der Anwendung.",
|
50 |
'total_analyses': "Anzahl der analysierten Nachrichten",
|
51 |
-
'total_frauds_detected': "Erkannte
|
52 |
'fraud_percentage': "Betrugsprozentsatz",
|
53 |
-
'history_title': "
|
54 |
-
'frauds_over_time': "Anzahl
|
55 |
'risk_distribution_title': "Verteilung der Betrugsrisikobewertungen",
|
56 |
-
'fraud_country_distribution_title': "
|
57 |
'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
|
58 |
'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
|
59 |
-
'fraud_country_distribution': "
|
60 |
-
'
|
61 |
-
'
|
62 |
-
'no_data': "Keine Daten zur Anzeige verfügbar.",
|
63 |
'download_button': "📥 Daten als CSV herunterladen",
|
64 |
-
'select_date_range': "
|
65 |
'search_placeholder': "Suche in der Historie:",
|
66 |
'recent_days': 30
|
67 |
},
|
@@ -80,7 +77,6 @@ page_translations = {
|
|
80 |
'fraud_trend_title': "Fraud Trends Over Time",
|
81 |
'risk_distribution': "Distribution of Fraud Risk Scores",
|
82 |
'fraud_country_distribution': "Fraud Distribution by Countries",
|
83 |
-
'heatmap_title': "Fraud Heatmap Over Time",
|
84 |
'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
|
85 |
'no_data': "No data available to display.",
|
86 |
'download_button': "📥 Download data as CSV",
|
@@ -90,17 +86,33 @@ page_translations = {
|
|
90 |
}
|
91 |
}
|
92 |
|
93 |
-
# Mapowanie
|
94 |
country_name_mapping = {
|
95 |
-
'niemcy': 'DEU',
|
96 |
-
'
|
97 |
-
'
|
98 |
-
'
|
99 |
-
'
|
100 |
-
'
|
101 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
'unknown': None,
|
103 |
-
'nieznany': None
|
|
|
104 |
# Dodaj inne kraje w razie potrzeby
|
105 |
}
|
106 |
|
@@ -128,7 +140,7 @@ def main(language):
|
|
128 |
stats = get_stats()
|
129 |
history = get_history()
|
130 |
except Exception as e:
|
131 |
-
st.error(f"
|
132 |
st.stop()
|
133 |
|
134 |
# Kluczowe metryki
|
@@ -140,12 +152,12 @@ def main(language):
|
|
140 |
with col1:
|
141 |
card(
|
142 |
title=translations['total_analyses'],
|
143 |
-
|
144 |
)
|
145 |
with col2:
|
146 |
card(
|
147 |
title=translations['total_frauds_detected'],
|
148 |
-
|
149 |
)
|
150 |
with col3:
|
151 |
if total_analyses > 0:
|
@@ -154,7 +166,7 @@ def main(language):
|
|
154 |
fraud_percentage = 0 # Ustawienie na 0% w przypadku braku analiz
|
155 |
card(
|
156 |
title=translations['fraud_percentage'],
|
157 |
-
|
158 |
)
|
159 |
|
160 |
st.markdown("---")
|
@@ -178,7 +190,7 @@ def main(language):
|
|
178 |
max_value=datetime.now().date()
|
179 |
)
|
180 |
except Exception as e:
|
181 |
-
st.error(f"
|
182 |
st.stop()
|
183 |
|
184 |
# Filtracja historii na podstawie daty
|
@@ -193,7 +205,7 @@ def main(language):
|
|
193 |
mask = (df_history['timestamp'].dt.date >= start_date) & (df_history['timestamp'].dt.date <= end_date)
|
194 |
df_filtered = df_history.loc[mask]
|
195 |
except Exception as e:
|
196 |
-
st.error(f"
|
197 |
st.stop()
|
198 |
|
199 |
# Dodanie pola wyszukiwania
|
@@ -223,7 +235,8 @@ def main(language):
|
|
223 |
|
224 |
# Aktualizacja statystyk na podstawie filtrowanej historii
|
225 |
total_filtered = df_filtered.shape[0]
|
226 |
-
frauds_filtered = df_filtered['risk_assessment'].apply(
|
|
|
227 |
fraud_percentage_filtered = (frauds_filtered / total_filtered) * 100 if total_filtered > 0 else 0
|
228 |
|
229 |
# Wyświetlenie metryk dla filtrowanej historii
|
@@ -232,23 +245,23 @@ def main(language):
|
|
232 |
with col4:
|
233 |
card(
|
234 |
title=translations['total_analyses'],
|
235 |
-
|
236 |
)
|
237 |
with col5:
|
238 |
card(
|
239 |
title=translations['total_frauds_detected'],
|
240 |
-
|
241 |
)
|
242 |
with col6:
|
243 |
card(
|
244 |
title=translations['fraud_percentage'],
|
245 |
-
|
246 |
)
|
247 |
|
248 |
# Wizualizacja procentowego podziału oszustw
|
249 |
st.markdown("### " + translations['fraud_vs_nonfraud'])
|
250 |
fraud_data = [frauds_filtered, total_filtered - frauds_filtered]
|
251 |
-
fraud_labels = ['
|
252 |
fig_fraud_pie = go.Figure(data=[go.Pie(labels=fraud_labels, values=fraud_data, hole=.3,
|
253 |
marker_colors=['#FF6347', '#4682B4'])])
|
254 |
fig_fraud_pie.update_layout(title_text=translations['fraud_vs_nonfraud'])
|
@@ -257,16 +270,15 @@ def main(language):
|
|
257 |
# Trend oszustw w czasie
|
258 |
st.markdown("### " + translations['frauds_over_time'])
|
259 |
fraud_over_time = df_filtered.groupby(df_filtered['timestamp'].dt.date)['phone_number'].count().reset_index()
|
260 |
-
fraud_over_time.rename(columns={'timestamp': 'Date', 'phone_number': '
|
261 |
-
fig_trend = px.line(fraud_over_time, x='Date', y='
|
262 |
-
labels={'Date': '
|
263 |
fig_trend.update_traces(line=dict(color='firebrick'))
|
264 |
st.plotly_chart(fig_trend, use_container_width=True)
|
265 |
|
266 |
# Rozkład ocen ryzyka
|
267 |
st.markdown("### " + translations['risk_distribution_title'])
|
268 |
|
269 |
-
|
270 |
def extract_risk_score(risk_assessment):
    """Extract the numeric risk score from a risk-assessment text.

    Searches ``risk_assessment`` for the first ``<digits>/10`` pattern and
    returns the digits as an integer.

    Args:
        risk_assessment: Assessment text expected to contain a score such
            as ``"7/10"``. Non-string values (for example ``None`` or a
            float NaN standing in for a missing value) are treated as
            carrying no score.

    Returns:
        int: The parsed score, or 0 when the input is not a string or no
        ``<digits>/10`` pattern is present.
    """
    # re.search raises TypeError on non-string input; a missing assessment
    # should count as "no score" rather than abort the caller.
    if not isinstance(risk_assessment, str):
        return 0
    match = re.search(r'(\d+)/10', risk_assessment)
    return int(match.group(1)) if match else 0
|
@@ -275,14 +287,13 @@ def main(language):
|
|
275 |
risk_distribution = df_filtered['risk_score'].value_counts().sort_index().reset_index()
|
276 |
risk_distribution.columns = ['risk_score', 'count']
|
277 |
fig_risk = px.bar(risk_distribution, x='risk_score', y='count', title=translations['risk_distribution'],
|
278 |
-
labels={'risk_score': '
|
279 |
color='risk_score', color_continuous_scale=px.colors.sequential.RdBu)
|
280 |
st.plotly_chart(fig_risk, use_container_width=True)
|
281 |
|
282 |
# Rozkład oszustw według krajów
|
283 |
st.markdown("### " + translations['fraud_country_distribution_title'])
|
284 |
|
285 |
-
|
286 |
def get_country(row):
    """Return the country reported for the phone number stored in ``row``.

    Passes ``row['phone_number']`` to ``get_phone_info`` and keeps the first
    element of the returned pair. A falsy country is replaced by the
    literal "Unknown".
    """
    phone_info = get_phone_info(row['phone_number'])
    country, _unused = phone_info
    return country or "Unknown"
|
|
|
4 |
import pandas as pd
|
5 |
import plotly.express as px
|
6 |
import plotly.graph_objects as go
|
|
|
7 |
import os
|
8 |
import re
|
9 |
from datetime import datetime, timedelta
|
|
|
13 |
# Importowanie funkcji z utils/functions.py
|
14 |
from utils.functions import get_phone_info, get_stats, get_history, get_fake_numbers
|
15 |
|
16 |
+
# Importowanie funkcji 'card' z 'streamlit_extras.card'
|
17 |
+
from streamlit_extras.card import card
|
18 |
|
19 |
# Definiowanie tłumaczeń dla zakładki "Statystyki"
|
20 |
page_translations = {
|
|
|
33 |
'fraud_trend_title': "Trendy Oszustw w Czasie",
|
34 |
'risk_distribution': "Rozkład Ocen Ryzyka Oszustwa",
|
35 |
'fraud_country_distribution': "Rozkład Oszustw Według Krajów",
|
|
|
36 |
'fraud_vs_nonfraud': "Procentowy Podział: Oszustwa vs Bezpieczne",
|
37 |
'no_data': "Brak dostępnych danych do wyświetlenia.",
|
38 |
'download_button': "📥 Pobierz dane jako CSV",
|
|
|
46 |
'header': "📊 Anwendungsstatistiken",
|
47 |
'description': "Nachfolgend finden Sie die Statistiken zur Nachrichtenanalyse in der Anwendung.",
|
48 |
'total_analyses': "Anzahl der analysierten Nachrichten",
|
49 |
+
'total_frauds_detected': "Erkannte Betrugsfälle",
|
50 |
'fraud_percentage': "Betrugsprozentsatz",
|
51 |
+
'history_title': "Verlauf analysierter Nachrichten",
|
52 |
+
'frauds_over_time': "Anzahl erkannter Betrugsfälle im Laufe der Zeit",
|
53 |
'risk_distribution_title': "Verteilung der Betrugsrisikobewertungen",
|
54 |
+
'fraud_country_distribution_title': "Verteilung der Betrugsfälle nach Ländern",
|
55 |
'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
|
56 |
'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
|
57 |
+
'fraud_country_distribution': "Verteilung der Betrugsfälle nach Ländern",
|
58 |
+
'fraud_vs_nonfraud': "Prozentuale Aufteilung: Betrug vs. Sicher",
|
59 |
+
'no_data': "Keine Daten zum Anzeigen verfügbar.",
|
|
|
60 |
'download_button': "📥 Daten als CSV herunterladen",
|
61 |
+
'select_date_range': "Datumsbereich auswählen:",
|
62 |
'search_placeholder': "Suche in der Historie:",
|
63 |
'recent_days': 30
|
64 |
},
|
|
|
77 |
'fraud_trend_title': "Fraud Trends Over Time",
|
78 |
'risk_distribution': "Distribution of Fraud Risk Scores",
|
79 |
'fraud_country_distribution': "Fraud Distribution by Countries",
|
|
|
80 |
'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
|
81 |
'no_data': "No data available to display.",
|
82 |
'download_button': "📥 Download data as CSV",
|
|
|
86 |
}
|
87 |
}
|
88 |
|
89 |
+
# Maps lower-case country names (Polish, English and German spellings)
# to ISO alpha-3 country codes. Placeholder names map to None.
country_name_mapping = {
    # Germany
    'niemcy': 'DEU',
    'germany': 'DEU',
    'deutschland': 'DEU',
    # Poland
    'polska': 'POL',
    'poland': 'POL',
    # Austria
    'österreich': 'AUT',
    'austria': 'AUT',
    # France
    'francja': 'FRA',
    'france': 'FRA',
    'frankreich': 'FRA',
    # Italy
    'włochy': 'ITA',
    'italy': 'ITA',
    'italien': 'ITA',
    # Spain
    'hiszpania': 'ESP',
    'spain': 'ESP',
    'spanien': 'ESP',
    # United States
    'stany zjednoczone': 'USA',
    'usa': 'USA',
    'vereinigte staaten': 'USA',
    # United Kingdom
    'wielka brytania': 'GBR',
    'united kingdom': 'GBR',
    'vereinigtes königreich': 'GBR',
    # Placeholders for an unresolved country (English, Polish, German)
    'unknown': None,
    'nieznany': None,
    'unbekannt': None
    # Add further countries here as needed
}
|
118 |
|
|
|
140 |
stats = get_stats()
|
141 |
history = get_history()
|
142 |
except Exception as e:
|
143 |
+
st.error(f"{translations['no_data']} ({e})")
|
144 |
st.stop()
|
145 |
|
146 |
# Kluczowe metryki
|
|
|
152 |
with col1:
|
153 |
card(
|
154 |
title=translations['total_analyses'],
|
155 |
+
text=str(total_analyses),
|
156 |
)
|
157 |
with col2:
|
158 |
card(
|
159 |
title=translations['total_frauds_detected'],
|
160 |
+
text=str(total_frauds_detected),
|
161 |
)
|
162 |
with col3:
|
163 |
if total_analyses > 0:
|
|
|
166 |
fraud_percentage = 0 # Ustawienie na 0% w przypadku braku analiz
|
167 |
card(
|
168 |
title=translations['fraud_percentage'],
|
169 |
+
text=f"{fraud_percentage:.2f}%",
|
170 |
)
|
171 |
|
172 |
st.markdown("---")
|
|
|
190 |
max_value=datetime.now().date()
|
191 |
)
|
192 |
except Exception as e:
|
193 |
+
st.error(f"{translations['no_data']} ({e})")
|
194 |
st.stop()
|
195 |
|
196 |
# Filtracja historii na podstawie daty
|
|
|
205 |
mask = (df_history['timestamp'].dt.date >= start_date) & (df_history['timestamp'].dt.date <= end_date)
|
206 |
df_filtered = df_history.loc[mask]
|
207 |
except Exception as e:
|
208 |
+
st.error(f"{translations['no_data']} ({e})")
|
209 |
st.stop()
|
210 |
|
211 |
# Dodanie pola wyszukiwania
|
|
|
235 |
|
236 |
# Aktualizacja statystyk na podstawie filtrowanej historii
|
237 |
total_filtered = df_filtered.shape[0]
|
238 |
+
frauds_filtered = df_filtered['risk_assessment'].apply(
|
239 |
+
lambda x: int(re.search(r'(\d+)/10', x).group(1)) >= 7 if re.search(r'(\d+)/10', x) else False).sum()
|
240 |
fraud_percentage_filtered = (frauds_filtered / total_filtered) * 100 if total_filtered > 0 else 0
|
241 |
|
242 |
# Wyświetlenie metryk dla filtrowanej historii
|
|
|
245 |
with col4:
|
246 |
card(
|
247 |
title=translations['total_analyses'],
|
248 |
+
text=str(total_filtered),
|
249 |
)
|
250 |
with col5:
|
251 |
card(
|
252 |
title=translations['total_frauds_detected'],
|
253 |
+
text=str(frauds_filtered),
|
254 |
)
|
255 |
with col6:
|
256 |
card(
|
257 |
title=translations['fraud_percentage'],
|
258 |
+
text=f"{fraud_percentage_filtered:.2f}%",
|
259 |
)
|
260 |
|
261 |
# Wizualizacja procentowego podziału oszustw
|
262 |
st.markdown("### " + translations['fraud_vs_nonfraud'])
|
263 |
fraud_data = [frauds_filtered, total_filtered - frauds_filtered]
|
264 |
+
fraud_labels = [translations['total_frauds_detected'], translations['total_analyses']]
|
265 |
fig_fraud_pie = go.Figure(data=[go.Pie(labels=fraud_labels, values=fraud_data, hole=.3,
|
266 |
marker_colors=['#FF6347', '#4682B4'])])
|
267 |
fig_fraud_pie.update_layout(title_text=translations['fraud_vs_nonfraud'])
|
|
|
270 |
# Trend oszustw w czasie
|
271 |
st.markdown("### " + translations['frauds_over_time'])
|
272 |
fraud_over_time = df_filtered.groupby(df_filtered['timestamp'].dt.date)['phone_number'].count().reset_index()
|
273 |
+
fraud_over_time.rename(columns={'timestamp': 'Date', 'phone_number': translations['total_frauds_detected']}, inplace=True)
|
274 |
+
fig_trend = px.line(fraud_over_time, x='Date', y=translations['total_frauds_detected'], title=translations['frauds_over_time'],
|
275 |
+
labels={'Date': translations['select_date_range'], translations['total_frauds_detected']: translations['total_frauds_detected']}, markers=True)
|
276 |
fig_trend.update_traces(line=dict(color='firebrick'))
|
277 |
st.plotly_chart(fig_trend, use_container_width=True)
|
278 |
|
279 |
# Rozkład ocen ryzyka
|
280 |
st.markdown("### " + translations['risk_distribution_title'])
|
281 |
|
|
|
282 |
def extract_risk_score(risk_assessment):
    """Extract the numeric risk score from a risk-assessment text.

    Searches ``risk_assessment`` for the first ``<digits>/10`` pattern and
    returns the digits as an integer.

    Args:
        risk_assessment: Assessment text expected to contain a score such
            as ``"7/10"``. Non-string values (for example ``None`` or a
            float NaN standing in for a missing value) are treated as
            carrying no score.

    Returns:
        int: The parsed score, or 0 when the input is not a string or no
        ``<digits>/10`` pattern is present.
    """
    # re.search raises TypeError on non-string input; a missing assessment
    # should count as "no score" rather than abort the caller.
    if not isinstance(risk_assessment, str):
        return 0
    match = re.search(r'(\d+)/10', risk_assessment)
    return int(match.group(1)) if match else 0
|
|
|
287 |
risk_distribution = df_filtered['risk_score'].value_counts().sort_index().reset_index()
|
288 |
risk_distribution.columns = ['risk_score', 'count']
|
289 |
fig_risk = px.bar(risk_distribution, x='risk_score', y='count', title=translations['risk_distribution'],
|
290 |
+
labels={'risk_score': translations['risk_distribution'], 'count': translations['total_analyses']},
|
291 |
color='risk_score', color_continuous_scale=px.colors.sequential.RdBu)
|
292 |
st.plotly_chart(fig_risk, use_container_width=True)
|
293 |
|
294 |
# Rozkład oszustw według krajów
|
295 |
st.markdown("### " + translations['fraud_country_distribution_title'])
|
296 |
|
|
|
297 |
def get_country(row):
    """Return the country reported for the phone number stored in ``row``.

    Passes ``row['phone_number']`` to ``get_phone_info`` and keeps the first
    element of the returned pair. A falsy country is replaced by the
    literal "Unknown".
    """
    phone_info = get_phone_info(row['phone_number'])
    country, _unused = phone_info
    return country or "Unknown"
|