Spaces:
Running
Running
Update pages/Statistics.py
Browse files- pages/Statistics.py +44 -79
pages/Statistics.py
CHANGED
@@ -29,11 +29,12 @@ page_translations = {
|
|
29 |
'frauds_over_time': "Liczba wykrytych oszustw w czasie",
|
30 |
'risk_distribution': "Rozkład ocen ryzyka oszustwa",
|
31 |
'fraud_country_distribution': "Rozkład oszustw według krajów",
|
|
|
|
|
|
|
32 |
'heatmap_title': "Mapa ciepła oszustw w czasie",
|
33 |
'fraud_vs_nonfraud': "Procentowy podział: Oszustwa vs Bezpieczne",
|
34 |
'no_data': "Brak dostępnych danych do wyświetlenia.",
|
35 |
-
'search_placeholder': "Wyszukaj numer telefonu lub treść wiadomości",
|
36 |
-
'date_filter_label': "Filtruj po dacie",
|
37 |
'download_button': "📥 Pobierz dane jako CSV"
|
38 |
},
|
39 |
'German': {
|
@@ -48,11 +49,12 @@ page_translations = {
|
|
48 |
'frauds_over_time': "Anzahl der erkannten Betrügereien im Laufe der Zeit",
|
49 |
'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
|
50 |
'fraud_country_distribution': "Betrug nach Ländern",
|
|
|
|
|
|
|
51 |
'heatmap_title': "Heatmap der Betrügereien im Laufe der Zeit",
|
52 |
'fraud_vs_nonfraud': "Prozentanteil: Betrug vs Sichere Nachrichten",
|
53 |
'no_data': "Keine Daten zur Anzeige verfügbar.",
|
54 |
-
'search_placeholder': "Telefonnummer oder Nachrichtentext suchen",
|
55 |
-
'date_filter_label': "Nach Datum filtern",
|
56 |
'download_button': "📥 Daten als CSV herunterladen"
|
57 |
},
|
58 |
'English': {
|
@@ -67,11 +69,12 @@ page_translations = {
|
|
67 |
'frauds_over_time': "Number of Detected Frauds Over Time",
|
68 |
'risk_distribution': "Distribution of Fraud Risk Scores",
|
69 |
'fraud_country_distribution': "Fraud Distribution by Countries",
|
|
|
|
|
|
|
70 |
'heatmap_title': "Fraud Heatmap Over Time",
|
71 |
'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
|
72 |
'no_data': "No data available to display.",
|
73 |
-
'search_placeholder': "Search phone number or message content",
|
74 |
-
'date_filter_label': "Filter by Date",
|
75 |
'download_button': "📥 Download data as CSV"
|
76 |
}
|
77 |
}
|
@@ -114,11 +117,6 @@ def main(language):
|
|
114 |
if history:
|
115 |
st.markdown(f"### {translations['history_title']}")
|
116 |
|
117 |
-
# Pole wyszukiwania
|
118 |
-
search_query = st.text_input(translations['search_placeholder'], '')
|
119 |
-
|
120 |
-
# Dodanie filtrów daty
|
121 |
-
st.markdown(f"### {translations['date_filter_label']}")
|
122 |
df_history = pd.DataFrame(history)
|
123 |
|
124 |
# Sprawdzenie, czy 'timestamp' istnieje
|
@@ -132,103 +130,70 @@ def main(language):
|
|
132 |
# Dodanie kolumny 'date' dla wizualizacji
|
133 |
df_history['date'] = df_history['timestamp'].dt.date
|
134 |
|
135 |
-
#
|
136 |
-
if language == 'Polish':
|
137 |
-
start_label = "Data początkowa"
|
138 |
-
end_label = "Data końcowa"
|
139 |
-
elif language == 'German':
|
140 |
-
start_label = "Startdatum"
|
141 |
-
end_label = "Enddatum"
|
142 |
-
else:
|
143 |
-
start_label = "Start Date"
|
144 |
-
end_label = "End Date"
|
145 |
-
|
146 |
-
start_date = st.date_input(start_label, df_history['date'].min())
|
147 |
-
end_date = st.date_input(end_label, df_history['date'].max())
|
148 |
-
|
149 |
-
# Filtrowanie danych po dacie
|
150 |
-
df_filtered = df_history[
|
151 |
-
(df_history['date'] >= start_date) &
|
152 |
-
(df_history['date'] <= end_date)
|
153 |
-
]
|
154 |
-
|
155 |
-
# Dodatkowe filtrowanie na podstawie zapytania wyszukiwania
|
156 |
-
if search_query:
|
157 |
-
df_filtered = df_filtered[
|
158 |
-
df_filtered['phone_number'].str.contains(search_query, case=False, na=False) |
|
159 |
-
df_filtered['message'].str.contains(search_query, case=False, na=False)
|
160 |
-
]
|
161 |
|
162 |
# Wyświetlenie tabeli historii
|
163 |
-
st.dataframe(
|
164 |
|
165 |
# Opcjonalnie: Dodanie możliwości eksportu danych
|
166 |
-
if not
|
167 |
-
|
168 |
-
download_label = "📥 Pobierz dane jako CSV"
|
169 |
-
elif language == 'German':
|
170 |
-
download_label = "📥 Daten als CSV herunterladen"
|
171 |
-
else:
|
172 |
-
download_label = "📥 Download data as CSV"
|
173 |
-
|
174 |
-
csv = df_filtered.to_csv(index=False).encode('utf-8')
|
175 |
st.download_button(
|
176 |
-
label=
|
177 |
data=csv,
|
178 |
file_name='analysis_history.csv',
|
179 |
mime='text/csv',
|
180 |
)
|
181 |
|
182 |
-
#
|
183 |
-
st.markdown(f"### {translations['
|
|
|
|
|
|
|
|
|
184 |
|
185 |
-
#
|
|
|
186 |
def extract_risk_score(risk_assessment):
|
187 |
match = re.search(r'(\d+)/10', risk_assessment)
|
188 |
return int(match.group(1)) if match else 0
|
189 |
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
st.plotly_chart(fig_risk_pie, use_container_width=True)
|
196 |
-
|
197 |
-
# Wizualizacja rozkładu oszustw według krajów
|
198 |
-
st.markdown(f"### {translations['fraud_country_distribution']}")
|
199 |
|
200 |
-
#
|
|
|
201 |
def add_country_info(row):
|
202 |
country, _ = get_phone_info(row['phone_number'])
|
203 |
return country
|
204 |
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
# Przygotowanie danych geograficznych
|
209 |
-
country_counts = df_filtered['country'].value_counts().reset_index()
|
210 |
-
country_counts.columns = ['country', 'counts']
|
211 |
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
|
219 |
-
|
220 |
-
|
221 |
|
222 |
-
|
223 |
fig_map = px.choropleth(
|
224 |
-
|
225 |
locations='iso_alpha',
|
226 |
color='counts',
|
227 |
hover_name='country',
|
228 |
color_continuous_scale=px.colors.sequential.Plasma,
|
229 |
-
title=translations['
|
230 |
)
|
231 |
-
fig_map.update_geos(showcountries=True, showcoastlines=True)
|
232 |
st.plotly_chart(fig_map, use_container_width=True)
|
233 |
else:
|
234 |
st.info(translations['no_data'])
|
|
|
29 |
'frauds_over_time': "Liczba wykrytych oszustw w czasie",
|
30 |
'risk_distribution': "Rozkład ocen ryzyka oszustwa",
|
31 |
'fraud_country_distribution': "Rozkład oszustw według krajów",
|
32 |
+
'fraud_trend_title': "Trendy oszustw w czasie",
|
33 |
+
'risk_distribution_title': "Rozkład ocen ryzyka oszustwa",
|
34 |
+
'fraud_country_distribution_title': "Rozkład oszustw według krajów",
|
35 |
'heatmap_title': "Mapa ciepła oszustw w czasie",
|
36 |
'fraud_vs_nonfraud': "Procentowy podział: Oszustwa vs Bezpieczne",
|
37 |
'no_data': "Brak dostępnych danych do wyświetlenia.",
|
|
|
|
|
38 |
'download_button': "📥 Pobierz dane jako CSV"
|
39 |
},
|
40 |
'German': {
|
|
|
49 |
'frauds_over_time': "Anzahl der erkannten Betrügereien im Laufe der Zeit",
|
50 |
'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
|
51 |
'fraud_country_distribution': "Betrug nach Ländern",
|
52 |
+
'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
|
53 |
+
'risk_distribution_title': "Verteilung der Betrugsrisikobewertungen",
|
54 |
+
'fraud_country_distribution_title': "Betrug nach Ländern",
|
55 |
'heatmap_title': "Heatmap der Betrügereien im Laufe der Zeit",
|
56 |
'fraud_vs_nonfraud': "Prozentanteil: Betrug vs Sichere Nachrichten",
|
57 |
'no_data': "Keine Daten zur Anzeige verfügbar.",
|
|
|
|
|
58 |
'download_button': "📥 Daten als CSV herunterladen"
|
59 |
},
|
60 |
'English': {
|
|
|
69 |
'frauds_over_time': "Number of Detected Frauds Over Time",
|
70 |
'risk_distribution': "Distribution of Fraud Risk Scores",
|
71 |
'fraud_country_distribution': "Fraud Distribution by Countries",
|
72 |
+
'fraud_trend_title': "Fraud Trends Over Time",
|
73 |
+
'risk_distribution_title': "Distribution of Fraud Risk Scores",
|
74 |
+
'fraud_country_distribution_title': "Fraud Distribution by Countries",
|
75 |
'heatmap_title': "Fraud Heatmap Over Time",
|
76 |
'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
|
77 |
'no_data': "No data available to display.",
|
|
|
|
|
78 |
'download_button': "📥 Download data as CSV"
|
79 |
}
|
80 |
}
|
|
|
117 |
if history:
|
118 |
st.markdown(f"### {translations['history_title']}")
|
119 |
|
|
|
|
|
|
|
|
|
|
|
120 |
df_history = pd.DataFrame(history)
|
121 |
|
122 |
# Sprawdzenie, czy 'timestamp' istnieje
|
|
|
130 |
# Dodanie kolumny 'date' dla wizualizacji
|
131 |
df_history['date'] = df_history['timestamp'].dt.date
|
132 |
|
133 |
+
# Usunięcie sekcji wyszukiwania i filtrowania po dacie
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
|
135 |
# Wyświetlenie tabeli historii
|
136 |
+
st.dataframe(df_history[['timestamp', 'phone_number', 'risk_assessment']], height=300)
|
137 |
|
138 |
# Opcjonalnie: Dodanie możliwości eksportu danych
|
139 |
+
if not df_history.empty:
|
140 |
+
csv = df_history.to_csv(index=False).encode('utf-8')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
st.download_button(
|
142 |
+
label=translations['download_button'],
|
143 |
data=csv,
|
144 |
file_name='analysis_history.csv',
|
145 |
mime='text/csv',
|
146 |
)
|
147 |
|
148 |
+
# Trend oszustw w czasie
|
149 |
+
st.markdown(f"### {translations['fraud_trend_title']}")
|
150 |
+
fraud_over_time = df_history.groupby(df_history['timestamp'].dt.date)['phone_number'].count().reset_index()
|
151 |
+
fraud_over_time.rename(columns={'phone_number': 'frauds_detected'}, inplace=True)
|
152 |
+
fig_trend = px.line(fraud_over_time, x='timestamp', y='frauds_detected', title=translations['frauds_over_time'])
|
153 |
+
st.plotly_chart(fig_trend, use_container_width=True)
|
154 |
|
155 |
+
# Rozkład ocen ryzyka
|
156 |
+
st.markdown(f"### {translations['risk_distribution_title']}")
|
157 |
def extract_risk_score(risk_assessment):
|
158 |
match = re.search(r'(\d+)/10', risk_assessment)
|
159 |
return int(match.group(1)) if match else 0
|
160 |
|
161 |
+
df_history['risk_score'] = df_history['risk_assessment'].apply(extract_risk_score)
|
162 |
+
risk_distribution = df_history['risk_score'].value_counts().sort_index().reset_index()
|
163 |
+
risk_distribution.columns = ['risk_score', 'count']
|
164 |
+
fig_risk = px.bar(risk_distribution, x='risk_score', y='count', title=translations['risk_distribution'], labels={'risk_score': 'Risk Score', 'count': 'Number of Messages'}, color='risk_score', color_continuous_scale=px.colors.sequential.RdBu)
|
165 |
+
st.plotly_chart(fig_risk, use_container_width=True)
|
|
|
|
|
|
|
|
|
166 |
|
167 |
+
# Rozkład oszustw według krajów
|
168 |
+
st.markdown(f"### {translations['fraud_country_distribution_title']}")
|
169 |
def add_country_info(row):
|
170 |
country, _ = get_phone_info(row['phone_number'])
|
171 |
return country
|
172 |
|
173 |
+
df_history['country'] = df_history.apply(add_country_info, axis=1)
|
174 |
+
fraud_countries = df_history['country'].value_counts().reset_index()
|
175 |
+
fraud_countries.columns = ['country', 'counts']
|
|
|
|
|
|
|
176 |
|
177 |
+
# Dodanie kodów krajów
|
178 |
+
def get_country_code(name):
|
179 |
+
try:
|
180 |
+
return pycountry.countries.lookup(name).alpha_3
|
181 |
+
except:
|
182 |
+
return None
|
183 |
|
184 |
+
fraud_countries['iso_alpha'] = fraud_countries['country'].apply(get_country_code)
|
185 |
+
fraud_countries = fraud_countries.dropna(subset=['iso_alpha'])
|
186 |
|
187 |
+
if not fraud_countries.empty:
|
188 |
fig_map = px.choropleth(
|
189 |
+
fraud_countries,
|
190 |
locations='iso_alpha',
|
191 |
color='counts',
|
192 |
hover_name='country',
|
193 |
color_continuous_scale=px.colors.sequential.Plasma,
|
194 |
+
title=translations['fraud_country_distribution_title']
|
195 |
)
|
196 |
+
fig_map.update_geos(showcountries=True, showcoastlines=True)
|
197 |
st.plotly_chart(fig_map, use_container_width=True)
|
198 |
else:
|
199 |
st.info(translations['no_data'])
|