Spaces:
Running
Running
File size: 14,744 Bytes
a320165 128b0c3 7ac8b0d fe1d0fa 7360f5c 8deec8b c7b500a 67ad7bd 27f4c94 67ad7bd b3eb2dd 67ad7bd 8545836 67ad7bd b3eb2dd b881485 b3eb2dd b881485 82d13f9 b881485 7360f5c b881485 67ad7bd b881485 2bc78fb b3eb2dd 82d13f9 7360f5c fe1d0fa 8545836 67ad7bd 2bc78fb b3eb2dd 3655f6c b881485 2bc78fb b881485 2bc78fb b881485 2bc78fb b3eb2dd 4eb2567 004670a 4eb2567 7360f5c 2bc78fb b881485 2bc78fb 4eb2567 b3eb2dd 7360f5c 6a02207 4eb2567 6a02207 4eb2567 6a02207 cb63f35 6a02207 4eb2567 27f4c94 4eb2567 2bc78fb 4eb2567 2bc78fb b881485 2bc78fb 4eb2567 004670a b3eb2dd 2bc78fb b881485 2bc78fb 4eb2567 27f4c94 4eb2567 27f4c94 4eb2567 27f4c94 4eb2567 67ad7bd b881485 67ad7bd 4eb2567 67ad7bd 004670a 6a02207 4eb2567 67ad7bd b881485 67ad7bd 4eb2567 82d13f9 67ad7bd b881485 67ad7bd 82d13f9 4eb2567 82d13f9 67ad7bd 4eb2567 8deec8b 4eb2567 67ad7bd 82d13f9 7360f5c b881485 7360f5c 82d13f9 4eb2567 82d13f9 67ad7bd 4eb2567 67ad7bd 4eb2567 67ad7bd 82d13f9 4eb2567 82d13f9 4eb2567 82d13f9 4eb2567 82d13f9 a320165 82d13f9 8545836 82d13f9 a320165 27f4c94 a320165 fe1d0fa a320165 4eb2567 27f4c94 4eb2567 67ad7bd 4eb2567 2bc78fb 4eb2567 6a02207 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 |
# pages/Statistics.py
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import os
import re
from datetime import datetime, timedelta
import pycountry
import requests
# Import helper functions from utils/functions.py
from utils.functions import get_phone_info, get_stats, get_history, get_fake_numbers
# UI strings for the "Statistics" page, keyed first by language name and then
# by message id. All three languages define the same set of keys.
# The non-ASCII text (emoji, Polish and German diacritics) had been stored
# mis-encoded; the literals below carry the intended Unicode characters.
page_translations = {
    'Polish': {
        'page_title': "📊 Statystyki",
        'page_icon': "📈",
        'header': "📊 Statystyki Aplikacji",
        'description': "Poniżej znajdują się statystyki analizy wiadomości w aplikacji.",
        'total_analyses': "Liczba przeanalizowanych wiadomości",
        'total_frauds_detected': "Wykryte oszustwa",
        'fraud_percentage': "Procent oszustw",
        'history_title': "Historia Analizowanych Wiadomości",
        'frauds_over_time': "Liczba Wykrytych Oszustw w Czasie",
        'risk_distribution_title': "Rozkład Ocen Ryzyka Oszustwa",
        'fraud_country_distribution_title': "Rozkład Oszustw Według Krajów",
        'fraud_trend_title': "Trendy Oszustw w Czasie",
        'risk_distribution': "Rozkład Ocen Ryzyka Oszustwa",
        'fraud_country_distribution': "Rozkład Oszustw Według Krajów",
        'fraud_vs_nonfraud': "Procentowy Podział: Oszustwa vs Bezpieczne",
        'no_data': "Brak dostępnych danych do wyświetlenia.",
        'download_button': "📥 Pobierz dane jako CSV",
        'select_date_range': "Wybierz zakres dat:",
        'recent_days': 30,  # default date range, in days
    },
    'German': {
        'page_title': "📊 Statistiken",
        'page_icon': "📈",
        'header': "📊 Anwendungsstatistiken",
        'description': "Nachfolgend finden Sie die Statistiken zur Nachrichtenanalyse in der Anwendung.",
        'total_analyses': "Anzahl der analysierten Nachrichten",
        'total_frauds_detected': "Erkannte Betrugsfälle",
        'fraud_percentage': "Betrugsprozentsatz",
        'history_title': "Verlauf analysierter Nachrichten",
        'frauds_over_time': "Anzahl erkannter Betrugsfälle im Laufe der Zeit",
        'risk_distribution_title': "Verteilung der Betrugsrisikobewertungen",
        'fraud_country_distribution_title': "Verteilung der Betrugsfälle nach Ländern",
        'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
        'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
        'fraud_country_distribution': "Verteilung der Betrugsfälle nach Ländern",
        'fraud_vs_nonfraud': "Prozentuale Aufteilung: Betrug vs. Sicher",
        'no_data': "Keine Daten zum Anzeigen verfügbar.",
        'download_button': "📥 Daten als CSV herunterladen",
        'select_date_range': "Datumsbereich auswählen:",
        'recent_days': 30,
    },
    'English': {
        'page_title': "📊 Statistics",
        'page_icon': "📈",
        'header': "📊 Application Statistics",
        'description': "Below are the statistics of message analysis in the app.",
        'total_analyses': "Total Messages Analyzed",
        'total_frauds_detected': "Frauds Detected",
        'fraud_percentage': "Fraud Percentage",
        'history_title': "History of Analyzed Messages",
        'frauds_over_time': "Number of Detected Frauds Over Time",
        'risk_distribution_title': "Distribution of Fraud Risk Scores",
        'fraud_country_distribution_title': "Fraud Distribution by Countries",
        'fraud_trend_title': "Fraud Trends Over Time",
        'risk_distribution': "Distribution of Fraud Risk Scores",
        'fraud_country_distribution': "Fraud Distribution by Countries",
        'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
        'no_data': "No data available to display.",
        'download_button': "📥 Download data as CSV",
        'select_date_range': "Select date range:",
        'recent_days': 30,
    },
}
# Maps lower-cased country names (Polish, English and German spellings) to
# ISO 3166-1 alpha-3 codes. The "unknown" spellings map to None on purpose.
# Keys containing diacritics had been stored mis-encoded and could never match
# a real lower-cased name; they are restored to proper Unicode here.
country_name_mapping = {
    'niemcy': 'DEU',                  # Germany in Polish
    'germany': 'DEU',                 # Germany in English
    'deutschland': 'DEU',             # Germany in German
    'polska': 'POL',                  # Poland in Polish
    'poland': 'POL',                  # Poland in English
    'österreich': 'AUT',              # Austria in German
    'austria': 'AUT',                 # Austria in English
    'francja': 'FRA',                 # France in Polish
    'france': 'FRA',                  # France in English
    'frankreich': 'FRA',              # France in German
    'włochy': 'ITA',                  # Italy in Polish
    'italy': 'ITA',                   # Italy in English
    'italien': 'ITA',                 # Italy in German
    'hiszpania': 'ESP',               # Spain in Polish
    'spain': 'ESP',                   # Spain in English
    'spanien': 'ESP',                 # Spain in German
    'stany zjednoczone': 'USA',       # USA in Polish
    'usa': 'USA',                     # USA in English
    'vereinigte staaten': 'USA',      # USA in German
    'wielka brytania': 'GBR',         # United Kingdom in Polish
    'united kingdom': 'GBR',          # United Kingdom in English
    'vereinigtes königreich': 'GBR',  # United Kingdom in German
    'unknown': None,
    'nieznany': None,
    'unbekannt': None,
    # Add other countries as needed.
}
def get_iso_alpha3(country_name):
    """Return the ISO 3166-1 alpha-3 code for a country name, or None.

    The name is first looked up (case-insensitively, surrounding whitespace
    ignored) in ``country_name_mapping``; if it is not listed there, the
    lookup falls back to ``pycountry.countries.lookup``.

    Args:
        country_name: Country name as a string. Any non-string value
            (e.g. ``None`` or NaN) and blank strings yield ``None``.

    Returns:
        The three-letter country code, or ``None`` when the name is missing,
        explicitly mapped to ``None``, or unknown to pycountry.
    """
    # Guard first: calling .lower() on None/NaN would raise AttributeError.
    if not isinstance(country_name, str):
        return None
    name = country_name.strip()
    if not name:
        return None

    key = name.lower()
    if key in country_name_mapping:
        # Covers both real codes and the explicit "unknown" -> None entries,
        # so those never trigger a pointless pycountry lookup.
        return country_name_mapping[key]

    # Not in the local mapping: let pycountry try to resolve the name.
    try:
        return pycountry.countries.lookup(name).alpha_3
    except LookupError:
        return None
# Risk scores at or above this value (out of 10) are counted as fraud.
_FRAUD_SCORE_THRESHOLD = 7

# Matches the "N/10" score embedded in a risk-assessment text.
_RISK_SCORE_PATTERN = re.compile(r'(\d+)/10')

# Label for the non-fraud pie slice, used when the translation table has no
# 'safe_messages' entry.
_SAFE_LABELS = {
    'Polish': "Bezpieczne wiadomości",
    'German': "Sichere Nachrichten",
    'English': "Safe Messages",
}


def _extract_risk_score(risk_assessment):
    """Return N from the first 'N/10' in the text, or 0 if there is none."""
    # Non-string values (None, NaN) would make the regex search raise.
    if not isinstance(risk_assessment, str):
        return 0
    match = _RISK_SCORE_PATTERN.search(risk_assessment)
    return int(match.group(1)) if match else 0


def _lookup_countries(phone_numbers):
    """Return a Series of country names ("Unknown" if none) for the numbers."""
    # Resolve each distinct number once instead of once per row.
    country_by_number = {}
    for number in phone_numbers.unique():
        country, _ = get_phone_info(number)
        country_by_number[number] = country if country else "Unknown"
    return phone_numbers.map(country_by_number).fillna("Unknown")


def _counts_frame(values, key_column, count_column):
    """Return a two-column frame of distinct values and counts, sorted by value."""
    counts = values.value_counts().sort_index()
    return pd.DataFrame({key_column: counts.index, count_column: counts.to_numpy()})


def main(language):
    """Render the Statistics page.

    Shows the global counters from ``get_stats()``, a date-range filter, and
    for the history entries from ``get_history()`` that fall inside the
    selected range: a table with CSV export, fraud metrics, a fraud/safe pie
    chart, the number of frauds per day, the risk-score distribution, a
    per-country fraud map and a fraud-percentage gauge. An entry counts as
    fraud when its risk score is at least 7 out of 10.

    Args:
        language: Key into ``page_translations``; unknown languages fall back
            to the Polish strings.
    """
    translations = page_translations.get(language, page_translations['Polish'])
    no_data = translations['no_data']

    st.title(translations['header'])
    st.markdown(translations['description'])

    # Load the aggregated statistics and the analysis history.
    try:
        stats = get_stats()
        history = get_history()
    except Exception as e:
        st.error(f"{no_data} ({e})")
        st.stop()

    # Key metrics over everything ever analysed.
    total_analyses = stats.get("total_analyses", 0)
    total_frauds_detected = stats.get("total_frauds_detected", 0)
    if total_analyses > 0:
        fraud_percentage = (total_frauds_detected / total_analyses) * 100
    else:
        fraud_percentage = 0  # no analyses yet: report 0%

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric(label=translations['total_analyses'], value=str(total_analyses))
    with col2:
        st.metric(label=translations['total_frauds_detected'], value=str(total_frauds_detected))
    with col3:
        st.metric(label=translations['fraud_percentage'], value=f"{fraud_percentage:.2f}%")
    st.markdown("---")

    # Interactive date filter in the main area.
    st.header(translations['select_date_range'])
    try:
        # Read the clock once so all bounds refer to the same day.
        today = datetime.now().date()
        col_start, col_end = st.columns(2)
        with col_start:
            start_date = st.date_input(
                translations['select_date_range'] + " - " + "Start",
                value=today - timedelta(days=translations['recent_days']),
                min_value=today - timedelta(days=365),
                max_value=today,
            )
        with col_end:
            end_date = st.date_input(
                translations['select_date_range'] + " - " + "End",
                value=today,
                min_value=start_date,
                max_value=today,
            )
    except Exception as e:
        st.error(f"{no_data} ({e})")
        st.stop()

    if not history:
        st.info(no_data)
        return

    # Build the filtered history plus the per-row risk score / fraud flag.
    try:
        df_history = pd.DataFrame(history)
        missing = {'timestamp', 'phone_number', 'risk_assessment'} - set(df_history.columns)
        if missing:
            raise KeyError(", ".join(sorted(missing)))
        # Make sure 'timestamp' is datetime; drop entries with invalid dates.
        df_history['timestamp'] = pd.to_datetime(df_history['timestamp'], errors='coerce')
        df_history = df_history.dropna(subset=['timestamp'])
        entry_dates = df_history['timestamp'].dt.date
        df_filtered = df_history.loc[(entry_dates >= start_date) & (entry_dates <= end_date)]
        # Derived values are kept in separate Series so the displayed and
        # exported history keeps exactly its original columns.
        risk_scores = df_filtered['risk_assessment'].apply(_extract_risk_score)
        is_fraud = risk_scores >= _FRAUD_SCORE_THRESHOLD
    except Exception as e:
        st.error(f"{no_data} ({e})")
        st.stop()

    # History table with optional CSV export.
    st.markdown(f"### {translations['history_title']}")
    if df_filtered.empty:
        st.info(no_data)
    else:
        st.dataframe(df_filtered[['timestamp', 'phone_number', 'risk_assessment']], height=300)
        csv = df_filtered.to_csv(index=False).encode('utf-8')
        st.download_button(
            label=translations['download_button'],
            data=csv,
            file_name='analysis_history.csv',
            mime='text/csv',
        )
    st.markdown("---")

    # Metrics recomputed for the selected date range.
    total_filtered = len(df_filtered)
    frauds_filtered = int(is_fraud.sum())
    fraud_percentage_filtered = (frauds_filtered / total_filtered) * 100 if total_filtered > 0 else 0

    st.markdown("### " + translations['fraud_trend_title'])
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric(label=translations['total_analyses'], value=str(total_filtered))
    with col2:
        st.metric(label=translations['total_frauds_detected'], value=str(frauds_filtered))
    with col3:
        st.metric(label=translations['fraud_percentage'], value=f"{fraud_percentage_filtered:.2f}%")

    # Fraud vs. safe split. The second slice holds the NON-fraud messages, so
    # it must not be labelled with the "total analysed" string.
    st.markdown("### " + translations['fraud_vs_nonfraud'])
    safe_label = translations.get(
        'safe_messages', _SAFE_LABELS.get(language, _SAFE_LABELS['Polish']))
    fig_fraud_pie = go.Figure(data=[go.Pie(
        labels=[translations['total_frauds_detected'], safe_label],
        values=[frauds_filtered, total_filtered - frauds_filtered],
        hole=.3,
        marker_colors=['#FF6347', '#4682B4'],
    )])
    fig_fraud_pie.update_layout(title_text=translations['fraud_vs_nonfraud'])
    st.plotly_chart(fig_fraud_pie, use_container_width=True)

    # Frauds per day: only entries flagged as fraud are counted.
    st.markdown("### " + translations['frauds_over_time'])
    frauds_label = translations['total_frauds_detected']
    fraud_dates = df_filtered.loc[is_fraud, 'timestamp'].dt.date
    if fraud_dates.empty:
        st.info(no_data)
    else:
        fraud_over_time = _counts_frame(fraud_dates, 'Date', frauds_label)
        fig_trend = px.line(
            fraud_over_time, x='Date', y=frauds_label,
            title=translations['frauds_over_time'],
            labels={'Date': translations['select_date_range'], frauds_label: frauds_label},
            markers=True,
        )
        fig_trend.update_traces(line=dict(color='firebrick'))
        st.plotly_chart(fig_trend, use_container_width=True)

    # Distribution of risk scores over all filtered entries.
    st.markdown("### " + translations['risk_distribution_title'])
    risk_distribution = _counts_frame(risk_scores, 'risk_score', 'count')
    fig_risk = px.bar(
        risk_distribution, x='risk_score', y='count',
        title=translations['risk_distribution'],
        labels={'risk_score': translations['risk_distribution'],
                'count': translations['total_analyses']},
        color='risk_score', color_continuous_scale=px.colors.sequential.RdBu,
    )
    st.plotly_chart(fig_risk, use_container_width=True)

    # Frauds by country: again restricted to entries flagged as fraud.
    st.markdown("### " + translations['fraud_country_distribution_title'])
    fraud_countries = _counts_frame(
        _lookup_countries(df_filtered.loc[is_fraud, 'phone_number']), 'country', 'counts')
    # Attach ISO codes and drop countries that cannot be placed on the map.
    fraud_countries['iso_alpha'] = fraud_countries['country'].apply(
        lambda name: get_iso_alpha3(name) if name != "Unknown" else None)
    fraud_countries = fraud_countries.dropna(subset=['iso_alpha'])
    if fraud_countries.empty:
        st.info(no_data)
    else:
        fig_map = px.choropleth(
            fraud_countries,
            locations='iso_alpha',
            color='counts',
            hover_name='country',
            color_continuous_scale=px.colors.sequential.Plasma,
            title=translations['fraud_country_distribution_title'],
        )
        fig_map.update_geos(showcountries=True, showcoastlines=True)
        st.plotly_chart(fig_map, use_container_width=True)
    st.markdown("---")

    # Gauge showing the fraud share within the selected range.
    st.markdown("### " + translations['fraud_percentage'])
    fig_gauge = go.Figure(go.Indicator(
        mode="gauge+number",
        value=fraud_percentage_filtered,
        title={'text': translations['fraud_percentage']},
        gauge={
            'axis': {'range': [0, 100]},
            'bar': {'color': "darkblue"},
            'steps': [
                {'range': [0, 20], 'color': "#55efc4"},
                {'range': [20, 40], 'color': "#81ecec"},
                {'range': [40, 60], 'color': "#74b9ff"},
                {'range': [60, 80], 'color': "#a29bfe"},
                {'range': [80, 100], 'color': "#d63031"},
            ],
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': 70,
            },
        },
    ))
    st.plotly_chart(fig_gauge, use_container_width=True)
# Make sure st.set_page_config() is called only in app.py
|