Spaces:
Running
Running
# pages/Statistics.py | |
import streamlit as st | |
import pandas as pd | |
import plotly.express as px | |
import plotly.graph_objects as go | |
import os | |
import re | |
from datetime import datetime, timedelta | |
import pycountry | |
import requests | |
# Importowanie funkcji z utils/functions.py | |
from utils.functions import get_phone_info, get_stats, get_history, get_fake_numbers | |
# Translations for the "Statistics" page, keyed by UI language name.
# Each language entry exposes the same set of keys. 'recent_days' is not a
# translated string: it is the default look-back window, in days, used to
# pre-fill the start of the date filter.
page_translations = {
    'Polish': {
        'page_title': "📊 Statystyki",
        'page_icon': "📈",
        'header': "📊 Statystyki Aplikacji",
        'description': "Poniżej znajdują się statystyki analizy wiadomości w aplikacji.",
        'total_analyses': "Liczba przeanalizowanych wiadomości",
        'total_frauds_detected': "Wykryte oszustwa",
        # Label for messages that were not classified as fraud.
        'safe_messages': "Bezpieczne wiadomości",
        'fraud_percentage': "Procent oszustw",
        'history_title': "Historia Analizowanych Wiadomości",
        'frauds_over_time': "Liczba Wykrytych Oszustw w Czasie",
        'risk_distribution_title': "Rozkład Ocen Ryzyka Oszustwa",
        'fraud_country_distribution_title': "Rozkład Oszustw Według Krajów",
        'fraud_trend_title': "Trendy Oszustw w Czasie",
        'risk_distribution': "Rozkład Ocen Ryzyka Oszustwa",
        'fraud_country_distribution': "Rozkład Oszustw Według Krajów",
        'fraud_vs_nonfraud': "Procentowy Podział: Oszustwa vs Bezpieczne",
        'no_data': "Brak dostępnych danych do wyświetlenia.",
        'download_button': "📥 Pobierz dane jako CSV",
        'select_date_range': "Wybierz zakres dat:",
        'recent_days': 30,  # Default date range, in days
    },
    'German': {
        'page_title': "📊 Statistiken",
        'page_icon': "📈",
        'header': "📊 Anwendungsstatistiken",
        'description': "Nachfolgend finden Sie die Statistiken zur Nachrichtenanalyse in der Anwendung.",
        'total_analyses': "Anzahl der analysierten Nachrichten",
        'total_frauds_detected': "Erkannte Betrugsfälle",
        # Label for messages that were not classified as fraud.
        'safe_messages': "Sichere Nachrichten",
        'fraud_percentage': "Betrugsprozentsatz",
        'history_title': "Verlauf analysierter Nachrichten",
        'frauds_over_time': "Anzahl erkannter Betrugsfälle im Laufe der Zeit",
        'risk_distribution_title': "Verteilung der Betrugsrisikobewertungen",
        'fraud_country_distribution_title': "Verteilung der Betrugsfälle nach Ländern",
        'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
        'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
        'fraud_country_distribution': "Verteilung der Betrugsfälle nach Ländern",
        'fraud_vs_nonfraud': "Prozentuale Aufteilung: Betrug vs. Sicher",
        'no_data': "Keine Daten zum Anzeigen verfügbar.",
        'download_button': "📥 Daten als CSV herunterladen",
        'select_date_range': "Datumsbereich auswählen:",
        'recent_days': 30,
    },
    'English': {
        'page_title': "📊 Statistics",
        'page_icon': "📈",
        'header': "📊 Application Statistics",
        'description': "Below are the statistics of message analysis in the app.",
        'total_analyses': "Total Messages Analyzed",
        'total_frauds_detected': "Frauds Detected",
        # Label for messages that were not classified as fraud.
        'safe_messages': "Safe Messages",
        'fraud_percentage': "Fraud Percentage",
        'history_title': "History of Analyzed Messages",
        'frauds_over_time': "Number of Detected Frauds Over Time",
        'risk_distribution_title': "Distribution of Fraud Risk Scores",
        'fraud_country_distribution_title': "Fraud Distribution by Countries",
        'fraud_trend_title': "Fraud Trends Over Time",
        'risk_distribution': "Distribution of Fraud Risk Scores",
        'fraud_country_distribution': "Fraud Distribution by Countries",
        'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
        'no_data': "No data available to display.",
        'download_button': "📥 Download data as CSV",
        'select_date_range': "Select date range:",
        'recent_days': 30,
    },
}
# Maps lower-cased country names (Polish, English and German spellings) to
# ISO alpha-3 codes. The "unknown" placeholders map to None on purpose so
# they never resolve to a country.
country_name_mapping = {
    'niemcy': 'DEU',                  # Germany in Polish
    'germany': 'DEU',                 # Germany in English
    'deutschland': 'DEU',             # Germany in German
    'polska': 'POL',                  # Poland in Polish
    'poland': 'POL',                  # Poland in English
    'österreich': 'AUT',              # Austria in German
    'austria': 'AUT',                 # Austria in English
    'francja': 'FRA',                 # France in Polish
    'france': 'FRA',                  # France in English
    'frankreich': 'FRA',              # France in German
    'włochy': 'ITA',                  # Italy in Polish
    'italy': 'ITA',                   # Italy in English
    'italien': 'ITA',                 # Italy in German
    'hiszpania': 'ESP',               # Spain in Polish
    'spain': 'ESP',                   # Spain in English
    'spanien': 'ESP',                 # Spain in German
    'stany zjednoczone': 'USA',       # USA in Polish
    'usa': 'USA',                     # USA in English
    'vereinigte staaten': 'USA',      # USA in German
    'wielka brytania': 'GBR',         # United Kingdom in Polish
    'united kingdom': 'GBR',          # United Kingdom in English
    'vereinigtes königreich': 'GBR',  # United Kingdom in German
    'unknown': None,
    'nieznany': None,
    'unbekannt': None,
    # Add other countries as needed
}
def get_iso_alpha3(country_name):
    """Return the ISO alpha-3 code for a country name, or None if unresolved.

    The name is first looked up, case-insensitively and ignoring surrounding
    whitespace, in ``country_name_mapping``; when the name is not listed
    there, ``pycountry.countries.lookup`` is tried as a fallback.

    Args:
        country_name: Country name to resolve. Non-string values (for
            example ``None`` or a NaN cell from a DataFrame) are accepted
            and resolve to ``None`` instead of raising.

    Returns:
        The alpha-3 code as a string, or ``None`` when the name is empty,
        not a string, explicitly mapped to ``None``, or unknown to pycountry.
    """
    if not isinstance(country_name, str):
        return None

    name = country_name.strip()
    if not name:
        return None

    key = name.lower()
    # Membership test (rather than a truthiness check on .get()) so names that
    # are deliberately mapped to None are answered here without a pointless
    # pycountry lookup.
    if key in country_name_mapping:
        return country_name_mapping[key]

    # Not in the local mapping: fall back to pycountry.
    try:
        return pycountry.countries.lookup(name).alpha_3
    except LookupError:
        return None
# Risk assessments embed their score as "<n>/10"; compiled once for reuse.
_RISK_SCORE_PATTERN = re.compile(r'(\d+)/10')
# Minimum score (out of 10) at which a message is counted as a fraud.
_FRAUD_SCORE_THRESHOLD = 7
# Columns this page reads from every history record.
_REQUIRED_HISTORY_COLUMNS = ('timestamp', 'phone_number', 'risk_assessment')


def _extract_risk_score(risk_assessment):
    """Return the integer N of the first "N/10" in the text, or 0 if absent.

    Non-string values (e.g. NaN cells) yield 0 instead of raising.
    """
    if not isinstance(risk_assessment, str):
        return 0
    match = _RISK_SCORE_PATTERN.search(risk_assessment)
    return int(match.group(1)) if match else 0


def _filter_history(history, start_date, end_date):
    """Return history rows whose timestamp date lies in [start_date, end_date].

    Rows with unparseable timestamps are dropped. The result is an
    independent copy, so callers may add columns to it safely.

    Raises:
        KeyError: if a column listed in ``_REQUIRED_HISTORY_COLUMNS`` is missing.
    """
    df_history = pd.DataFrame(history)
    missing = [col for col in _REQUIRED_HISTORY_COLUMNS if col not in df_history.columns]
    if missing:
        raise KeyError(f"missing history columns: {', '.join(missing)}")

    # Make sure 'timestamp' is a datetime column; invalid values become NaT.
    df_history['timestamp'] = pd.to_datetime(df_history['timestamp'], errors='coerce')
    df_history = df_history.dropna(subset=['timestamp'])

    days = df_history['timestamp'].dt.date
    in_range = (days >= start_date) & (days <= end_date)
    return df_history.loc[in_range].copy()


def _count_frauds_per_day(df_frauds, count_label):
    """Return a frame with columns ['Date', count_label]: rows per calendar day."""
    per_day = df_frauds.groupby(df_frauds['timestamp'].dt.date).size()
    return per_day.rename_axis('Date').reset_index(name=count_label)


def _render_metrics(translations, total, frauds, percentage):
    """Show total, fraud count and fraud percentage as three side-by-side metrics."""
    col_total, col_frauds, col_percentage = st.columns(3)
    with col_total:
        st.metric(label=translations['total_analyses'], value=str(total))
    with col_frauds:
        st.metric(label=translations['total_frauds_detected'], value=str(frauds))
    with col_percentage:
        st.metric(label=translations['fraud_percentage'], value=f"{percentage:.2f}%")


def main(language):
    """Render the Statistics page in the requested UI language.

    Shows the global counters returned by ``get_stats()``, then a date-range
    filter over the records returned by ``get_history()``, followed by a
    history table with CSV export and charts for the selected range: fraud
    share (pie), frauds per day, risk-score distribution, frauds per country
    (choropleth) and a fraud-percentage gauge. A message counts as fraud when
    its risk assessment contains a score of at least 7/10.

    Args:
        language: Key into ``page_translations``; unknown values fall back
            to the 'Polish' entry.

    Returns:
        None. On a data-loading or filtering error an ``st.error`` message
        is shown and ``st.stop()`` is called.
    """
    translations = page_translations.get(language, page_translations['Polish'])
    st.title(translations['header'])
    st.markdown(translations['description'])

    # Load the aggregate counters and the per-message history.
    # Broad catch is deliberate: this is the UI boundary and any loader
    # failure should be shown to the user rather than crash the page.
    try:
        stats = get_stats()
        history = get_history()
    except Exception as e:
        st.error(f"{translations['no_data']} ({e})")
        st.stop()

    # Key metrics over all recorded analyses.
    total_analyses = stats.get("total_analyses", 0)
    total_frauds_detected = stats.get("total_frauds_detected", 0)
    if total_analyses > 0:
        fraud_percentage = (total_frauds_detected / total_analyses) * 100
    else:
        fraud_percentage = 0  # Report 0% when nothing has been analysed yet
    _render_metrics(translations, total_analyses, total_frauds_detected, fraud_percentage)
    st.markdown("---")

    # Interactive date filter in the main area.
    st.header(translations['select_date_range'])
    today = datetime.now().date()
    try:
        col_start, col_end = st.columns(2)
        with col_start:
            start_date = st.date_input(
                translations['select_date_range'] + " - " + "Start",
                value=today - timedelta(days=translations['recent_days']),
                min_value=today - timedelta(days=365),
                max_value=today,
            )
        with col_end:
            end_date = st.date_input(
                translations['select_date_range'] + " - " + "End",
                value=today,
                min_value=start_date,  # the end may not precede the start
                max_value=today,
            )
    except Exception as e:
        st.error(f"{translations['no_data']} ({e})")
        st.stop()

    if not history:
        st.info(translations['no_data'])
        return

    # Restrict the history to the selected date range.
    try:
        df_filtered = _filter_history(history, start_date, end_date)
    except Exception as e:
        st.error(f"{translations['no_data']} ({e})")
        st.stop()

    # History table.
    st.markdown(f"### {translations['history_title']}")
    if df_filtered.empty:
        # Nothing in range: skip the charts, which have nothing to plot.
        st.info(translations['no_data'])
        return

    st.dataframe(df_filtered[['timestamp', 'phone_number', 'risk_assessment']], height=300)
    # CSV export; built before helper columns are added so the file contains
    # only the stored history fields.
    csv = df_filtered.to_csv(index=False).encode('utf-8')
    st.download_button(
        label=translations['download_button'],
        data=csv,
        file_name='analysis_history.csv',
        mime='text/csv',
    )
    st.markdown("---")

    # Score every row once; all fraud-specific views below share this subset.
    df_filtered['risk_score'] = df_filtered['risk_assessment'].apply(_extract_risk_score)
    df_frauds = df_filtered[df_filtered['risk_score'] >= _FRAUD_SCORE_THRESHOLD]

    total_filtered = len(df_filtered)
    frauds_filtered = len(df_frauds)
    # total_filtered > 0 is guaranteed by the empty check above.
    fraud_percentage_filtered = (frauds_filtered / total_filtered) * 100

    # Metrics for the filtered history.
    st.markdown("### " + translations['fraud_trend_title'])
    _render_metrics(translations, total_filtered, frauds_filtered, fraud_percentage_filtered)

    # Fraud vs. non-fraud share.
    st.markdown("### " + translations['fraud_vs_nonfraud'])
    # The second slice holds the non-fraud messages; fall back to the legacy
    # label when the translation table has no 'safe_messages' entry.
    safe_label = translations.get('safe_messages', translations['total_analyses'])
    fig_fraud_pie = go.Figure(data=[go.Pie(
        labels=[translations['total_frauds_detected'], safe_label],
        values=[frauds_filtered, total_filtered - frauds_filtered],
        hole=.3,
        marker_colors=['#FF6347', '#4682B4'],
    )])
    fig_fraud_pie.update_layout(title_text=translations['fraud_vs_nonfraud'])
    st.plotly_chart(fig_fraud_pie, use_container_width=True)

    # Frauds per day (only rows classified as fraud are counted).
    st.markdown("### " + translations['frauds_over_time'])
    fraud_label = translations['total_frauds_detected']
    if df_frauds.empty:
        st.info(translations['no_data'])
    else:
        fraud_over_time = _count_frauds_per_day(df_frauds, fraud_label)
        fig_trend = px.line(
            fraud_over_time,
            x='Date',
            y=fraud_label,
            title=translations['frauds_over_time'],
            labels={'Date': translations['select_date_range'], fraud_label: fraud_label},
            markers=True,
        )
        fig_trend.update_traces(line=dict(color='firebrick'))
        st.plotly_chart(fig_trend, use_container_width=True)

    # Risk-score distribution over all filtered messages.
    st.markdown("### " + translations['risk_distribution_title'])
    risk_distribution = (
        df_filtered['risk_score']
        .value_counts()
        .sort_index()
        .rename_axis('risk_score')
        .reset_index(name='count')
    )
    fig_risk = px.bar(
        risk_distribution,
        x='risk_score',
        y='count',
        title=translations['risk_distribution'],
        labels={'risk_score': translations['risk_distribution'], 'count': translations['total_analyses']},
        color='risk_score',
        color_continuous_scale=px.colors.sequential.RdBu,
    )
    st.plotly_chart(fig_risk, use_container_width=True)

    # Frauds per country (only rows classified as fraud are counted).
    st.markdown("### " + translations['fraud_country_distribution_title'])

    def get_country(phone_number):
        # get_phone_info is unpacked as a 2-tuple whose first item is the country.
        country, _ = get_phone_info(phone_number)
        return country if country else "Unknown"

    # Series.apply (not DataFrame.apply(axis=1)) so an empty selection still
    # yields a Series instead of a DataFrame.
    fraud_countries = (
        df_frauds['phone_number']
        .apply(get_country)
        .value_counts()
        .rename_axis('country')
        .reset_index(name='counts')
    )
    # Attach country codes; rows without a resolvable code cannot be mapped.
    fraud_countries['iso_alpha'] = fraud_countries['country'].apply(
        lambda x: get_iso_alpha3(x) if x != "Unknown" else None)
    fraud_countries = fraud_countries.dropna(subset=['iso_alpha'])
    if not fraud_countries.empty:
        fig_map = px.choropleth(
            fraud_countries,
            locations='iso_alpha',
            color='counts',
            hover_name='country',
            color_continuous_scale=px.colors.sequential.Plasma,
            title=translations['fraud_country_distribution_title'],
        )
        fig_map.update_geos(showcountries=True, showcoastlines=True)
        st.plotly_chart(fig_map, use_container_width=True)
    else:
        st.info(translations['no_data'])
    st.markdown("---")

    # Gauge chart: fraud percentage within the selected range.
    st.markdown("### " + translations['fraud_percentage'])
    fig_gauge = go.Figure(go.Indicator(
        mode="gauge+number",
        value=fraud_percentage_filtered,
        title={'text': translations['fraud_percentage']},
        gauge={
            'axis': {'range': [0, 100]},
            'bar': {'color': "darkblue"},
            'steps': [
                {'range': [0, 20], 'color': "#55efc4"},
                {'range': [20, 40], 'color': "#81ecec"},
                {'range': [40, 60], 'color': "#74b9ff"},
                {'range': [60, 80], 'color': "#a29bfe"},
                {'range': [80, 100], 'color': "#d63031"},
            ],
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': 70,
            },
        },
    ))
    st.plotly_chart(fig_gauge, use_container_width=True)


# Make sure st.set_page_config() is called only in app.py