Spaces:

rafaldembski
/

ScamDetector

Running

File size: 14,744 Bytes

a320165
 
128b0c3
7ac8b0d
 
fe1d0fa
7360f5c
8deec8b
c7b500a
67ad7bd
27f4c94
 
 
 
67ad7bd
b3eb2dd
 
 
 
 
 
 
 
 
 
67ad7bd
 
 
 
 
 
 
 
8545836
67ad7bd
 
 
b3eb2dd
 
 
 
 
 
 
b881485
b3eb2dd
b881485
 
82d13f9
b881485
7360f5c
 
b881485
 
 
67ad7bd
b881485
2bc78fb
b3eb2dd
 
 
 
 
 
 
 
 
 
 
82d13f9
 
7360f5c
 
 
fe1d0fa
8545836
67ad7bd
 
2bc78fb
b3eb2dd
 
3655f6c
b881485
2bc78fb
b881485
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2bc78fb
b881485
 
2bc78fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b3eb2dd
4eb2567
 
004670a
 
4eb2567
7360f5c
2bc78fb
 
 
 
b881485
2bc78fb
4eb2567
b3eb2dd
7360f5c
 
6a02207
 
 
 
4eb2567
6a02207
 
4eb2567
6a02207
 
 
cb63f35
6a02207
4eb2567
27f4c94
4eb2567
2bc78fb
 
 
4eb2567
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2bc78fb
b881485
2bc78fb
4eb2567
004670a
b3eb2dd
2bc78fb
 
 
 
 
 
 
 
 
 
b881485
2bc78fb
4eb2567
27f4c94
 
 
 
4eb2567
27f4c94
 
 
 
 
 
 
 
 
 
4eb2567
27f4c94
4eb2567
67ad7bd
 
b881485
 
67ad7bd
4eb2567
67ad7bd
004670a
6a02207
 
 
 
 
 
 
4eb2567
67ad7bd
 
 
b881485
67ad7bd
 
 
 
4eb2567
82d13f9
67ad7bd
 
b881485
 
 
67ad7bd
82d13f9
4eb2567
82d13f9
67ad7bd
4eb2567
8deec8b
 
 
4eb2567
67ad7bd
 
82d13f9
7360f5c
b881485
7360f5c
82d13f9
4eb2567
82d13f9
67ad7bd
4eb2567
67ad7bd
 
 
4eb2567
67ad7bd
 
82d13f9
4eb2567
82d13f9
4eb2567
 
82d13f9
4eb2567
82d13f9
a320165
82d13f9
8545836
 
 
 
82d13f9
a320165
27f4c94
a320165
fe1d0fa
a320165
4eb2567
27f4c94
4eb2567
67ad7bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4eb2567
2bc78fb
 
4eb2567
6a02207

# pages/Statistics.py

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import os
import re
from datetime import datetime, timedelta
import pycountry
import requests

# Import helper functions from utils/functions.py
from utils.functions import get_phone_info, get_stats, get_history, get_fake_numbers

# UI strings for the "Statistics" page, keyed by language name and then by
# message id. All languages share the same icon and the same default
# look-back window, so those two values are defined once.
_STATS_PAGE_ICON = "📈"
_DEFAULT_RECENT_DAYS = 30  # default date-range length, in days

page_translations = {
    'Polish': dict(
        page_title="📊 Statystyki",
        page_icon=_STATS_PAGE_ICON,
        header="📊 Statystyki Aplikacji",
        description="Poniżej znajdują się statystyki analizy wiadomości w aplikacji.",
        total_analyses="Liczba przeanalizowanych wiadomości",
        total_frauds_detected="Wykryte oszustwa",
        fraud_percentage="Procent oszustw",
        history_title="Historia Analizowanych Wiadomości",
        frauds_over_time="Liczba Wykrytych Oszustw w Czasie",
        risk_distribution_title="Rozkład Ocen Ryzyka Oszustwa",
        fraud_country_distribution_title="Rozkład Oszustw Według Krajów",
        fraud_trend_title="Trendy Oszustw w Czasie",
        risk_distribution="Rozkład Ocen Ryzyka Oszustwa",
        fraud_country_distribution="Rozkład Oszustw Według Krajów",
        fraud_vs_nonfraud="Procentowy Podział: Oszustwa vs Bezpieczne",
        no_data="Brak dostępnych danych do wyświetlenia.",
        download_button="📥 Pobierz dane jako CSV",
        select_date_range="Wybierz zakres dat:",
        recent_days=_DEFAULT_RECENT_DAYS,
    ),
    'German': dict(
        page_title="📊 Statistiken",
        page_icon=_STATS_PAGE_ICON,
        header="📊 Anwendungsstatistiken",
        description="Nachfolgend finden Sie die Statistiken zur Nachrichtenanalyse in der Anwendung.",
        total_analyses="Anzahl der analysierten Nachrichten",
        total_frauds_detected="Erkannte Betrugsfälle",
        fraud_percentage="Betrugsprozentsatz",
        history_title="Verlauf analysierter Nachrichten",
        frauds_over_time="Anzahl erkannter Betrugsfälle im Laufe der Zeit",
        risk_distribution_title="Verteilung der Betrugsrisikobewertungen",
        fraud_country_distribution_title="Verteilung der Betrugsfälle nach Ländern",
        fraud_trend_title="Betrugstrends im Laufe der Zeit",
        risk_distribution="Verteilung der Betrugsrisikobewertungen",
        fraud_country_distribution="Verteilung der Betrugsfälle nach Ländern",
        fraud_vs_nonfraud="Prozentuale Aufteilung: Betrug vs. Sicher",
        no_data="Keine Daten zum Anzeigen verfügbar.",
        download_button="📥 Daten als CSV herunterladen",
        select_date_range="Datumsbereich auswählen:",
        recent_days=_DEFAULT_RECENT_DAYS,
    ),
    'English': dict(
        page_title="📊 Statistics",
        page_icon=_STATS_PAGE_ICON,
        header="📊 Application Statistics",
        description="Below are the statistics of message analysis in the app.",
        total_analyses="Total Messages Analyzed",
        total_frauds_detected="Frauds Detected",
        fraud_percentage="Fraud Percentage",
        history_title="History of Analyzed Messages",
        frauds_over_time="Number of Detected Frauds Over Time",
        risk_distribution_title="Distribution of Fraud Risk Scores",
        fraud_country_distribution_title="Fraud Distribution by Countries",
        fraud_trend_title="Fraud Trends Over Time",
        risk_distribution="Distribution of Fraud Risk Scores",
        fraud_country_distribution="Fraud Distribution by Countries",
        fraud_vs_nonfraud="Fraud vs Safe Messages Percentage",
        no_data="No data available to display.",
        download_button="📥 Download data as CSV",
        select_date_range="Select date range:",
        recent_days=_DEFAULT_RECENT_DAYS,
    ),
}

# Lower-case country names (Polish, English and German spellings) grouped by
# the ISO 3166-1 alpha-3 code they resolve to. Extend this table to support
# further countries.
_ALIASES_BY_ISO_CODE = {
    'DEU': ('niemcy', 'germany', 'deutschland'),
    'POL': ('polska', 'poland'),
    'AUT': ('österreich', 'austria'),
    'FRA': ('francja', 'france', 'frankreich'),
    'ITA': ('włochy', 'italy', 'italien'),
    'ESP': ('hiszpania', 'spain', 'spanien'),
    'USA': ('stany zjednoczone', 'usa', 'vereinigte staaten'),
    'GBR': ('wielka brytania', 'united kingdom', 'vereinigtes königreich'),
}

# Placeholder names that deliberately map to "no country" (None).
_UNKNOWN_COUNTRY_NAMES = ('unknown', 'nieznany', 'unbekannt')

# Flat lookup table: lower-case country name -> ISO alpha-3 code (or None).
country_name_mapping = {
    alias: iso_code
    for iso_code, aliases in _ALIASES_BY_ISO_CODE.items()
    for alias in aliases
}
country_name_mapping.update(dict.fromkeys(_UNKNOWN_COUNTRY_NAMES))

def get_iso_alpha3(country_name):
    """Resolve a country name to its ISO 3166-1 alpha-3 code.

    The name is first looked up in ``country_name_mapping`` (case-insensitive,
    surrounding whitespace ignored). Names that are not listed there are
    passed to ``pycountry.countries.lookup``.

    Args:
        country_name: Country name to resolve. Non-string values (for example
            ``None`` or a NaN coming from a DataFrame) are accepted and
            treated as unknown.

    Returns:
        The three-letter country code, or ``None`` when the name is empty,
        not a string, mapped to ``None`` in ``country_name_mapping``, or not
        recognised by ``pycountry``.
    """
    # Guard against None/NaN so callers can apply this to raw columns.
    if not isinstance(country_name, str):
        return None

    name = country_name.strip()
    if not name:
        return None

    key = name.lower()
    if key in country_name_mapping:
        # Covers both real codes and the placeholders mapped to None, so the
        # placeholders no longer trigger a pointless pycountry lookup.
        return country_name_mapping[key]

    try:
        return pycountry.countries.lookup(name).alpha_3
    except LookupError:
        return None

# Risk assessments embed their score as "<n>/10"; compiled once for reuse.
_RISK_SCORE_PATTERN = re.compile(r'(\d+)/10')

# A message whose score is at or above this value is counted as a fraud.
_FRAUD_SCORE_THRESHOLD = 7

# Columns of the history records that this page reads.
_HISTORY_COLUMNS = ('timestamp', 'phone_number', 'risk_assessment')


def _extract_risk_score(risk_assessment):
    """Return the integer N of the first "N/10" found in the text, else 0."""
    if not isinstance(risk_assessment, str):
        # Missing values (None/NaN) carry no score.
        return 0
    match = _RISK_SCORE_PATTERN.search(risk_assessment)
    return int(match.group(1)) if match else 0


def _lookup_country(phone_number):
    """Return the country reported by get_phone_info, or "Unknown"."""
    country, _ = get_phone_info(phone_number)
    return country if country else "Unknown"


def main(language):
    """Render the statistics page.

    Shows the global counters returned by ``get_stats()``, then lets the user
    pick a date range and renders, for the history entries returned by
    ``get_history()`` inside that range: a table with CSV export, summary
    metrics, a fraud/non-fraud pie chart, a per-day fraud trend, the risk
    score distribution, a per-country fraud map and a fraud-percentage gauge.

    Args:
        language: Key into ``page_translations``; unknown values fall back
            to the Polish texts.

    Returns:
        None. Output is written through Streamlit calls. On a data error the
        message is shown with ``st.error`` and the script run is ended with
        ``st.stop()``.
    """
    translations = page_translations.get(language, page_translations['Polish'])

    st.title(translations['header'])
    st.markdown(translations['description'])

    # Load the aggregated counters and the per-message history.
    try:
        stats = get_stats()
        history = get_history()
    except Exception as e:
        st.error(f"{translations['no_data']} ({e})")
        st.stop()

    # Global key metrics.
    total_analyses = stats.get("total_analyses", 0)
    total_frauds_detected = stats.get("total_frauds_detected", 0)
    if total_analyses > 0:
        fraud_percentage = (total_frauds_detected / total_analyses) * 100
    else:
        fraud_percentage = 0  # avoid dividing by zero when nothing was analysed

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric(label=translations['total_analyses'], value=str(total_analyses))
    with col2:
        st.metric(label=translations['total_frauds_detected'], value=str(total_frauds_detected))
    with col3:
        st.metric(label=translations['fraud_percentage'], value=f"{fraud_percentage:.2f}%")

    st.markdown("---")

    # Interactive date-range filter (at most one year back, up to today).
    st.header(translations['select_date_range'])
    try:
        today = datetime.now().date()
        col_start, col_end = st.columns(2)
        with col_start:
            start_date = st.date_input(
                translations['select_date_range'] + " - " + "Start",
                value=today - timedelta(days=translations['recent_days']),
                min_value=today - timedelta(days=365),
                max_value=today
            )
        with col_end:
            end_date = st.date_input(
                translations['select_date_range'] + " - " + "End",
                value=today,
                min_value=start_date,
                max_value=today
            )
    except Exception as e:
        st.error(f"{translations['no_data']} ({e})")
        st.stop()

    if not history:
        st.info(translations['no_data'])
        return

    # Build the date-filtered history frame.
    try:
        df_history = pd.DataFrame(history)
        missing_columns = [c for c in _HISTORY_COLUMNS if c not in df_history.columns]
        if missing_columns:
            # Fail here, inside the guarded section, instead of with a bare
            # KeyError further down.
            raise KeyError(f"missing columns: {missing_columns}")
        # Make sure 'timestamp' is a datetime and drop unparsable entries.
        df_history['timestamp'] = pd.to_datetime(df_history['timestamp'], errors='coerce')
        df_history = df_history.dropna(subset=['timestamp'])
        entry_dates = df_history['timestamp'].dt.date
        mask = (entry_dates >= start_date) & (entry_dates <= end_date)
        # Explicit copy: columns are added below, and assigning into a slice
        # of df_history would trigger pandas' chained-assignment warning.
        df_filtered = df_history.loc[mask].copy()
    except Exception as e:
        st.error(f"{translations['no_data']} ({e})")
        st.stop()

    # History table and CSV export (exported before helper columns are added).
    st.markdown(f"### {translations['history_title']}")
    if not df_filtered.empty:
        st.dataframe(df_filtered[list(_HISTORY_COLUMNS)], height=300)

        csv = df_filtered.to_csv(index=False).encode('utf-8')
        st.download_button(
            label=translations['download_button'],
            data=csv,
            file_name='analysis_history.csv',
            mime='text/csv',
        )
    else:
        st.info(translations['no_data'])

    st.markdown("---")

    # Parse every risk score once; all statistics below reuse this column.
    df_filtered['risk_score'] = (
        df_filtered['risk_assessment'].apply(_extract_risk_score).astype(int)
    )
    fraud_rows = df_filtered[df_filtered['risk_score'] >= _FRAUD_SCORE_THRESHOLD]

    total_filtered = len(df_filtered)
    frauds_filtered = len(fraud_rows)
    fraud_percentage_filtered = (frauds_filtered / total_filtered) * 100 if total_filtered > 0 else 0

    # Metrics for the selected date range.
    st.markdown("### " + translations['fraud_trend_title'])
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric(label=translations['total_analyses'], value=str(total_filtered))
    with col2:
        st.metric(label=translations['total_frauds_detected'], value=str(frauds_filtered))
    with col3:
        st.metric(label=translations['fraud_percentage'], value=f"{fraud_percentage_filtered:.2f}%")

    # Fraud vs. non-fraud share.
    # NOTE(review): the second slice holds the NON-fraud count but is labelled
    # with the 'total_analyses' text because no "safe messages" translation
    # key exists; consider adding one to page_translations.
    st.markdown("### " + translations['fraud_vs_nonfraud'])
    fraud_data = [frauds_filtered, total_filtered - frauds_filtered]
    fraud_labels = [translations['total_frauds_detected'], translations['total_analyses']]
    fig_fraud_pie = go.Figure(data=[go.Pie(labels=fraud_labels, values=fraud_data, hole=.3,
                                           marker_colors=['#FF6347', '#4682B4'])])
    fig_fraud_pie.update_layout(title_text=translations['fraud_vs_nonfraud'])
    st.plotly_chart(fig_fraud_pie, use_container_width=True)

    # Detected frauds per day. Only rows at or above the fraud threshold are
    # counted; previously every analysed message was counted here.
    st.markdown("### " + translations['frauds_over_time'])
    frauds_label = translations['total_frauds_detected']
    if not fraud_rows.empty:
        fraud_over_time = (
            fraud_rows.groupby(fraud_rows['timestamp'].dt.date)
            .size()
            .reset_index(name=frauds_label)
            .rename(columns={'timestamp': 'Date'})
        )
        fig_trend = px.line(fraud_over_time, x='Date', y=frauds_label, title=translations['frauds_over_time'],
                            labels={'Date': translations['select_date_range'], frauds_label: frauds_label},
                            markers=True)
        fig_trend.update_traces(line=dict(color='firebrick'))
        st.plotly_chart(fig_trend, use_container_width=True)
    else:
        st.info(translations['no_data'])

    # Distribution of risk scores over all messages in range.
    st.markdown("### " + translations['risk_distribution_title'])
    risk_distribution = df_filtered['risk_score'].value_counts().sort_index().reset_index()
    risk_distribution.columns = ['risk_score', 'count']
    fig_risk = px.bar(risk_distribution, x='risk_score', y='count', title=translations['risk_distribution'],
                      labels={'risk_score': translations['risk_distribution'], 'count': translations['total_analyses']},
                      color='risk_score', color_continuous_scale=px.colors.sequential.RdBu)
    st.plotly_chart(fig_risk, use_container_width=True)

    # Frauds per country. Series.apply is used (rather than a row-wise
    # DataFrame.apply) so an empty selection yields an empty Series, and only
    # fraud rows are resolved, matching the section title.
    st.markdown("### " + translations['fraud_country_distribution_title'])
    fraud_countries = (
        fraud_rows['phone_number'].apply(_lookup_country).value_counts().reset_index()
    )
    fraud_countries.columns = ['country', 'counts']

    # Attach ISO alpha-3 codes and drop countries that cannot be placed on the map.
    fraud_countries['iso_alpha'] = fraud_countries['country'].apply(
        lambda name: get_iso_alpha3(name) if name != "Unknown" else None)
    fraud_countries = fraud_countries.dropna(subset=['iso_alpha'])

    if not fraud_countries.empty:
        fig_map = px.choropleth(
            fraud_countries,
            locations='iso_alpha',
            color='counts',
            hover_name='country',
            color_continuous_scale=px.colors.sequential.Plasma,
            title=translations['fraud_country_distribution_title']
        )
        fig_map.update_geos(showcountries=True, showcoastlines=True)
        st.plotly_chart(fig_map, use_container_width=True)
    else:
        st.info(translations['no_data'])

    st.markdown("---")

    # Gauge chart: share of frauds within the selected range.
    st.markdown("### " + translations['fraud_percentage'])
    fig_gauge = go.Figure(go.Indicator(
        mode="gauge+number",
        value=fraud_percentage_filtered,
        title={'text': translations['fraud_percentage']},
        gauge={
            'axis': {'range': [0, 100]},
            'bar': {'color': "darkblue"},
            'steps': [
                {'range': [0, 20], 'color': "#55efc4"},
                {'range': [20, 40], 'color': "#81ecec"},
                {'range': [40, 60], 'color': "#74b9ff"},
                {'range': [60, 80], 'color': "#a29bfe"},
                {'range': [80, 100], 'color': "#d63031"}
            ],
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': 70
            }
        }
    ))
    st.plotly_chart(fig_gauge, use_container_width=True)

# Make sure st.set_page_config() is called only in app.py