# pages/Statistics.py

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import os
import re
from datetime import datetime, timedelta
import pycountry
import requests

# Import helper functions from utils/functions.py
from utils.functions import get_phone_info, get_stats, get_history, get_fake_numbers

# Translations for the "Statistics" page, keyed by language name.
# Every language entry carries the same set of keys; main() falls back to
# the 'Polish' entry when it is asked for a language that is not listed here.
page_translations = {
    'Polish': {
        'page_title': "📊 Statystyki",
        'page_icon': "📈",
        'header': "📊 Statystyki Aplikacji",
        'description': "Poniżej znajdują się statystyki analizy wiadomości w aplikacji.",
        'total_analyses': "Liczba przeanalizowanych wiadomości",
        'total_frauds_detected': "Wykryte oszustwa",
        'fraud_percentage': "Procent oszustw",
        'history_title': "Historia Analizowanych Wiadomości",
        'frauds_over_time': "Liczba Wykrytych Oszustw w Czasie",
        'risk_distribution_title': "Rozkład Ocen Ryzyka Oszustwa",
        'fraud_country_distribution_title': "Rozkład Oszustw Według Krajów",
        'fraud_trend_title': "Trendy Oszustw w Czasie",
        'risk_distribution': "Rozkład Ocen Ryzyka Oszustwa",
        'fraud_country_distribution': "Rozkład Oszustw Według Krajów",
        'fraud_vs_nonfraud': "Procentowy Podział: Oszustwa vs Bezpieczne",
        'no_data': "Brak dostępnych danych do wyświetlenia.",
        'download_button': "📥 Pobierz dane jako CSV",
        'select_date_range': "Wybierz zakres dat:",
        'recent_days': 30  # Default date range, in days, for the date filter
    },
    'German': {
        'page_title': "📊 Statistiken",
        'page_icon': "📈",
        'header': "📊 Anwendungsstatistiken",
        'description': "Nachfolgend finden Sie die Statistiken zur Nachrichtenanalyse in der Anwendung.",
        'total_analyses': "Anzahl der analysierten Nachrichten",
        'total_frauds_detected': "Erkannte Betrugsfälle",
        'fraud_percentage': "Betrugsprozentsatz",
        'history_title': "Verlauf analysierter Nachrichten",
        'frauds_over_time': "Anzahl erkannter Betrugsfälle im Laufe der Zeit",
        'risk_distribution_title': "Verteilung der Betrugsrisikobewertungen",
        'fraud_country_distribution_title': "Verteilung der Betrugsfälle nach Ländern",
        'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
        'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
        'fraud_country_distribution': "Verteilung der Betrugsfälle nach Ländern",
        'fraud_vs_nonfraud': "Prozentuale Aufteilung: Betrug vs. Sicher",
        'no_data': "Keine Daten zum Anzeigen verfügbar.",
        'download_button': "📥 Daten als CSV herunterladen",
        'select_date_range': "Datumsbereich auswählen:",
        'recent_days': 30
    },
    'English': {
        'page_title': "📊 Statistics",
        'page_icon': "📈",
        'header': "📊 Application Statistics",
        'description': "Below are the statistics of message analysis in the app.",
        'total_analyses': "Total Messages Analyzed",
        'total_frauds_detected': "Frauds Detected",
        'fraud_percentage': "Fraud Percentage",
        'history_title': "History of Analyzed Messages",
        'frauds_over_time': "Number of Detected Frauds Over Time",
        'risk_distribution_title': "Distribution of Fraud Risk Scores",
        'fraud_country_distribution_title': "Fraud Distribution by Countries",
        'fraud_trend_title': "Fraud Trends Over Time",
        'risk_distribution': "Distribution of Fraud Risk Scores",
        'fraud_country_distribution': "Fraud Distribution by Countries",
        'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
        'no_data': "No data available to display.",
        'download_button': "📥 Download data as CSV",
        'select_date_range': "Select date range:",
        'recent_days': 30
    }
}

# Lower-cased country names (Polish, English and German spellings) grouped by
# the ISO alpha-3 code they resolve to. The final group lists the "unknown"
# placeholders, which deliberately resolve to None.
# Add further countries as needed.
_COUNTRY_ALIASES_BY_ISO_ALPHA3 = (
    ('DEU', ('niemcy', 'germany', 'deutschland')),
    ('POL', ('polska', 'poland')),
    ('AUT', ('österreich', 'austria')),
    ('FRA', ('francja', 'france', 'frankreich')),
    ('ITA', ('włochy', 'italy', 'italien')),
    ('ESP', ('hiszpania', 'spain', 'spanien')),
    ('USA', ('stany zjednoczone', 'usa', 'vereinigte staaten')),
    ('GBR', ('wielka brytania', 'united kingdom', 'vereinigtes königreich')),
    (None, ('unknown', 'nieznany', 'unbekannt')),
)

# Flat lookup table: lower-cased country name -> ISO alpha-3 code (or None).
country_name_mapping = {
    alias: iso_code
    for iso_code, aliases in _COUNTRY_ALIASES_BY_ISO_ALPHA3
    for alias in aliases
}

def get_iso_alpha3(country_name):
    """Resolve a country name to its ISO alpha-3 code.

    The name is first looked up (trimmed and lower-cased) in
    ``country_name_mapping``; if it is not listed there, pycountry's
    ``countries.lookup`` is tried as a fallback.

    Args:
        country_name: Country name to resolve.

    Returns:
        The three-letter code, or ``None`` when the value is not a string,
        is blank, is mapped to ``None`` in ``country_name_mapping``, or is
        not recognised by pycountry.
    """
    # Missing values (None, NaN, ...) cannot be resolved; do not crash on them.
    if not isinstance(country_name, str):
        return None

    cleaned = country_name.strip()
    if not cleaned:
        return None

    # Membership test rather than truthiness: names explicitly mapped to None
    # must return None here instead of falling through to pycountry.
    key = cleaned.lower()
    if key in country_name_mapping:
        return country_name_mapping[key]

    # Not in the local table - fall back to pycountry.
    try:
        return pycountry.countries.lookup(cleaned).alpha_3
    except LookupError:
        return None

# The risk assessment text embeds its score as "<n>/10".
_RISK_SCORE_PATTERN = re.compile(r'(\d+)/10')

# Scores at or above this value (out of 10) are counted as fraud.
_FRAUD_SCORE_THRESHOLD = 7

# Columns every history entry must provide for the page to render.
_REQUIRED_HISTORY_COLUMNS = ('timestamp', 'phone_number', 'risk_assessment')

# Label for the non-fraud pie slice; the wording is taken from the existing
# 'fraud_vs_nonfraud' titles. A 'safe_messages' translation key, if one is
# ever added to page_translations, takes precedence over these.
_SAFE_LABELS = {
    'Polish': "Bezpieczne",
    'German': "Sicher",
    'English': "Safe",
}


def _extract_risk_score(risk_assessment):
    """Return the integer from the first "<n>/10" fragment, or 0 if there is none.

    Non-string values (for example NaN for a missing field) also yield 0.
    """
    if not isinstance(risk_assessment, str):
        return 0
    match = _RISK_SCORE_PATTERN.search(risk_assessment)
    return int(match.group(1)) if match else 0


def _lookup_country(phone_number):
    """Return the country reported by get_phone_info, or "Unknown" if it is empty."""
    country, _ = get_phone_info(phone_number)
    return country if country else "Unknown"


def _render_metrics(translations, total, frauds, percentage):
    """Show total / frauds / fraud-percentage as three side-by-side metrics."""
    col_total, col_frauds, col_percentage = st.columns(3)
    with col_total:
        st.metric(label=translations['total_analyses'], value=str(total))
    with col_frauds:
        st.metric(label=translations['total_frauds_detected'], value=str(frauds))
    with col_percentage:
        st.metric(label=translations['fraud_percentage'], value=f"{percentage:.2f}%")


def main(language):
    """Render the statistics page in the requested language.

    Shows the overall counters returned by ``get_stats()``, then lets the
    user choose a date range and renders a table, CSV export, metrics and
    charts for the ``get_history()`` entries that fall inside that range.

    Args:
        language: Key into ``page_translations``; unknown values fall back
            to the Polish texts.
    """
    translations = page_translations.get(language, page_translations['Polish'])
    fraud_label = translations['total_frauds_detected']
    safe_label = translations.get(
        'safe_messages', _SAFE_LABELS.get(language, _SAFE_LABELS['Polish'])
    )

    st.title(translations['header'])
    st.markdown(translations['description'])

    # Load the aggregated counters and the per-message history.
    try:
        stats = get_stats() or {}
        history = get_history()
    except Exception as e:
        st.error(f"{translations['no_data']} ({e})")
        st.stop()

    # Overall key metrics (independent of the date filter below).
    total_analyses = stats.get("total_analyses", 0)
    total_frauds_detected = stats.get("total_frauds_detected", 0)
    if total_analyses > 0:
        fraud_percentage = (total_frauds_detected / total_analyses) * 100
    else:
        fraud_percentage = 0  # No analyses yet: report 0% instead of dividing by zero
    _render_metrics(translations, total_analyses, total_frauds_detected, fraud_percentage)

    st.markdown("---")

    # Interactive date filter in the main area.
    st.header(translations['select_date_range'])
    today = datetime.now().date()
    try:
        col_start, col_end = st.columns(2)
        with col_start:
            start_date = st.date_input(
                translations['select_date_range'] + " - " + "Start",
                value=today - timedelta(days=translations['recent_days']),
                min_value=today - timedelta(days=365),
                max_value=today
            )
        with col_end:
            end_date = st.date_input(
                translations['select_date_range'] + " - " + "End",
                value=today,
                min_value=start_date,
                max_value=today
            )
    except Exception as e:
        st.error(f"{translations['no_data']} ({e})")
        st.stop()

    if not history:
        st.info(translations['no_data'])
        return

    # Restrict the history to the selected date range.
    try:
        df_history = pd.DataFrame(history)
        missing_columns = [c for c in _REQUIRED_HISTORY_COLUMNS if c not in df_history.columns]
        if missing_columns:
            raise KeyError(", ".join(missing_columns))
        # Make sure 'timestamp' is a datetime; unparseable values become NaT
        # and the corresponding entries are dropped.
        df_history['timestamp'] = pd.to_datetime(df_history['timestamp'], errors='coerce')
        df_history = df_history.dropna(subset=['timestamp'])
        entry_dates = df_history['timestamp'].dt.date
        df_filtered = df_history.loc[(entry_dates >= start_date) & (entry_dates <= end_date)]
    except Exception as e:
        st.error(f"{translations['no_data']} ({e})")
        st.stop()

    # History table with optional CSV export.
    st.markdown(f"### {translations['history_title']}")
    if not df_filtered.empty:
        st.dataframe(df_filtered[['timestamp', 'phone_number', 'risk_assessment']], height=300)

        csv = df_filtered.to_csv(index=False).encode('utf-8')
        st.download_button(
            label=translations['download_button'],
            data=csv,
            file_name='analysis_history.csv',
            mime='text/csv',
        )
    else:
        st.info(translations['no_data'])

    st.markdown("---")

    # Derive the risk score once and reuse it for every statistic below.
    # Kept as standalone Series so the filtered slice is never mutated.
    risk_scores = df_filtered['risk_assessment'].map(_extract_risk_score).astype(int)
    is_fraud = risk_scores >= _FRAUD_SCORE_THRESHOLD
    fraud_rows = df_filtered[is_fraud]

    total_filtered = df_filtered.shape[0]
    frauds_filtered = int(is_fraud.sum())
    fraud_percentage_filtered = (frauds_filtered / total_filtered) * 100 if total_filtered > 0 else 0

    # Metrics for the filtered history.
    st.markdown("### " + translations['fraud_trend_title'])
    _render_metrics(translations, total_filtered, frauds_filtered, fraud_percentage_filtered)

    # Fraud vs. safe share. The second slice holds the NON-fraud count, so it
    # gets the "safe" label rather than the "total analyses" label.
    st.markdown("### " + translations['fraud_vs_nonfraud'])
    fig_fraud_pie = go.Figure(data=[go.Pie(
        labels=[fraud_label, safe_label],
        values=[frauds_filtered, total_filtered - frauds_filtered],
        hole=.3,
        marker_colors=['#FF6347', '#4682B4'],
    )])
    fig_fraud_pie.update_layout(title_text=translations['fraud_vs_nonfraud'])
    st.plotly_chart(fig_fraud_pie, use_container_width=True)

    # Detected frauds per day (only entries at or above the fraud threshold).
    st.markdown("### " + translations['frauds_over_time'])
    if fraud_rows.empty:
        st.info(translations['no_data'])
    else:
        frauds_per_day = (
            fraud_rows.groupby(fraud_rows['timestamp'].dt.date)
            .size()
            .reset_index(name=fraud_label)
            .rename(columns={'timestamp': 'Date'})
        )
        fig_trend = px.line(
            frauds_per_day,
            x='Date',
            y=fraud_label,
            title=translations['frauds_over_time'],
            labels={'Date': translations['select_date_range'], fraud_label: fraud_label},
            markers=True,
        )
        fig_trend.update_traces(line=dict(color='firebrick'))
        st.plotly_chart(fig_trend, use_container_width=True)

    # Distribution of risk scores across all filtered entries.
    st.markdown("### " + translations['risk_distribution_title'])
    risk_distribution = risk_scores.value_counts().sort_index().reset_index()
    risk_distribution.columns = ['risk_score', 'count']
    fig_risk = px.bar(
        risk_distribution,
        x='risk_score',
        y='count',
        title=translations['risk_distribution'],
        labels={'risk_score': translations['risk_distribution'], 'count': translations['total_analyses']},
        color='risk_score',
        color_continuous_scale=px.colors.sequential.RdBu,
    )
    st.plotly_chart(fig_risk, use_container_width=True)

    # Fraud distribution by country. Series.map (instead of DataFrame.apply
    # with axis=1) keeps the result a Series even when there are no rows.
    st.markdown("### " + translations['fraud_country_distribution_title'])
    fraud_countries = fraud_rows['phone_number'].map(_lookup_country).value_counts().reset_index()
    fraud_countries.columns = ['country', 'counts']

    # Attach ISO codes; countries that cannot be resolved are left off the map.
    fraud_countries['iso_alpha'] = fraud_countries['country'].apply(
        lambda x: get_iso_alpha3(x) if x != "Unknown" else None)
    fraud_countries = fraud_countries.dropna(subset=['iso_alpha'])

    if not fraud_countries.empty:
        fig_map = px.choropleth(
            fraud_countries,
            locations='iso_alpha',
            color='counts',
            hover_name='country',
            color_continuous_scale=px.colors.sequential.Plasma,
            title=translations['fraud_country_distribution_title']
        )
        fig_map.update_geos(showcountries=True, showcoastlines=True)
        st.plotly_chart(fig_map, use_container_width=True)
    else:
        st.info(translations['no_data'])

    st.markdown("---")

    # Gauge chart: share of frauds within the filtered history.
    st.markdown("### " + translations['fraud_percentage'])
    fig_gauge = go.Figure(go.Indicator(
        mode="gauge+number",
        value=fraud_percentage_filtered,
        title={'text': translations['fraud_percentage']},
        gauge={
            'axis': {'range': [0, 100]},
            'bar': {'color': "darkblue"},
            'steps': [
                {'range': [0, 20], 'color': "#55efc4"},
                {'range': [20, 40], 'color': "#81ecec"},
                {'range': [40, 60], 'color': "#74b9ff"},
                {'range': [60, 80], 'color': "#a29bfe"},
                {'range': [80, 100], 'color': "#d63031"}
            ],
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': 70
            }
        }
    ))
    st.plotly_chart(fig_gauge, use_container_width=True)

# Make sure st.set_page_config() is called only in app.py