File size: 14,744 Bytes
a320165
 
128b0c3
7ac8b0d
 
fe1d0fa
7360f5c
8deec8b
c7b500a
67ad7bd
27f4c94
 
 
 
67ad7bd
b3eb2dd
 
 
 
 
 
 
 
 
 
67ad7bd
 
 
 
 
 
 
 
8545836
67ad7bd
 
 
b3eb2dd
 
 
 
 
 
 
b881485
b3eb2dd
b881485
 
82d13f9
b881485
7360f5c
 
b881485
 
 
67ad7bd
b881485
2bc78fb
b3eb2dd
 
 
 
 
 
 
 
 
 
 
82d13f9
 
7360f5c
 
 
fe1d0fa
8545836
67ad7bd
 
2bc78fb
b3eb2dd
 
3655f6c
b881485
2bc78fb
b881485
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2bc78fb
b881485
 
2bc78fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b3eb2dd
4eb2567
 
004670a
 
4eb2567
7360f5c
2bc78fb
 
 
 
b881485
2bc78fb
4eb2567
b3eb2dd
7360f5c
 
6a02207
 
 
 
4eb2567
6a02207
 
4eb2567
6a02207
 
 
cb63f35
6a02207
4eb2567
27f4c94
4eb2567
2bc78fb
 
 
4eb2567
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2bc78fb
b881485
2bc78fb
4eb2567
004670a
b3eb2dd
2bc78fb
 
 
 
 
 
 
 
 
 
b881485
2bc78fb
4eb2567
27f4c94
 
 
 
4eb2567
27f4c94
 
 
 
 
 
 
 
 
 
4eb2567
27f4c94
4eb2567
67ad7bd
 
b881485
 
67ad7bd
4eb2567
67ad7bd
004670a
6a02207
 
 
 
 
 
 
4eb2567
67ad7bd
 
 
b881485
67ad7bd
 
 
 
4eb2567
82d13f9
67ad7bd
 
b881485
 
 
67ad7bd
82d13f9
4eb2567
82d13f9
67ad7bd
4eb2567
8deec8b
 
 
4eb2567
67ad7bd
 
82d13f9
7360f5c
b881485
7360f5c
82d13f9
4eb2567
82d13f9
67ad7bd
4eb2567
67ad7bd
 
 
4eb2567
67ad7bd
 
82d13f9
4eb2567
82d13f9
4eb2567
 
82d13f9
4eb2567
82d13f9
a320165
82d13f9
8545836
 
 
 
82d13f9
a320165
27f4c94
a320165
fe1d0fa
a320165
4eb2567
27f4c94
4eb2567
67ad7bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4eb2567
2bc78fb
 
4eb2567
6a02207
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
# pages/Statistics.py

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import os
import re
from datetime import datetime, timedelta
import pycountry
import requests

# Import helper functions from utils/functions.py
from utils.functions import get_phone_info, get_stats, get_history, get_fake_numbers

# Translations for the "Statistics" page, keyed by language name.
# Every language provides the same set of keys; 'recent_days' is the default
# length (in days) of the date range pre-selected in the date filter.
# NOTE: the non-ASCII text below was previously stored mis-encoded (UTF-8 bytes
# decoded with a legacy code page); it has been restored to proper Unicode.
page_translations = {
    'Polish': {
        'page_title': "📊 Statystyki",
        'page_icon': "📈",
        'header': "📊 Statystyki Aplikacji",
        'description': "Poniżej znajdują się statystyki analizy wiadomości w aplikacji.",
        'total_analyses': "Liczba przeanalizowanych wiadomości",
        'total_frauds_detected': "Wykryte oszustwa",
        'fraud_percentage': "Procent oszustw",
        'history_title': "Historia Analizowanych Wiadomości",
        'frauds_over_time': "Liczba Wykrytych Oszustw w Czasie",
        'risk_distribution_title': "Rozkład Ocen Ryzyka Oszustwa",
        'fraud_country_distribution_title': "Rozkład Oszustw Według Krajów",
        'fraud_trend_title': "Trendy Oszustw w Czasie",
        'risk_distribution': "Rozkład Ocen Ryzyka Oszustwa",
        'fraud_country_distribution': "Rozkład Oszustw Według Krajów",
        'fraud_vs_nonfraud': "Procentowy Podział: Oszustwa vs Bezpieczne",
        'no_data': "Brak dostępnych danych do wyświetlenia.",
        'download_button': "📥 Pobierz dane jako CSV",
        'select_date_range': "Wybierz zakres dat:",
        'recent_days': 30  # Default date range, in days
    },
    'German': {
        'page_title': "📊 Statistiken",
        'page_icon': "📈",
        'header': "📊 Anwendungsstatistiken",
        'description': "Nachfolgend finden Sie die Statistiken zur Nachrichtenanalyse in der Anwendung.",
        'total_analyses': "Anzahl der analysierten Nachrichten",
        'total_frauds_detected': "Erkannte Betrugsfälle",
        'fraud_percentage': "Betrugsprozentsatz",
        'history_title': "Verlauf analysierter Nachrichten",
        'frauds_over_time': "Anzahl erkannter Betrugsfälle im Laufe der Zeit",
        'risk_distribution_title': "Verteilung der Betrugsrisikobewertungen",
        'fraud_country_distribution_title': "Verteilung der Betrugsfälle nach Ländern",
        'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
        'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
        'fraud_country_distribution': "Verteilung der Betrugsfälle nach Ländern",
        'fraud_vs_nonfraud': "Prozentuale Aufteilung: Betrug vs. Sicher",
        'no_data': "Keine Daten zum Anzeigen verfügbar.",
        'download_button': "📥 Daten als CSV herunterladen",
        'select_date_range': "Datumsbereich auswählen:",
        'recent_days': 30
    },
    'English': {
        'page_title': "📊 Statistics",
        'page_icon': "📈",
        'header': "📊 Application Statistics",
        'description': "Below are the statistics of message analysis in the app.",
        'total_analyses': "Total Messages Analyzed",
        'total_frauds_detected': "Frauds Detected",
        'fraud_percentage': "Fraud Percentage",
        'history_title': "History of Analyzed Messages",
        'frauds_over_time': "Number of Detected Frauds Over Time",
        'risk_distribution_title': "Distribution of Fraud Risk Scores",
        'fraud_country_distribution_title': "Fraud Distribution by Countries",
        'fraud_trend_title': "Fraud Trends Over Time",
        'risk_distribution': "Distribution of Fraud Risk Scores",
        'fraud_country_distribution': "Fraud Distribution by Countries",
        'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
        'no_data': "No data available to display.",
        'download_button': "📥 Download data as CSV",
        'select_date_range': "Select date range:",
        'recent_days': 30
    }
}

# Maps lower-case country names (Polish, English and German spellings) to
# ISO 3166-1 alpha-3 codes. "Unknown" placeholders map to None explicitly.
# NOTE: keys containing non-ASCII letters were previously stored mis-encoded
# and could never match real input; they are restored to proper Unicode here.
country_name_mapping = {
    'niemcy': 'DEU',        # Germany (Polish)
    'germany': 'DEU',       # Germany (English)
    'deutschland': 'DEU',   # Germany (German)
    'polska': 'POL',        # Poland (Polish)
    'poland': 'POL',        # Poland (English)
    'österreich': 'AUT',    # Austria (German)
    'austria': 'AUT',       # Austria (English)
    'francja': 'FRA',       # France (Polish)
    'france': 'FRA',        # France (English)
    'frankreich': 'FRA',    # France (German)
    'włochy': 'ITA',        # Italy (Polish)
    'italy': 'ITA',         # Italy (English)
    'italien': 'ITA',       # Italy (German)
    'hiszpania': 'ESP',     # Spain (Polish)
    'spain': 'ESP',         # Spain (English)
    'spanien': 'ESP',       # Spain (German)
    'stany zjednoczone': 'USA',   # USA (Polish)
    'usa': 'USA',                 # USA (English)
    'vereinigte staaten': 'USA',  # USA (German)
    'wielka brytania': 'GBR',         # United Kingdom (Polish)
    'united kingdom': 'GBR',          # United Kingdom (English)
    'vereinigtes königreich': 'GBR',  # United Kingdom (German)
    'unknown': None,
    'nieznany': None,
    'unbekannt': None
    # Add further countries here as needed
}

def get_iso_alpha3(country_name):
    """Return the ISO 3166-1 alpha-3 code for a country name, or None.

    The name is first looked up (case-insensitively, surrounding whitespace
    ignored) in ``country_name_mapping``; names that are not listed there are
    passed to ``pycountry.countries.lookup``.

    Args:
        country_name: Country name to resolve. Anything that is not a
            non-blank string yields ``None`` instead of raising.

    Returns:
        The three-letter code as a string, or ``None`` when the name is
        missing, blank, explicitly mapped to ``None`` or unknown to pycountry.
    """
    if not isinstance(country_name, str):
        return None

    name = country_name.strip()
    if not name:
        return None

    key = name.lower()
    # Membership test (rather than .get) so that entries deliberately mapped
    # to None, such as 'unknown', short-circuit without a pycountry lookup.
    if key in country_name_mapping:
        return country_name_mapping[key]

    # Not in the local mapping: fall back to pycountry.
    try:
        return pycountry.countries.lookup(name).alpha_3
    except LookupError:
        return None

# Risk scores at or above this value (out of 10) are counted as fraud.
_FRAUD_SCORE_THRESHOLD = 7

# Matches the "<score>/10" fragment inside a risk assessment text.
_RISK_SCORE_PATTERN = re.compile(r'(\d+)/10')

# History columns the page reads; checked up front so a malformed history
# produces a readable error instead of a KeyError halfway through rendering.
_REQUIRED_HISTORY_COLUMNS = ('timestamp', 'phone_number', 'risk_assessment')

# Legend label for the non-fraud slice of the pie chart. The wording matches
# the one already used in each language's 'fraud_vs_nonfraud' title.
_SAFE_MESSAGES_LABEL = {
    'Polish': "Bezpieczne",
    'German': "Sicher",
    'English': "Safe",
}


def _extract_risk_score(risk_assessment):
    """Return the integer N of the first "N/10" in the text, or 0 if absent."""
    # str() keeps missing/non-text values (NaN, None) from crashing the regex.
    match = _RISK_SCORE_PATTERN.search(str(risk_assessment))
    return int(match.group(1)) if match else 0


def _lookup_country(phone_number):
    """Return the country reported by get_phone_info, or "Unknown" if falsy."""
    country, _ = get_phone_info(phone_number)
    return country if country else "Unknown"


def _render_metric_row(translations, total, frauds, percentage):
    """Show total / frauds / fraud percentage as three side-by-side metrics."""
    col_total, col_frauds, col_pct = st.columns(3)
    with col_total:
        st.metric(label=translations['total_analyses'], value=str(total))
    with col_frauds:
        st.metric(label=translations['total_frauds_detected'], value=str(frauds))
    with col_pct:
        st.metric(label=translations['fraud_percentage'], value=f"{percentage:.2f}%")


def main(language):
    """Render the Statistics page.

    Shows the overall counters returned by ``get_stats()``, a date-range
    filter, and, for the history entries from ``get_history()`` that fall
    inside the selected range: a table with CSV export, summary metrics, a
    fraud/safe pie chart, a frauds-per-day line chart, a risk-score histogram,
    a per-country choropleth of frauds and a fraud-percentage gauge.

    A message counts as fraud when its risk assessment contains "N/10" with
    N >= 7.

    Args:
        language: Key into ``page_translations``; unknown values fall back to
            Polish.
    """
    translations = page_translations.get(language, page_translations['Polish'])
    safe_label = _SAFE_MESSAGES_LABEL.get(language, _SAFE_MESSAGES_LABEL['Polish'])
    frauds_label = translations['total_frauds_detected']

    st.title(translations['header'])
    st.markdown(translations['description'])

    # Load the aggregate counters and the analysis history.
    try:
        stats = get_stats()
        history = get_history()
    except Exception as e:
        st.error(f"{translations['no_data']} ({e})")
        st.stop()

    # Overall key metrics (not affected by the date filter).
    total_analyses = stats.get("total_analyses", 0)
    total_frauds_detected = stats.get("total_frauds_detected", 0)
    if total_analyses > 0:
        fraud_percentage = (total_frauds_detected / total_analyses) * 100
    else:
        fraud_percentage = 0  # No analyses yet: report 0%
    _render_metric_row(translations, total_analyses, total_frauds_detected, fraud_percentage)

    st.markdown("---")

    # Interactive date-range filter in the main area.
    st.header(translations['select_date_range'])
    today = datetime.now().date()
    try:
        col_start, col_end = st.columns(2)
        with col_start:
            start_date = st.date_input(
                translations['select_date_range'] + " - " + "Start",
                value=today - timedelta(days=translations['recent_days']),
                min_value=today - timedelta(days=365),
                max_value=today
            )
        with col_end:
            end_date = st.date_input(
                translations['select_date_range'] + " - " + "End",
                value=today,
                min_value=start_date,
                max_value=today
            )
    except Exception as e:
        st.error(f"{translations['no_data']} ({e})")
        st.stop()

    if not history:
        st.info(translations['no_data'])
        return

    # Restrict the history to the selected date range.
    try:
        df_history = pd.DataFrame(history)
        missing = [c for c in _REQUIRED_HISTORY_COLUMNS if c not in df_history.columns]
        if missing:
            raise KeyError(f"missing history columns: {', '.join(missing)}")
        # Coerce timestamps to datetime and drop entries that fail to parse.
        df_history['timestamp'] = pd.to_datetime(df_history['timestamp'], errors='coerce')
        df_history = df_history.dropna(subset=['timestamp'])
        entry_dates = df_history['timestamp'].dt.date
        in_range = (entry_dates >= start_date) & (entry_dates <= end_date)
        # .copy() so the derived column added below is written to an
        # independent frame rather than to a slice of df_history.
        df_filtered = df_history.loc[in_range].copy()
    except Exception as e:
        st.error(f"{translations['no_data']} ({e})")
        st.stop()

    # History table.
    st.markdown(f"### {translations['history_title']}")
    if df_filtered.empty:
        # Nothing in range: every chart below would be empty, so stop here.
        st.info(translations['no_data'])
        return

    st.dataframe(df_filtered[['timestamp', 'phone_number', 'risk_assessment']], height=300)

    # CSV export of the filtered history (built before derived columns are added).
    csv = df_filtered.to_csv(index=False).encode('utf-8')
    st.download_button(
        label=translations['download_button'],
        data=csv,
        file_name='analysis_history.csv',
        mime='text/csv',
    )

    st.markdown("---")

    # Parse each risk score once and reuse it for all statistics below.
    df_filtered['risk_score'] = df_filtered['risk_assessment'].apply(_extract_risk_score)
    is_fraud = df_filtered['risk_score'] >= _FRAUD_SCORE_THRESHOLD
    df_frauds = df_filtered.loc[is_fraud]

    total_filtered = len(df_filtered)
    frauds_filtered = int(is_fraud.sum())
    fraud_percentage_filtered = (frauds_filtered / total_filtered) * 100 if total_filtered > 0 else 0

    # Metrics for the filtered history.
    st.markdown("### " + translations['fraud_trend_title'])
    _render_metric_row(translations, total_filtered, frauds_filtered, fraud_percentage_filtered)

    # Pie chart: fraud vs. safe share.
    st.markdown("### " + translations['fraud_vs_nonfraud'])
    fraud_data = [frauds_filtered, total_filtered - frauds_filtered]
    fraud_labels = [frauds_label, safe_label]
    fig_fraud_pie = go.Figure(data=[go.Pie(labels=fraud_labels, values=fraud_data, hole=.3,
                                           marker_colors=['#FF6347', '#4682B4'])])
    fig_fraud_pie.update_layout(title_text=translations['fraud_vs_nonfraud'])
    st.plotly_chart(fig_fraud_pie, use_container_width=True)

    # Line chart: number of detected frauds per day (fraud rows only).
    st.markdown("### " + translations['frauds_over_time'])
    if df_frauds.empty:
        st.info(translations['no_data'])
    else:
        fraud_over_time = (
            df_frauds.groupby(df_frauds['timestamp'].dt.date)
            .size()
            .reset_index(name=frauds_label)
            .rename(columns={'timestamp': 'Date'})
        )
        fig_trend = px.line(fraud_over_time, x='Date', y=frauds_label, title=translations['frauds_over_time'],
                            labels={'Date': translations['select_date_range'], frauds_label: frauds_label},
                            markers=True)
        fig_trend.update_traces(line=dict(color='firebrick'))
        st.plotly_chart(fig_trend, use_container_width=True)

    # Bar chart: distribution of risk scores over all filtered messages.
    st.markdown("### " + translations['risk_distribution_title'])
    risk_distribution = df_filtered['risk_score'].value_counts().sort_index().reset_index()
    risk_distribution.columns = ['risk_score', 'count']
    fig_risk = px.bar(risk_distribution, x='risk_score', y='count', title=translations['risk_distribution'],
                      labels={'risk_score': translations['risk_distribution'], 'count': translations['total_analyses']},
                      color='risk_score', color_continuous_scale=px.colors.sequential.RdBu)
    st.plotly_chart(fig_risk, use_container_width=True)

    # Choropleth: detected frauds per country of the sender's phone number.
    st.markdown("### " + translations['fraud_country_distribution_title'])
    fraud_countries = pd.DataFrame(columns=['country', 'counts', 'iso_alpha'])
    if not df_frauds.empty:
        fraud_countries = df_frauds['phone_number'].apply(_lookup_country).value_counts().reset_index()
        fraud_countries.columns = ['country', 'counts']
        # Attach ISO alpha-3 codes; rows that cannot be resolved are dropped
        # because the map can only place countries it has a code for.
        fraud_countries['iso_alpha'] = fraud_countries['country'].apply(
            lambda x: get_iso_alpha3(x) if x != "Unknown" else None)
        fraud_countries = fraud_countries.dropna(subset=['iso_alpha'])

    if not fraud_countries.empty:
        fig_map = px.choropleth(
            fraud_countries,
            locations='iso_alpha',
            color='counts',
            hover_name='country',
            color_continuous_scale=px.colors.sequential.Plasma,
            title=translations['fraud_country_distribution_title']
        )
        fig_map.update_geos(showcountries=True, showcoastlines=True)
        st.plotly_chart(fig_map, use_container_width=True)
    else:
        st.info(translations['no_data'])

    st.markdown("---")

    # Gauge: fraud share within the filtered range.
    st.markdown("### " + translations['fraud_percentage'])
    fig_gauge = go.Figure(go.Indicator(
        mode="gauge+number",
        value=fraud_percentage_filtered,
        title={'text': translations['fraud_percentage']},
        gauge={
            'axis': {'range': [0, 100]},
            'bar': {'color': "darkblue"},
            'steps': [
                {'range': [0, 20], 'color': "#55efc4"},
                {'range': [20, 40], 'color': "#81ecec"},
                {'range': [40, 60], 'color': "#74b9ff"},
                {'range': [60, 80], 'color': "#a29bfe"},
                {'range': [80, 100], 'color': "#d63031"}
            ],
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': 70
            }
        }
    ))
    st.plotly_chart(fig_gauge, use_container_width=True)

# Make sure st.set_page_config() is called only in app.py