Spaces:

rafaldembski
/

ScamDetector

Running

App Files Community

rafaldembski committed on Oct 1, 2024

Commit

b881485

verified ·

1 Parent(s): 4eb2567

Update pages/Statistics.py

Browse files

Files changed (1) hide show

pages/Statistics.py +51 -40

pages/Statistics.py CHANGED Viewed

@@ -4,7 +4,6 @@ import streamlit as st
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
-import json
 import os
 import re
 from datetime import datetime, timedelta
@@ -14,8 +13,8 @@ import requests
 # Importowanie funkcji z utils/functions.py
 from utils.functions import get_phone_info, get_stats, get_history, get_fake_numbers
-# Importowanie dodatkowych komponentów
-from streamlit_extras.metric_cards import card
 # Definiowanie tłumaczeń dla zakładki "Statystyki"
 page_translations = {
@@ -34,7 +33,6 @@ page_translations = {
         'fraud_trend_title': "Trendy Oszustw w Czasie",
         'risk_distribution': "Rozkład Ocen Ryzyka Oszustwa",
         'fraud_country_distribution': "Rozkład Oszustw Według Krajów",
-        'heatmap_title': "Heatmapa Oszustw w Czasie",
         'fraud_vs_nonfraud': "Procentowy Podział: Oszustwa vs Bezpieczne",
         'no_data': "Brak dostępnych danych do wyświetlenia.",
         'download_button': "📥 Pobierz dane jako CSV",
@@ -48,20 +46,19 @@ page_translations = {
         'header': "📊 Anwendungsstatistiken",
         'description': "Nachfolgend finden Sie die Statistiken zur Nachrichtenanalyse in der Anwendung.",
         'total_analyses': "Anzahl der analysierten Nachrichten",
-        'total_frauds_detected': "Erkannte Betrügereien",
         'fraud_percentage': "Betrugsprozentsatz",
-        'history_title': "Analyseverlauf der Nachrichten",
-        'frauds_over_time': "Anzahl der erkannten Betrügereien im Laufe der Zeit",
         'risk_distribution_title': "Verteilung der Betrugsrisikobewertungen",
-        'fraud_country_distribution_title': "Betrug nach Ländern",
         'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
         'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
-        'fraud_country_distribution': "Betrug nach Ländern",
-        'heatmap_title': "Heatmap der Betrügereien im Laufe der Zeit",
-        'fraud_vs_nonfraud': "Prozentanteil: Betrug vs Sichere Nachrichten",
-        'no_data': "Keine Daten zur Anzeige verfügbar.",
         'download_button': "📥 Daten als CSV herunterladen",
-        'select_date_range': "Wähle einen Datumsbereich:",
         'search_placeholder': "Suche in der Historie:",
         'recent_days': 30
     },
@@ -80,7 +77,6 @@ page_translations = {
         'fraud_trend_title': "Fraud Trends Over Time",
         'risk_distribution': "Distribution of Fraud Risk Scores",
         'fraud_country_distribution': "Fraud Distribution by Countries",
-        'heatmap_title': "Fraud Heatmap Over Time",
         'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
         'no_data': "No data available to display.",
         'download_button': "📥 Download data as CSV",
@@ -90,17 +86,33 @@ page_translations = {
     }
 }
-# Mapowanie polskich nazw krajów na kody ISO alfa-3
 country_name_mapping = {
-    'niemcy': 'DEU',     # Niemcy
-    'polska': 'POL',     # Polska
-    'francja': 'FRA',    # Francja
-    'włochy': 'ITA',     # Włochy
-    'hiszpania': 'ESP',  # Hiszpania
-    'stany zjednoczone': 'USA',  # USA
-    'wielka brytania': 'GBR',    # Wielka Brytania
     'unknown': None,
-    'nieznany': None
     # Dodaj inne kraje w razie potrzeby
 }
@@ -128,7 +140,7 @@ def main(language):
         stats = get_stats()
         history = get_history()
     except Exception as e:
-        st.error(f"Wystąpił błąd podczas pobierania danych: {e}")
         st.stop()
     # Kluczowe metryki
@@ -140,12 +152,12 @@ def main(language):
     with col1:
         card(
             title=translations['total_analyses'],
-            value=str(total_analyses),
         )
     with col2:
         card(
             title=translations['total_frauds_detected'],
-            value=str(total_frauds_detected),
         )
     with col3:
         if total_analyses > 0:
@@ -154,7 +166,7 @@ def main(language):
             fraud_percentage = 0  # Ustawienie na 0% w przypadku braku analiz
         card(
             title=translations['fraud_percentage'],
-            value=f"{fraud_percentage:.2f}%",
         )
     st.markdown("---")
@@ -178,7 +190,7 @@ def main(language):
                 max_value=datetime.now().date()
             )
     except Exception as e:
-        st.error(f"Wystąpił błąd przy wyborze daty: {e}")
         st.stop()
     # Filtracja historii na podstawie daty
@@ -193,7 +205,7 @@ def main(language):
             mask = (df_history['timestamp'].dt.date >= start_date) & (df_history['timestamp'].dt.date <= end_date)
             df_filtered = df_history.loc[mask]
         except Exception as e:
-            st.error(f"Wystąpił błąd podczas filtrowania danych: {e}")
             st.stop()
         # Dodanie pola wyszukiwania
@@ -223,7 +235,8 @@ def main(language):
         # Aktualizacja statystyk na podstawie filtrowanej historii
         total_filtered = df_filtered.shape[0]
-        frauds_filtered = df_filtered['risk_assessment'].apply(lambda x: int(re.search(r'(\d+)/10', x).group(1)) >= 7 if re.search(r'(\d+)/10', x) else False).sum()
         fraud_percentage_filtered = (frauds_filtered / total_filtered) * 100 if total_filtered > 0 else 0
         # Wyświetlenie metryk dla filtrowanej historii
@@ -232,23 +245,23 @@ def main(language):
         with col4:
             card(
                 title=translations['total_analyses'],
-                value=str(total_filtered),
             )
         with col5:
             card(
                 title=translations['total_frauds_detected'],
-                value=str(frauds_filtered),
             )
         with col6:
             card(
                 title=translations['fraud_percentage'],
-                value=f"{fraud_percentage_filtered:.2f}%",
             )
         # Wizualizacja procentowego podziału oszustw
         st.markdown("### " + translations['fraud_vs_nonfraud'])
         fraud_data = [frauds_filtered, total_filtered - frauds_filtered]
-        fraud_labels = ['Oszustwo', 'Bezpieczne']
         fig_fraud_pie = go.Figure(data=[go.Pie(labels=fraud_labels, values=fraud_data, hole=.3,
                                                marker_colors=['#FF6347', '#4682B4'])])
         fig_fraud_pie.update_layout(title_text=translations['fraud_vs_nonfraud'])
@@ -257,16 +270,15 @@ def main(language):
         # Trend oszustw w czasie
         st.markdown("### " + translations['frauds_over_time'])
         fraud_over_time = df_filtered.groupby(df_filtered['timestamp'].dt.date)['phone_number'].count().reset_index()
-        fraud_over_time.rename(columns={'timestamp': 'Date', 'phone_number': 'Frauds Detected'}, inplace=True)
-        fig_trend = px.line(fraud_over_time, x='Date', y='Frauds Detected', title=translations['frauds_over_time'],
-                            labels={'Date': 'Data', 'Frauds Detected': 'Wykryte Oszustwa'}, markers=True)
         fig_trend.update_traces(line=dict(color='firebrick'))
         st.plotly_chart(fig_trend, use_container_width=True)
         # Rozkład ocen ryzyka
         st.markdown("### " + translations['risk_distribution_title'])
         def extract_risk_score(risk_assessment):
             """Extract the numeric score from an "N/10" risk assessment string.

             Args:
                 risk_assessment: Free-form assessment text; the first run of
                     digits directly followed by "/10" is taken as the score.

             Returns:
                 The matched digits as an int, or 0 when the input is not a
                 string or contains no "N/10" pattern.
             """
             # re.search raises TypeError on non-string input; treat a missing
             # assessment the same as text without a score.
             if not isinstance(risk_assessment, str):
                 return 0
             match = re.search(r'(\d+)/10', risk_assessment)
             return int(match.group(1)) if match else 0
@@ -275,14 +287,13 @@ def main(language):
         risk_distribution = df_filtered['risk_score'].value_counts().sort_index().reset_index()
         risk_distribution.columns = ['risk_score', 'count']
         fig_risk = px.bar(risk_distribution, x='risk_score', y='count', title=translations['risk_distribution'],
-                          labels={'risk_score': 'Ocena Ryzyka', 'count': 'Liczba Wiadomości'},
                           color='risk_score', color_continuous_scale=px.colors.sequential.RdBu)
         st.plotly_chart(fig_risk, use_container_width=True)
         # Rozkład oszustw według krajów
         st.markdown("### " + translations['fraud_country_distribution_title'])
         def get_country(row):
             """Look up the country for a history row's phone number.

             Passes row['phone_number'] to get_phone_info, keeps the first
             element of the returned pair and falls back to "Unknown" when
             that element is falsy.
             """
             country_name, _ignored = get_phone_info(row['phone_number'])
             if not country_name:
                 return "Unknown"
             return country_name

 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
 import os
 import re
 from datetime import datetime, timedelta
 # Importowanie funkcji z utils/functions.py
 from utils.functions import get_phone_info, get_stats, get_history, get_fake_numbers
+# Importowanie funkcji 'card' z 'streamlit_extras.card'
+from streamlit_extras.card import card
 # Definiowanie tłumaczeń dla zakładki "Statystyki"
 page_translations = {
         'fraud_trend_title': "Trendy Oszustw w Czasie",
         'risk_distribution': "Rozkład Ocen Ryzyka Oszustwa",
         'fraud_country_distribution': "Rozkład Oszustw Według Krajów",
         'fraud_vs_nonfraud': "Procentowy Podział: Oszustwa vs Bezpieczne",
         'no_data': "Brak dostępnych danych do wyświetlenia.",
         'download_button': "📥 Pobierz dane jako CSV",
         'header': "📊 Anwendungsstatistiken",
         'description': "Nachfolgend finden Sie die Statistiken zur Nachrichtenanalyse in der Anwendung.",
         'total_analyses': "Anzahl der analysierten Nachrichten",
+        'total_frauds_detected': "Erkannte Betrugsfälle",
         'fraud_percentage': "Betrugsprozentsatz",
+        'history_title': "Verlauf analysierter Nachrichten",
+        'frauds_over_time': "Anzahl erkannter Betrugsfälle im Laufe der Zeit",
         'risk_distribution_title': "Verteilung der Betrugsrisikobewertungen",
+        'fraud_country_distribution_title': "Verteilung der Betrugsfälle nach Ländern",
         'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
         'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
+        'fraud_country_distribution': "Verteilung der Betrugsfälle nach Ländern",
+        'fraud_vs_nonfraud': "Prozentuale Aufteilung: Betrug vs. Sicher",
+        'no_data': "Keine Daten zum Anzeigen verfügbar.",
         'download_button': "📥 Daten als CSV herunterladen",
+        'select_date_range': "Datumsbereich auswählen:",
         'search_placeholder': "Suche in der Historie:",
         'recent_days': 30
     },
         'fraud_trend_title': "Fraud Trends Over Time",
         'risk_distribution': "Distribution of Fraud Risk Scores",
         'fraud_country_distribution': "Fraud Distribution by Countries",
         'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
         'no_data': "No data available to display.",
         'download_button': "📥 Download data as CSV",
     }
 }
+# Mapowanie nazw krajów na kody ISO alfa-3
 country_name_mapping = {
+    'niemcy': 'DEU',        # Niemcy po polsku
+    'germany': 'DEU',       # Niemcy po angielsku
+    'deutschland': 'DEU',   # Niemcy po niemiecku
+    'polska': 'POL',        # Polska po polsku
+    'poland': 'POL',        # Polska po angielsku
+    'österreich': 'AUT',    # Austria po niemiecku
+    'austria': 'AUT',       # Austria po angielsku
+    'francja': 'FRA',       # Francja po polsku
+    'france': 'FRA',        # Francja po angielsku
+    'frankreich': 'FRA',    # Francja po niemiecku
+    'włochy': 'ITA',        # Włochy po polsku
+    'italy': 'ITA',         # Włochy po angielsku
+    'italien': 'ITA',       # Włochy po niemiecku
+    'hiszpania': 'ESP',     # Hiszpania po polsku
+    'spain': 'ESP',         # Hiszpania po angielsku
+    'spanien': 'ESP',       # Hiszpania po niemiecku
+    'stany zjednoczone': 'USA',  # USA po polsku
+    'usa': 'USA',               # USA po angielsku
+    'vereinigte staaten': 'USA',# USA po niemiecku
+    'wielka brytania': 'GBR',   # Wielka Brytania po polsku
+    'united kingdom': 'GBR',    # Wielka Brytania po angielsku
+    'vereinigtes königreich': 'GBR', # Wielka Brytania po niemiecku
     'unknown': None,
+    'nieznany': None,
+    'unbekannt': None
     # Dodaj inne kraje w razie potrzeby
 }
         stats = get_stats()
         history = get_history()
     except Exception as e:
+        st.error(f"{translations['no_data']} ({e})")
         st.stop()
     # Kluczowe metryki
     with col1:
         card(
             title=translations['total_analyses'],
+            text=str(total_analyses),
         )
     with col2:
         card(
             title=translations['total_frauds_detected'],
+            text=str(total_frauds_detected),
         )
     with col3:
         if total_analyses > 0:
             fraud_percentage = 0  # Ustawienie na 0% w przypadku braku analiz
         card(
             title=translations['fraud_percentage'],
+            text=f"{fraud_percentage:.2f}%",
         )
     st.markdown("---")
                 max_value=datetime.now().date()
             )
     except Exception as e:
+        st.error(f"{translations['no_data']} ({e})")
         st.stop()
     # Filtracja historii na podstawie daty
             mask = (df_history['timestamp'].dt.date >= start_date) & (df_history['timestamp'].dt.date <= end_date)
             df_filtered = df_history.loc[mask]
         except Exception as e:
+            st.error(f"{translations['no_data']} ({e})")
             st.stop()
         # Dodanie pola wyszukiwania
         # Aktualizacja statystyk na podstawie filtrowanej historii
         total_filtered = df_filtered.shape[0]
+        frauds_filtered = df_filtered['risk_assessment'].apply(
+            lambda x: int(re.search(r'(\d+)/10', x).group(1)) >= 7 if re.search(r'(\d+)/10', x) else False).sum()
         fraud_percentage_filtered = (frauds_filtered / total_filtered) * 100 if total_filtered > 0 else 0
         # Wyświetlenie metryk dla filtrowanej historii
         with col4:
             card(
                 title=translations['total_analyses'],
+                text=str(total_filtered),
             )
         with col5:
             card(
                 title=translations['total_frauds_detected'],
+                text=str(frauds_filtered),
             )
         with col6:
             card(
                 title=translations['fraud_percentage'],
+                text=f"{fraud_percentage_filtered:.2f}%",
             )
         # Wizualizacja procentowego podziału oszustw
         st.markdown("### " + translations['fraud_vs_nonfraud'])
         fraud_data = [frauds_filtered, total_filtered - frauds_filtered]
+        fraud_labels = [translations['total_frauds_detected'], translations['total_analyses']]
         fig_fraud_pie = go.Figure(data=[go.Pie(labels=fraud_labels, values=fraud_data, hole=.3,
                                                marker_colors=['#FF6347', '#4682B4'])])
         fig_fraud_pie.update_layout(title_text=translations['fraud_vs_nonfraud'])
         # Trend oszustw w czasie
         st.markdown("### " + translations['frauds_over_time'])
         fraud_over_time = df_filtered.groupby(df_filtered['timestamp'].dt.date)['phone_number'].count().reset_index()
+        fraud_over_time.rename(columns={'timestamp': 'Date', 'phone_number': translations['total_frauds_detected']}, inplace=True)
+        fig_trend = px.line(fraud_over_time, x='Date', y=translations['total_frauds_detected'], title=translations['frauds_over_time'],
+                            labels={'Date': translations['select_date_range'], translations['total_frauds_detected']: translations['total_frauds_detected']}, markers=True)
         fig_trend.update_traces(line=dict(color='firebrick'))
         st.plotly_chart(fig_trend, use_container_width=True)
         # Rozkład ocen ryzyka
         st.markdown("### " + translations['risk_distribution_title'])
         def extract_risk_score(risk_assessment):
             """Extract the numeric score from an "N/10" risk assessment string.

             Args:
                 risk_assessment: Free-form assessment text; the first run of
                     digits directly followed by "/10" is taken as the score.

             Returns:
                 The matched digits as an int, or 0 when the input is not a
                 string or contains no "N/10" pattern.
             """
             # re.search raises TypeError on non-string input; treat a missing
             # assessment the same as text without a score.
             if not isinstance(risk_assessment, str):
                 return 0
             match = re.search(r'(\d+)/10', risk_assessment)
             return int(match.group(1)) if match else 0
         risk_distribution = df_filtered['risk_score'].value_counts().sort_index().reset_index()
         risk_distribution.columns = ['risk_score', 'count']
         fig_risk = px.bar(risk_distribution, x='risk_score', y='count', title=translations['risk_distribution'],
+                          labels={'risk_score': translations['risk_distribution'], 'count': translations['total_analyses']},
                           color='risk_score', color_continuous_scale=px.colors.sequential.RdBu)
         st.plotly_chart(fig_risk, use_container_width=True)
         # Rozkład oszustw według krajów
         st.markdown("### " + translations['fraud_country_distribution_title'])
         def get_country(row):
             """Look up the country for a history row's phone number.

             Passes row['phone_number'] to get_phone_info, keeps the first
             element of the returned pair and falls back to "Unknown" when
             that element is falsy.
             """
             country_name, _ignored = get_phone_info(row['phone_number'])
             if not country_name:
                 return "Unknown"
             return country_name