Spaces:

rafaldembski
/

ScamDetector

Running

App Files Files Community

rafaldembski committed on Oct 1, 2024

Commit

27f4c94

verified ·

1 Parent(s): 004670a

Update pages/Statistics.py

Browse files

Files changed (1) hide show

pages/Statistics.py +77 -89

pages/Statistics.py CHANGED Viewed

@@ -9,10 +9,14 @@ import os
 import re
 from datetime import datetime, timedelta
 import pycountry
 # Opcjonalne: Importowanie dodatkowych komponentów
-# from streamlit_extras.metric_cards import style_metric_cards
-# from streamlit_elements import elements, mui, html
 # Definiowanie ścieżek do plików JSON
 FAKE_NUMBERS_FILE = os.path.join('data', 'fake_numbers.json')
@@ -36,7 +40,7 @@ page_translations = {
         'fraud_trend_title': "Trendy Oszustw w Czasie",
         'risk_distribution': "Rozkład Ocen Ryzyka Oszustwa",
         'fraud_country_distribution': "Rozkład Oszustw Według Krajów",
-        'heatmap_title': "Heatmapa Oszustw",
         'fraud_vs_nonfraud': "Procentowy Podział: Oszustwa vs Bezpieczne",
         'no_data': "Brak dostępnych danych do wyświetlenia.",
         'download_button': "📥 Pobierz dane jako CSV",
@@ -58,7 +62,7 @@ page_translations = {
         'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
         'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
         'fraud_country_distribution': "Betrug nach Ländern",
-        'heatmap_title': "Heatmap der Betrügereien",
         'fraud_vs_nonfraud': "Prozentanteil: Betrug vs Sichere Nachrichten",
         'no_data': "Keine Daten zur Anzeige verfügbar.",
         'download_button': "📥 Daten als CSV herunterladen",
@@ -80,7 +84,7 @@ page_translations = {
         'fraud_trend_title': "Fraud Trends Over Time",
         'risk_distribution': "Distribution of Fraud Risk Scores",
         'fraud_country_distribution': "Fraud Distribution by Countries",
-        'heatmap_title': "Fraud Heatmap",
         'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
         'no_data': "No data available to display.",
         'download_button': "📥 Download data as CSV",
@@ -89,67 +93,22 @@ page_translations = {
     }
 }
-def load_json(file_path):
-    """Ładuje dane z pliku JSON."""
-    if not os.path.exists(file_path):
-        if file_path.endswith('stats.json'):
-            return {"total_analyses": 0, "total_frauds_detected": 0}
-        else:
-            return []
-    with open(file_path, 'r', encoding='utf-8') as file:
-        try:
-            data = json.load(file)
-            return data
-        except json.JSONDecodeError:
-            st.error(f"Nie można załadować danych z {file_path}. Plik jest uszkodzony.")
-            if file_path.endswith('stats.json'):
-                return {"total_analyses": 0, "total_frauds_detected": 0}
-            return []
-def save_json(file_path, data):
-    """Zapisuje dane do pliku JSON."""
-    with open(file_path, 'w', encoding='utf-8') as file:
-        json.dump(data, file, ensure_ascii=False, indent=4)
-        st.success(f"Dane zostały zapisane do {file_path}.")
-def get_stats_from_json():
-    """Pobiera statystyki z pliku stats.json."""
-    stats = load_json(STATS_FILE)
-    return stats
-def get_history_from_json():
-    """Pobiera historię analiz z pliku history.json."""
-    return load_json(HISTORY_FILE)
-def get_fake_numbers_from_json():
-    """Pobiera fałszywe numery z pliku fake_numbers.json."""
-    return load_json(FAKE_NUMBERS_FILE)
-def get_country_code(name):
-    """Zwraca kod ISO-3 kraju na podstawie jego nazwy."""
-    try:
-        country = pycountry.countries.lookup(name)
-        return country.alpha_3
-    except LookupError:
-        return None
 def main(language):
     translations = page_translations.get(language, page_translations['English'])
-    # Wyświetlenie nagłówka
     st.title(translations['header'])
     st.markdown(translations['description'])
     # Pobieranie danych z plików JSON
-    stats = get_stats_from_json()
-    history = get_history_from_json()
     # Kluczowe metryki
     total_analyses = stats.get("total_analyses", 0)
     total_frauds_detected = stats.get("total_frauds_detected", 0)
     # Stylizacja kart metryk
-    # style_metric_cards()  # Uncomment if using metric_cards
     # Wyświetlenie metryk
     col1, col2, col3 = st.columns(3)
@@ -161,21 +120,21 @@ def main(language):
         fraud_percentage = 0  # Ustawienie na 0% w przypadku braku analiz
     col3.metric(label=translations['fraud_percentage'], value=f"{fraud_percentage:.2f}%")
-    # Dodanie interaktywnego filtra daty
-    st.markdown("### " + translations['select_date_range'])
-    today = datetime.now().date()
-    recent_days = translations['recent_days']
-    start_date = st.date_input(
         "Start Date",
-        value=today - timedelta(days=recent_days),
-        min_value=today - timedelta(days=365),
-        max_value=today
     )
-    end_date = st.date_input(
         "End Date",
-        value=today,
         min_value=start_date,
-        max_value=today
     )
     # Filtracja historii na podstawie daty
@@ -187,9 +146,27 @@ def main(language):
         mask = (df_history['timestamp'].dt.date >= start_date) & (df_history['timestamp'].dt.date <= end_date)
         df_filtered = df_history.loc[mask]
         # Aktualizacja statystyk na podstawie filtrowanej historii
         total_filtered = df_filtered.shape[0]
-        frauds_filtered = df_filtered[df_filtered['risk_assessment'].str.contains(r'\d+/10')]['risk_assessment'].apply(lambda x: int(re.search(r'(\d+)/10', x).group(1)) >= 7).sum()
         fraud_percentage_filtered = (frauds_filtered / total_filtered) * 100 if total_filtered > 0 else 0
         # Wyświetlenie metryk dla filtrowanej historii
@@ -222,7 +199,7 @@ def main(language):
         def extract_risk_score(risk_assessment):
             match = re.search(r'(\d+)/10', risk_assessment)
             return int(match.group(1)) if match else 0
         df_filtered['risk_score'] = df_filtered['risk_assessment'].apply(extract_risk_score)
         risk_distribution = df_filtered['risk_score'].value_counts().sort_index().reset_index()
         risk_distribution.columns = ['risk_score', 'count']
@@ -236,15 +213,15 @@ def main(language):
         def get_country(row):
             country, _ = get_phone_info(row['phone_number'])
             return country if country else "Unknown"
         df_filtered['country'] = df_filtered.apply(get_country, axis=1)
         fraud_countries = df_filtered['country'].value_counts().reset_index()
         fraud_countries.columns = ['country', 'counts']
         # Dodanie kodów krajów
-        fraud_countries['iso_alpha'] = fraud_countries['country'].apply(get_country_code)
         fraud_countries = fraud_countries.dropna(subset=['iso_alpha'])
         if not fraud_countries.empty:
             fig_map = px.choropleth(
                 fraud_countries,
@@ -254,30 +231,42 @@ def main(language):
                 color_continuous_scale=px.colors.sequential.Plasma,
                 title=translations['fraud_country_distribution_title']
             )
-            fig_map.update_geos(showcountries=True, showcoastlines=True)  # Usunięto powtarzający się 'showcountries'
             st.plotly_chart(fig_map, use_container_width=True)
         else:
             st.info(translations['no_data'])
         # Dodatkowe Wizualizacje
         st.markdown("### " + translations['heatmap_title'])
         # Heatmapa oszustw na podstawie lokalizacji
         if not fraud_countries.empty:
             # Przygotowanie danych geograficznych
-            # Użyjemy szerokości i długości geograficznej krajów z pycountry lub manualnie
-            country_coords = {
-                'Poland': (52.237049, 21.017532),
-                'Germany': (51.165691, 10.451526),
-                'Unknown': (20.0, 0.0)  # Centrum świata
-                # Dodaj inne kraje w razie potrzeby
-            }
-            def get_lat_lon(country_name):
-                return country_coords.get(country_name, (20.0, 0.0))
-            fraud_countries['lat'] = fraud_countries['country'].apply(lambda x: get_lat_lon(x)[0])
-            fraud_countries['lon'] = fraud_countries['country'].apply(lambda x: get_lat_lon(x)[1])
             # Tworzenie Heatmapy
             fig_heatmap = px.density_mapbox(
                 fraud_countries,
@@ -290,11 +279,11 @@ def main(language):
                 mapbox_style="stamen-terrain",
                 title=translations['heatmap_title']
             )
-            fig_heatmap.update_geos(showcountries=True, showcoastlines=True)  # Naprawiony argument
             st.plotly_chart(fig_heatmap, use_container_width=True)
         else:
             st.info(translations['no_data'])
         # Gauge Chart - Procentowy udział oszustw
         st.markdown("### " + translations['fraud_percentage'])
         fig_gauge = go.Figure(go.Indicator(
@@ -319,4 +308,3 @@ def main(language):
             }
         ))
         st.plotly_chart(fig_gauge, use_container_width=True)

 import re
 from datetime import datetime, timedelta
 import pycountry
+import requests
+# Importowanie funkcji z utils/functions.py
+from utils.functions import get_phone_info, get_stats, get_history, get_fake_numbers
 # Opcjonalne: Importowanie dodatkowych komponentów
+from streamlit_extras.metric_cards import style_metric_cards
+from streamlit_elements import elements, mui, html
 # Definiowanie ścieżek do plików JSON
 FAKE_NUMBERS_FILE = os.path.join('data', 'fake_numbers.json')
         'fraud_trend_title': "Trendy Oszustw w Czasie",
         'risk_distribution': "Rozkład Ocen Ryzyka Oszustwa",
         'fraud_country_distribution': "Rozkład Oszustw Według Krajów",
+        'heatmap_title': "Heatmapa Oszustw w Czasie",
         'fraud_vs_nonfraud': "Procentowy Podział: Oszustwa vs Bezpieczne",
         'no_data': "Brak dostępnych danych do wyświetlenia.",
         'download_button': "📥 Pobierz dane jako CSV",
         'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
         'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
         'fraud_country_distribution': "Betrug nach Ländern",
+        'heatmap_title': "Heatmap der Betrügereien im Laufe der Zeit",
         'fraud_vs_nonfraud': "Prozentanteil: Betrug vs Sichere Nachrichten",
         'no_data': "Keine Daten zur Anzeige verfügbar.",
         'download_button': "📥 Daten als CSV herunterladen",
         'fraud_trend_title': "Fraud Trends Over Time",
         'risk_distribution': "Distribution of Fraud Risk Scores",
         'fraud_country_distribution': "Fraud Distribution by Countries",
+        'heatmap_title': "Fraud Heatmap Over Time",
         'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
         'no_data': "No data available to display.",
         'download_button': "📥 Download data as CSV",
     }
 }
 def main(language):
     translations = page_translations.get(language, page_translations['English'])
     st.title(translations['header'])
     st.markdown(translations['description'])
     # Pobieranie danych z plików JSON
+    stats = get_stats()
+    history = get_history()
     # Kluczowe metryki
     total_analyses = stats.get("total_analyses", 0)
     total_frauds_detected = stats.get("total_frauds_detected", 0)
     # Stylizacja kart metryk
+    style_metric_cards()
     # Wyświetlenie metryk
     col1, col2, col3 = st.columns(3)
         fraud_percentage = 0  # Ustawienie na 0% w przypadku braku analiz
     col3.metric(label=translations['fraud_percentage'], value=f"{fraud_percentage:.2f}%")
+    st.markdown("---")
+    # Dodanie interaktywnego filtra daty w bocznym panelu
+    st.sidebar.header(translations['select_date_range'])
+    start_date = st.sidebar.date_input(
         "Start Date",
+        value=datetime.now().date() - timedelta(days=translations['recent_days']),
+        min_value=datetime.now().date() - timedelta(days=365),
+        max_value=datetime.now().date()
     )
+    end_date = st.sidebar.date_input(
         "End Date",
+        value=datetime.now().date(),
         min_value=start_date,
+        max_value=datetime.now().date()
     )
     # Filtracja historii na podstawie daty
         mask = (df_history['timestamp'].dt.date >= start_date) & (df_history['timestamp'].dt.date <= end_date)
         df_filtered = df_history.loc[mask]
+        # Wyświetlenie tabeli historii analiz
+        st.markdown(f"### {translations['history_title']}")
+        if not df_filtered.empty:
+            st.dataframe(df_filtered[['timestamp', 'phone_number', 'risk_assessment']], height=300)
+            # Opcjonalnie: Dodanie możliwości eksportu danych
+            csv = df_filtered.to_csv(index=False).encode('utf-8')
+            st.download_button(
+                label=translations['download_button'],
+                data=csv,
+                file_name='analysis_history.csv',
+                mime='text/csv',
+            )
+        else:
+            st.info(translations['no_data'])
+        st.markdown("---")
         # Aktualizacja statystyk na podstawie filtrowanej historii
         total_filtered = df_filtered.shape[0]
+        frauds_filtered = df_filtered['risk_assessment'].apply(lambda x: int(re.search(r'(\d+)/10', x).group(1)) >= 7 if re.search(r'(\d+)/10', x) else False).sum()
         fraud_percentage_filtered = (frauds_filtered / total_filtered) * 100 if total_filtered > 0 else 0
         # Wyświetlenie metryk dla filtrowanej historii
         def extract_risk_score(risk_assessment):
             match = re.search(r'(\d+)/10', risk_assessment)
             return int(match.group(1)) if match else 0
         df_filtered['risk_score'] = df_filtered['risk_assessment'].apply(extract_risk_score)
         risk_distribution = df_filtered['risk_score'].value_counts().sort_index().reset_index()
         risk_distribution.columns = ['risk_score', 'count']
         def get_country(row):
             country, _ = get_phone_info(row['phone_number'])
             return country if country else "Unknown"
         df_filtered['country'] = df_filtered.apply(get_country, axis=1)
         fraud_countries = df_filtered['country'].value_counts().reset_index()
         fraud_countries.columns = ['country', 'counts']
         # Dodanie kodów krajów
+        fraud_countries['iso_alpha'] = fraud_countries['country'].apply(lambda x: pycountry.countries.lookup(x).alpha_3 if x != "Unknown" else None)
         fraud_countries = fraud_countries.dropna(subset=['iso_alpha'])
         if not fraud_countries.empty:
             fig_map = px.choropleth(
                 fraud_countries,
                 color_continuous_scale=px.colors.sequential.Plasma,
                 title=translations['fraud_country_distribution_title']
             )
+            fig_map.update_geos(showcountries=True, showcoastlines=True)
             st.plotly_chart(fig_map, use_container_width=True)
         else:
             st.info(translations['no_data'])
+        st.markdown("---")
         # Dodatkowe Wizualizacje
         st.markdown("### " + translations['heatmap_title'])
         # Heatmapa oszustw na podstawie lokalizacji
         if not fraud_countries.empty:
             # Przygotowanie danych geograficznych
+            # Użyjemy szerokości i długości geograficznej krajów
+            country_coords = {}
+            for country in fraud_countries['country'].unique():
+                if country == "Unknown":
+                    country_coords[country] = (0, 0)  # Centrum świata
+                else:
+                    try:
+                        country_obj = pycountry.countries.lookup(country)
+                        # Użyjemy średnich szerokości i długości geograficznej
+                        geocode_url = f"https://restcountries.com/v3.1/name/{country}"
+                        response = requests.get(geocode_url)
+                        if response.status_code == 200:
+                            data = response.json()
+                            lat = data[0]['latlng'][0]
+                            lon = data[0]['latlng'][1]
+                            country_coords[country] = (lat, lon)
+                        else:
+                            country_coords[country] = (0, 0)
+                    except:
+                        country_coords[country] = (0, 0)
+            fraud_countries['lat'] = fraud_countries['country'].apply(lambda x: country_coords.get(x, (0,0))[0])
+            fraud_countries['lon'] = fraud_countries['country'].apply(lambda x: country_coords.get(x, (0,0))[1])
             # Tworzenie Heatmapy
             fig_heatmap = px.density_mapbox(
                 fraud_countries,
                 mapbox_style="stamen-terrain",
                 title=translations['heatmap_title']
             )
+            fig_heatmap.update_geos(showcountries=True, showcoastlines=True)
             st.plotly_chart(fig_heatmap, use_container_width=True)
         else:
             st.info(translations['no_data'])
         # Gauge Chart - Procentowy udział oszustw
         st.markdown("### " + translations['fraud_percentage'])
         fig_gauge = go.Figure(go.Indicator(
             }
         ))
         st.plotly_chart(fig_gauge, use_container_width=True)