Spaces:

rafaldembski
/

ScamDetector

Running

App Files Files Community

rafaldembski committed on Sep 30, 2024

Commit

82d13f9

verified ·

1 Parent(s): 87c8540

Update pages/Statistics.py

Browse files

Files changed (1) hide show

pages/Statistics.py +44 -79

pages/Statistics.py CHANGED Viewed

@@ -29,11 +29,12 @@ page_translations = {
         'frauds_over_time': "Liczba wykrytych oszustw w czasie",
         'risk_distribution': "Rozkład ocen ryzyka oszustwa",
         'fraud_country_distribution': "Rozkład oszustw według krajów",
         'heatmap_title': "Mapa ciepła oszustw w czasie",
         'fraud_vs_nonfraud': "Procentowy podział: Oszustwa vs Bezpieczne",
         'no_data': "Brak dostępnych danych do wyświetlenia.",
-        'search_placeholder': "Wyszukaj numer telefonu lub treść wiadomości",
-        'date_filter_label': "Filtruj po dacie",
         'download_button': "📥 Pobierz dane jako CSV"
     },
     'German': {
@@ -48,11 +49,12 @@ page_translations = {
         'frauds_over_time': "Anzahl der erkannten Betrügereien im Laufe der Zeit",
         'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
         'fraud_country_distribution': "Betrug nach Ländern",
         'heatmap_title': "Heatmap der Betrügereien im Laufe der Zeit",
         'fraud_vs_nonfraud': "Prozentanteil: Betrug vs Sichere Nachrichten",
         'no_data': "Keine Daten zur Anzeige verfügbar.",
-        'search_placeholder': "Telefonnummer oder Nachrichtentext suchen",
-        'date_filter_label': "Nach Datum filtern",
         'download_button': "📥 Daten als CSV herunterladen"
     },
     'English': {
@@ -67,11 +69,12 @@ page_translations = {
         'frauds_over_time': "Number of Detected Frauds Over Time",
         'risk_distribution': "Distribution of Fraud Risk Scores",
         'fraud_country_distribution': "Fraud Distribution by Countries",
         'heatmap_title': "Fraud Heatmap Over Time",
         'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
         'no_data': "No data available to display.",
-        'search_placeholder': "Search phone number or message content",
-        'date_filter_label': "Filter by Date",
         'download_button': "📥 Download data as CSV"
     }
 }
@@ -114,11 +117,6 @@ def main(language):
     if history:
         st.markdown(f"### {translations['history_title']}")
-        # Pole wyszukiwania
-        search_query = st.text_input(translations['search_placeholder'], '')
-        # Dodanie filtrów daty
-        st.markdown(f"### {translations['date_filter_label']}")
         df_history = pd.DataFrame(history)
         # Sprawdzenie, czy 'timestamp' istnieje
@@ -132,103 +130,70 @@ def main(language):
         # Dodanie kolumny 'date' dla wizualizacji
         df_history['date'] = df_history['timestamp'].dt.date
-        # Wybór zakresu dat
-        if language == 'Polish':
-            start_label = "Data początkowa"
-            end_label = "Data końcowa"
-        elif language == 'German':
-            start_label = "Startdatum"
-            end_label = "Enddatum"
-        else:
-            start_label = "Start Date"
-            end_label = "End Date"
-        start_date = st.date_input(start_label, df_history['date'].min())
-        end_date = st.date_input(end_label, df_history['date'].max())
-        # Filtrowanie danych po dacie
-        df_filtered = df_history[
-            (df_history['date'] >= start_date) &
-            (df_history['date'] <= end_date)
-        ]
-        # Dodatkowe filtrowanie na podstawie zapytania wyszukiwania
-        if search_query:
-            df_filtered = df_filtered[
-                df_filtered['phone_number'].str.contains(search_query, case=False, na=False) |
-                df_filtered['message'].str.contains(search_query, case=False, na=False)
-            ]
         # Wyświetlenie tabeli historii
-        st.dataframe(df_filtered[['timestamp', 'phone_number', 'risk_assessment']], height=300)
         # Opcjonalnie: Dodanie możliwości eksportu danych
-        if not df_filtered.empty:
-            if language == 'Polish':
-                download_label = "📥 Pobierz dane jako CSV"
-            elif language == 'German':
-                download_label = "📥 Daten als CSV herunterladen"
-            else:
-                download_label = "📥 Download data as CSV"
-            csv = df_filtered.to_csv(index=False).encode('utf-8')
             st.download_button(
-                label=download_label,
                 data=csv,
                 file_name='analysis_history.csv',
                 mime='text/csv',
             )
-        # Wykres kołowy dla ocen ryzyka
-        st.markdown(f"### {translations['risk_distribution']}")
-        # Wyodrębnienie ocen ryzyka
         def extract_risk_score(risk_assessment):
             """Return the integer N from the first "N/10" found in the text, else 0.

             Non-string input (e.g. None or NaN for a record without a risk
             assessment) yields 0 instead of raising TypeError inside re.search.
             """
             if not isinstance(risk_assessment, str):
                 return 0
             match = re.search(r'(\d+)/10', risk_assessment)
             return int(match.group(1)) if match else 0
-        df_filtered['risk_score'] = df_filtered['risk_assessment'].apply(extract_risk_score)
-        risk_data = df_filtered['risk_score'].value_counts().sort_index()
-        risk_labels = [f'Risk {i}/10' for i in risk_data.index]
-        fig_risk_pie = go.Figure(data=[go.Pie(labels=risk_labels, values=risk_data, hole=.3, marker_colors=px.colors.sequential.RdBu)])
-        fig_risk_pie.update_layout(title_text=translations['risk_distribution'])
-        st.plotly_chart(fig_risk_pie, use_container_width=True)
-        # Wizualizacja rozkładu oszustw według krajów
-        st.markdown(f"### {translations['fraud_country_distribution']}")
-        # Dodanie informacji o kraju do historii
         def add_country_info(row):
             """Look up the country for this row's 'phone_number' via get_phone_info."""
             # get_phone_info yields a pair; only its first item (the country) is kept.
             country_name, _unused = get_phone_info(row['phone_number'])
             return country_name
-        df_filtered['country'] = df_filtered.apply(add_country_info, axis=1)
-        if df_filtered['country'].notnull().any():
-            # Przygotowanie danych geograficznych
-            country_counts = df_filtered['country'].value_counts().reset_index()
-            country_counts.columns = ['country', 'counts']
-            # Dodanie kolumny z kodem kraju (ISO Alpha-3)
-            def get_country_code(name):
-                try:
-                    return pycountry.countries.lookup(name).alpha_3
-                except:
-                    return None
-            country_counts['iso_alpha'] = country_counts['country'].apply(get_country_code)
-            country_counts = country_counts.dropna(subset=['iso_alpha'])
-            # Tworzenie mapy choropleth bez użycia Mapbox
             fig_map = px.choropleth(
-                country_counts,
                 locations='iso_alpha',
                 color='counts',
                 hover_name='country',
                 color_continuous_scale=px.colors.sequential.Plasma,
-                title=translations['fraud_country_distribution']
             )
-            fig_map.update_geos(showcountries=True, showcoastlines=True)  # Poprawka: usunięto powtórzenie 'showcountries'
             st.plotly_chart(fig_map, use_container_width=True)
         else:
             st.info(translations['no_data'])

         'frauds_over_time': "Liczba wykrytych oszustw w czasie",
         'risk_distribution': "Rozkład ocen ryzyka oszustwa",
         'fraud_country_distribution': "Rozkład oszustw według krajów",
+        'fraud_trend_title': "Trendy oszustw w czasie",
+        'risk_distribution_title': "Rozkład ocen ryzyka oszustwa",
+        'fraud_country_distribution_title': "Rozkład oszustw według krajów",
         'heatmap_title': "Mapa ciepła oszustw w czasie",
         'fraud_vs_nonfraud': "Procentowy podział: Oszustwa vs Bezpieczne",
         'no_data': "Brak dostępnych danych do wyświetlenia.",
         'download_button': "📥 Pobierz dane jako CSV"
     },
     'German': {
         'frauds_over_time': "Anzahl der erkannten Betrügereien im Laufe der Zeit",
         'risk_distribution': "Verteilung der Betrugsrisikobewertungen",
         'fraud_country_distribution': "Betrug nach Ländern",
+        'fraud_trend_title': "Betrugstrends im Laufe der Zeit",
+        'risk_distribution_title': "Verteilung der Betrugsrisikobewertungen",
+        'fraud_country_distribution_title': "Betrug nach Ländern",
         'heatmap_title': "Heatmap der Betrügereien im Laufe der Zeit",
         'fraud_vs_nonfraud': "Prozentanteil: Betrug vs Sichere Nachrichten",
         'no_data': "Keine Daten zur Anzeige verfügbar.",
         'download_button': "📥 Daten als CSV herunterladen"
     },
     'English': {
         'frauds_over_time': "Number of Detected Frauds Over Time",
         'risk_distribution': "Distribution of Fraud Risk Scores",
         'fraud_country_distribution': "Fraud Distribution by Countries",
+        'fraud_trend_title': "Fraud Trends Over Time",
+        'risk_distribution_title': "Distribution of Fraud Risk Scores",
+        'fraud_country_distribution_title': "Fraud Distribution by Countries",
         'heatmap_title': "Fraud Heatmap Over Time",
         'fraud_vs_nonfraud': "Fraud vs Safe Messages Percentage",
         'no_data': "No data available to display.",
         'download_button': "📥 Download data as CSV"
     }
 }
     if history:
         st.markdown(f"### {translations['history_title']}")
         df_history = pd.DataFrame(history)
         # Sprawdzenie, czy 'timestamp' istnieje
         # Dodanie kolumny 'date' dla wizualizacji
         df_history['date'] = df_history['timestamp'].dt.date
+        # Usunięcie sekcji wyszukiwania i filtrowania po dacie
         # Wyświetlenie tabeli historii
+        st.dataframe(df_history[['timestamp', 'phone_number', 'risk_assessment']], height=300)
         # Opcjonalnie: Dodanie możliwości eksportu danych
+        if not df_history.empty:
+            csv = df_history.to_csv(index=False).encode('utf-8')
             st.download_button(
+                label=translations['download_button'],
                 data=csv,
                 file_name='analysis_history.csv',
                 mime='text/csv',
             )
+        # Trend oszustw w czasie
+        st.markdown(f"### {translations['fraud_trend_title']}")
+        fraud_over_time = df_history.groupby(df_history['timestamp'].dt.date)['phone_number'].count().reset_index()
+        fraud_over_time.rename(columns={'phone_number': 'frauds_detected'}, inplace=True)
+        fig_trend = px.line(fraud_over_time, x='timestamp', y='frauds_detected', title=translations['frauds_over_time'])
+        st.plotly_chart(fig_trend, use_container_width=True)
+        # Rozkład ocen ryzyka
+        st.markdown(f"### {translations['risk_distribution_title']}")
         def extract_risk_score(risk_assessment):
             """Return the integer N from the first "N/10" found in the text, else 0.

             Non-string input (e.g. None or NaN for a record without a risk
             assessment) yields 0 instead of raising TypeError inside re.search.
             """
             if not isinstance(risk_assessment, str):
                 return 0
             match = re.search(r'(\d+)/10', risk_assessment)
             return int(match.group(1)) if match else 0
+        df_history['risk_score'] = df_history['risk_assessment'].apply(extract_risk_score)
+        risk_distribution = df_history['risk_score'].value_counts().sort_index().reset_index()
+        risk_distribution.columns = ['risk_score', 'count']
+        fig_risk = px.bar(risk_distribution, x='risk_score', y='count', title=translations['risk_distribution'], labels={'risk_score': 'Risk Score', 'count': 'Number of Messages'}, color='risk_score', color_continuous_scale=px.colors.sequential.RdBu)
+        st.plotly_chart(fig_risk, use_container_width=True)
+        # Rozkład oszustw według krajów
+        st.markdown(f"### {translations['fraud_country_distribution_title']}")
         def add_country_info(row):
             """Look up the country for this row's 'phone_number' via get_phone_info."""
             # get_phone_info yields a pair; only its first item (the country) is kept.
             country_name, _unused = get_phone_info(row['phone_number'])
             return country_name
+        df_history['country'] = df_history.apply(add_country_info, axis=1)
+        fraud_countries = df_history['country'].value_counts().reset_index()
+        fraud_countries.columns = ['country', 'counts']
+        # Dodanie kodów krajów
+        def get_country_code(name):
+            try:
+                return pycountry.countries.lookup(name).alpha_3
+            except:
+                return None
+        fraud_countries['iso_alpha'] = fraud_countries['country'].apply(get_country_code)
+        fraud_countries = fraud_countries.dropna(subset=['iso_alpha'])
+        if not fraud_countries.empty:
             fig_map = px.choropleth(
+                fraud_countries,
                 locations='iso_alpha',
                 color='counts',
                 hover_name='country',
                 color_continuous_scale=px.colors.sequential.Plasma,
+                title=translations['fraud_country_distribution_title']
             )
+            fig_map.update_geos(showcountries=True, showcoastlines=True)
             st.plotly_chart(fig_map, use_container_width=True)
         else:
             st.info(translations['no_data'])