Spaces:

rafaldembski
/

ScamDetector

Running

App Files Files Community

ScamDetector / pages /Statistics.py

rafaldembski

Update pages/Statistics.py

6a02207 verified 7 months ago

raw

history blame contribute delete

14.7 kB

	# pages/Statistics.py

	import streamlit as st
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	import os
	import re
	from datetime import datetime, timedelta
	import pycountry
	import requests

	# Importowanie funkcji z utils/functions.py
	from utils.functions import get_phone_info, get_stats, get_history, get_fake_numbers

	# User-facing texts for the "Statistics" page, keyed by UI language name.
	# Each language entry also carries 'recent_days', the default length (in days)
	# of the date range preselected in the date filter.
	page_translations = {
	    "Polish": {
	        "page_title": "📊 Statystyki",
	        "page_icon": "📈",
	        "header": "📊 Statystyki Aplikacji",
	        "description": "Poniżej znajdują się statystyki analizy wiadomości w aplikacji.",
	        "total_analyses": "Liczba przeanalizowanych wiadomości",
	        "total_frauds_detected": "Wykryte oszustwa",
	        "fraud_percentage": "Procent oszustw",
	        "history_title": "Historia Analizowanych Wiadomości",
	        "frauds_over_time": "Liczba Wykrytych Oszustw w Czasie",
	        "risk_distribution_title": "Rozkład Ocen Ryzyka Oszustwa",
	        "fraud_country_distribution_title": "Rozkład Oszustw Według Krajów",
	        "fraud_trend_title": "Trendy Oszustw w Czasie",
	        "risk_distribution": "Rozkład Ocen Ryzyka Oszustwa",
	        "fraud_country_distribution": "Rozkład Oszustw Według Krajów",
	        "fraud_vs_nonfraud": "Procentowy Podział: Oszustwa vs Bezpieczne",
	        "no_data": "Brak dostępnych danych do wyświetlenia.",
	        "download_button": "📥 Pobierz dane jako CSV",
	        "select_date_range": "Wybierz zakres dat:",
	        "recent_days": 30,  # default date range, in days
	    },
	    "German": {
	        "page_title": "📊 Statistiken",
	        "page_icon": "📈",
	        "header": "📊 Anwendungsstatistiken",
	        "description": "Nachfolgend finden Sie die Statistiken zur Nachrichtenanalyse in der Anwendung.",
	        "total_analyses": "Anzahl der analysierten Nachrichten",
	        "total_frauds_detected": "Erkannte Betrugsfälle",
	        "fraud_percentage": "Betrugsprozentsatz",
	        "history_title": "Verlauf analysierter Nachrichten",
	        "frauds_over_time": "Anzahl erkannter Betrugsfälle im Laufe der Zeit",
	        "risk_distribution_title": "Verteilung der Betrugsrisikobewertungen",
	        "fraud_country_distribution_title": "Verteilung der Betrugsfälle nach Ländern",
	        "fraud_trend_title": "Betrugstrends im Laufe der Zeit",
	        "risk_distribution": "Verteilung der Betrugsrisikobewertungen",
	        "fraud_country_distribution": "Verteilung der Betrugsfälle nach Ländern",
	        "fraud_vs_nonfraud": "Prozentuale Aufteilung: Betrug vs. Sicher",
	        "no_data": "Keine Daten zum Anzeigen verfügbar.",
	        "download_button": "📥 Daten als CSV herunterladen",
	        "select_date_range": "Datumsbereich auswählen:",
	        "recent_days": 30,
	    },
	    "English": {
	        "page_title": "📊 Statistics",
	        "page_icon": "📈",
	        "header": "📊 Application Statistics",
	        "description": "Below are the statistics of message analysis in the app.",
	        "total_analyses": "Total Messages Analyzed",
	        "total_frauds_detected": "Frauds Detected",
	        "fraud_percentage": "Fraud Percentage",
	        "history_title": "History of Analyzed Messages",
	        "frauds_over_time": "Number of Detected Frauds Over Time",
	        "risk_distribution_title": "Distribution of Fraud Risk Scores",
	        "fraud_country_distribution_title": "Fraud Distribution by Countries",
	        "fraud_trend_title": "Fraud Trends Over Time",
	        "risk_distribution": "Distribution of Fraud Risk Scores",
	        "fraud_country_distribution": "Fraud Distribution by Countries",
	        "fraud_vs_nonfraud": "Fraud vs Safe Messages Percentage",
	        "no_data": "No data available to display.",
	        "download_button": "📥 Download data as CSV",
	        "select_date_range": "Select date range:",
	        "recent_days": 30,
	    },
	}

	# Lower-cased country names (Polish, English and German spellings) mapped to
	# ISO 3166-1 alpha-3 codes. "Unknown" placeholders map to None.
	country_name_mapping = {
	    # Germany
	    "niemcy": "DEU",
	    "germany": "DEU",
	    "deutschland": "DEU",
	    # Poland
	    "polska": "POL",
	    "poland": "POL",
	    # Austria
	    "österreich": "AUT",
	    "austria": "AUT",
	    # France
	    "francja": "FRA",
	    "france": "FRA",
	    "frankreich": "FRA",
	    # Italy
	    "włochy": "ITA",
	    "italy": "ITA",
	    "italien": "ITA",
	    # Spain
	    "hiszpania": "ESP",
	    "spain": "ESP",
	    "spanien": "ESP",
	    # United States
	    "stany zjednoczone": "USA",
	    "usa": "USA",
	    "vereinigte staaten": "USA",
	    # United Kingdom
	    "wielka brytania": "GBR",
	    "united kingdom": "GBR",
	    "vereinigtes königreich": "GBR",
	    # Placeholders for an unresolved country
	    "unknown": None,
	    "nieznany": None,
	    "unbekannt": None,
	    # Add further countries here as needed.
	}

	def get_iso_alpha3(country_name):
	    """Return the ISO 3166-1 alpha-3 code for a country name, or None.

	    The name is first looked up in ``country_name_mapping`` (case-insensitive,
	    surrounding whitespace ignored). If the mapping has no usable code,
	    ``pycountry.countries.lookup`` is tried as a fallback.

	    Args:
	        country_name: Country name as text. Non-string or blank values are
	            treated as an unknown country.

	    Returns:
	        The three-letter country code, or None when the country cannot be
	        resolved.
	    """
	    # Guard first: calling .lower() on None/NaN would raise AttributeError.
	    if not isinstance(country_name, str):
	        return None
	    cleaned = country_name.strip()
	    if not cleaned:
	        return None

	    country_code = country_name_mapping.get(cleaned.lower())
	    if country_code:
	        return country_code

	    # Missing from the local mapping (or mapped to None): ask pycountry.
	    try:
	        return pycountry.countries.lookup(cleaned).alpha_3
	    except LookupError:
	        return None

	# Risk scores at or above this value (out of 10) are counted as fraud.
	_FRAUD_THRESHOLD = 7
	# Matches the "<score>/10" fragment inside a risk assessment text.
	_RISK_SCORE_RE = re.compile(r'(\d+)/10')


	def _extract_risk_score(risk_assessment):
	    """Return the integer preceding "/10" in the text, or 0 if there is none."""
	    # Non-string cells (e.g. NaN from missing data) cannot be searched.
	    if not isinstance(risk_assessment, str):
	        return 0
	    match = _RISK_SCORE_RE.search(risk_assessment)
	    return int(match.group(1)) if match else 0


	def _lookup_country(phone_number):
	    """Return the country reported by get_phone_info, or "Unknown" if falsy."""
	    country, _ = get_phone_info(phone_number)
	    return country if country else "Unknown"


	def main(language):
	    """Render the statistics page.

	    Shows the global counters from ``get_stats()``, a date-range filter, and,
	    for the history entries from ``get_history()`` inside that range, a table
	    with CSV export, summary metrics, a fraud/non-fraud pie chart, a daily
	    fraud trend, the risk-score distribution, a per-country map and a gauge.

	    Args:
	        language: Key into ``page_translations``; unknown values fall back to
	            the 'Polish' texts.
	    """
	    translations = page_translations.get(language, page_translations['Polish'])

	    st.title(translations['header'])
	    st.markdown(translations['description'])

	    # Load the aggregated stats and the analysis history.
	    try:
	        stats = get_stats()
	        history = get_history()
	    except Exception as e:
	        st.error(f"{translations['no_data']} ({e})")
	        st.stop()

	    # Key metrics over all recorded analyses.
	    total_analyses = stats.get("total_analyses", 0)
	    total_frauds_detected = stats.get("total_frauds_detected", 0)
	    if total_analyses > 0:
	        fraud_percentage = (total_frauds_detected / total_analyses) * 100
	    else:
	        fraud_percentage = 0  # no analyses yet: report 0%

	    col1, col2, col3 = st.columns(3)
	    with col1:
	        st.metric(label=translations['total_analyses'], value=str(total_analyses))
	    with col2:
	        st.metric(label=translations['total_frauds_detected'], value=str(total_frauds_detected))
	    with col3:
	        st.metric(label=translations['fraud_percentage'], value=f"{fraud_percentage:.2f}%")

	    st.markdown("---")

	    # Interactive date filter in the main area.
	    st.header(translations['select_date_range'])
	    try:
	        today = datetime.now().date()
	        col_start, col_end = st.columns(2)
	        with col_start:
	            start_date = st.date_input(
	                translations['select_date_range'] + " - " + "Start",
	                value=today - timedelta(days=translations['recent_days']),
	                min_value=today - timedelta(days=365),
	                max_value=today
	            )
	        with col_end:
	            end_date = st.date_input(
	                translations['select_date_range'] + " - " + "End",
	                value=today,
	                min_value=start_date,
	                max_value=today
	            )
	    except Exception as e:
	        st.error(f"{translations['no_data']} ({e})")
	        st.stop()

	    if not history:
	        st.info(translations['no_data'])
	        return

	    # Restrict the history to the selected date range.
	    try:
	        df_history = pd.DataFrame(history)
	        # Unparseable timestamps become NaT and are dropped.
	        df_history['timestamp'] = pd.to_datetime(df_history['timestamp'], errors='coerce')
	        df_history = df_history.dropna(subset=['timestamp'])
	        entry_dates = df_history['timestamp'].dt.date
	        mask = (entry_dates >= start_date) & (entry_dates <= end_date)
	        # Copy so the derived columns added below land on an independent
	        # frame rather than on a slice of df_history.
	        df_filtered = df_history.loc[mask].copy()
	    except Exception as e:
	        st.error(f"{translations['no_data']} ({e})")
	        st.stop()

	    # History table with optional CSV export.
	    st.markdown(f"### {translations['history_title']}")
	    if not df_filtered.empty:
	        st.dataframe(df_filtered[['timestamp', 'phone_number', 'risk_assessment']], height=300)

	        csv = df_filtered.to_csv(index=False).encode('utf-8')
	        st.download_button(
	            label=translations['download_button'],
	            data=csv,
	            file_name='analysis_history.csv',
	            mime='text/csv',
	        )
	    else:
	        st.info(translations['no_data'])

	    st.markdown("---")

	    # Derived columns are added only after the export above so the CSV keeps
	    # containing just the stored history fields.
	    df_filtered['risk_score'] = (
	        df_filtered['risk_assessment'].apply(_extract_risk_score).astype(int)
	    )
	    is_fraud = df_filtered['risk_score'] >= _FRAUD_THRESHOLD

	    # Statistics for the filtered history.
	    total_filtered = df_filtered.shape[0]
	    frauds_filtered = int(is_fraud.sum())
	    fraud_percentage_filtered = (frauds_filtered / total_filtered) * 100 if total_filtered > 0 else 0

	    st.markdown("### " + translations['fraud_trend_title'])
	    col1, col2, col3 = st.columns(3)
	    with col1:
	        st.metric(label=translations['total_analyses'], value=str(total_filtered))
	    with col2:
	        st.metric(label=translations['total_frauds_detected'], value=str(frauds_filtered))
	    with col3:
	        st.metric(label=translations['fraud_percentage'], value=f"{fraud_percentage_filtered:.2f}%")

	    # Fraud vs non-fraud share.
	    st.markdown("### " + translations['fraud_vs_nonfraud'])
	    fraud_data = [frauds_filtered, total_filtered - frauds_filtered]
	    # NOTE(review): the second slice holds the non-fraud messages but is
	    # labelled with the 'total_analyses' text; the translations have no
	    # dedicated "safe messages" entry to use instead.
	    fraud_labels = [translations['total_frauds_detected'], translations['total_analyses']]
	    fig_fraud_pie = go.Figure(data=[go.Pie(labels=fraud_labels, values=fraud_data, hole=.3,
	                                           marker_colors=['#FF6347', '#4682B4'])])
	    fig_fraud_pie.update_layout(title_text=translations['fraud_vs_nonfraud'])
	    st.plotly_chart(fig_fraud_pie, use_container_width=True)

	    # Detected frauds per day: sum the fraud flag so only messages at or above
	    # the threshold are counted, while days without fraud still show as 0.
	    st.markdown("### " + translations['frauds_over_time'])
	    frauds_label = translations['total_frauds_detected']
	    fraud_over_time = (
	        is_fraud.astype(int)
	        .groupby(df_filtered['timestamp'].dt.date)
	        .sum()
	        .reset_index()
	    )
	    fraud_over_time.columns = ['Date', frauds_label]
	    fig_trend = px.line(fraud_over_time, x='Date', y=frauds_label, title=translations['frauds_over_time'],
	                        labels={'Date': translations['select_date_range'], frauds_label: frauds_label},
	                        markers=True)
	    fig_trend.update_traces(line=dict(color='firebrick'))
	    st.plotly_chart(fig_trend, use_container_width=True)

	    # Distribution of risk scores.
	    st.markdown("### " + translations['risk_distribution_title'])
	    risk_distribution = df_filtered['risk_score'].value_counts().sort_index().reset_index()
	    risk_distribution.columns = ['risk_score', 'count']
	    fig_risk = px.bar(risk_distribution, x='risk_score', y='count', title=translations['risk_distribution'],
	                      labels={'risk_score': translations['risk_distribution'], 'count': translations['total_analyses']},
	                      color='risk_score', color_continuous_scale=px.colors.sequential.RdBu)
	    st.plotly_chart(fig_risk, use_container_width=True)

	    # Distribution by country.
	    st.markdown("### " + translations['fraud_country_distribution_title'])
	    # Series.apply keeps returning a Series for an empty selection, so the
	    # column assignment stays valid when no entries match the date range.
	    df_filtered['country'] = df_filtered['phone_number'].apply(_lookup_country)
	    fraud_countries = df_filtered['country'].value_counts().reset_index()
	    fraud_countries.columns = ['country', 'counts']

	    # Attach ISO alpha-3 codes; rows without a resolvable code are dropped.
	    fraud_countries['iso_alpha'] = fraud_countries['country'].apply(
	        lambda x: get_iso_alpha3(x) if x != "Unknown" else None)
	    fraud_countries = fraud_countries.dropna(subset=['iso_alpha'])

	    if not fraud_countries.empty:
	        fig_map = px.choropleth(
	            fraud_countries,
	            locations='iso_alpha',
	            color='counts',
	            hover_name='country',
	            color_continuous_scale=px.colors.sequential.Plasma,
	            title=translations['fraud_country_distribution_title']
	        )
	        fig_map.update_geos(showcountries=True, showcoastlines=True)
	        st.plotly_chart(fig_map, use_container_width=True)
	    else:
	        st.info(translations['no_data'])

	    st.markdown("---")

	    # Gauge chart: fraud share within the selected range.
	    st.markdown("### " + translations['fraud_percentage'])
	    fig_gauge = go.Figure(go.Indicator(
	        mode="gauge+number",
	        value=fraud_percentage_filtered,
	        title={'text': translations['fraud_percentage']},
	        gauge={
	            'axis': {'range': [0, 100]},
	            'bar': {'color': "darkblue"},
	            'steps': [
	                {'range': [0, 20], 'color': "#55efc4"},
	                {'range': [20, 40], 'color': "#81ecec"},
	                {'range': [40, 60], 'color': "#74b9ff"},
	                {'range': [60, 80], 'color': "#a29bfe"},
	                {'range': [80, 100], 'color': "#d63031"}
	            ],
	            'threshold': {
	                'line': {'color': "red", 'width': 4},
	                'thickness': 0.75,
	                'value': 70
	            }
	        }
	    ))
	    st.plotly_chart(fig_gauge, use_container_width=True)

	# Make sure st.set_page_config() is called only in app.py