Spaces:

rafaldembski
/

ScamDetector

Running

App Files Files Community

rafaldembski committed on Oct 1, 2024

Commit

bd91198

verified ·

1 Parent(s): 0efc6d0

Update utils/functions.py

Browse files

Files changed (1) hide show

utils/functions.py +86 -28

utils/functions.py CHANGED Viewed

@@ -1,15 +1,16 @@
 # utils/functions.py
 import phonenumbers
-from phonenumbers import geocoder, carrier
 import re
 import requests
 import os
 from datetime import datetime
 import logging
 import json
-import pycountry  # Upewnij się, że zainstalowałeś tę bibliotekę: pip install pycountry
 # Konfiguracja logowania
 logging.basicConfig(
@@ -25,13 +26,16 @@ FAKE_NUMBERS_FILE = os.path.join(DATA_DIR, 'fake_numbers.json')
 HISTORY_FILE = os.path.join(DATA_DIR, 'history.json')
 STATS_FILE = os.path.join(DATA_DIR, 'stats.json')
 # Funkcje pomocnicze
 def load_json(file_path):
     """Ładuje dane z pliku JSON. Jeśli plik nie istnieje, zwraca pustą listę lub domyślny obiekt."""
     if not os.path.exists(file_path):
         if file_path.endswith('stats.json'):
-            return {"total_analyses": 0, "total_frauds_detected": 0}
         else:
             return []
     with open(file_path, 'r', encoding='utf-8') as file:
@@ -41,7 +45,7 @@ def load_json(file_path):
         except json.JSONDecodeError:
             logging.error(f"Nie można załadować danych z {file_path}. Plik jest uszkodzony.")
             if file_path.endswith('stats.json'):
-                return {"total_analyses": 0, "total_frauds_detected": 0}
             return []
 def save_json(file_path, data):
@@ -80,21 +84,24 @@ def get_fake_numbers():
     fake_numbers = load_json(FAKE_NUMBERS_FILE)
     return fake_numbers
-def add_to_history(message, phone_number, analysis, risk, recommendations):
     """
     Dodaje wpis do historii analiz w pliku history.json.
     """
     history = load_json(HISTORY_FILE)
     history.append({
-        "timestamp": datetime.now().isoformat(),
-        "message": message,
-        "phone_number": phone_number,
-        "analysis": analysis,
-        "risk_assessment": risk,
-        "recommendations": recommendations
     })
     save_json(HISTORY_FILE, history)
-    logging.info(f"Dodano wpis do history.json dla numeru {phone_number}.")
 def get_history():
     """
@@ -109,11 +116,13 @@ def update_stats(fraud_detected=False):
     Aktualizuje statystyki analiz w pliku stats.json.
     """
     stats = load_json(STATS_FILE)
-    stats["total_analyses"] += 1
     if fraud_detected:
-        stats["total_frauds_detected"] += 1
     save_json(STATS_FILE, stats)
-    logging.info(f"Statystyki zostały zaktualizowane: Analiz {stats['total_analyses']}, Oszustw {stats['total_frauds_detected']}.")
 def get_stats():
     """
@@ -129,21 +138,21 @@ def get_phone_info(phone_number):
     """
     try:
         parsed_number = phonenumbers.parse(phone_number, None)
-        country = geocoder.description_for_number(parsed_number, 'pl')  # Zmiana na 'pl' dla polskiego
-        operator = carrier.name_for_number(parsed_number, 'pl')  # Zmiana na 'pl' dla polskiego
         if not country:
             country = "Nieznany"
         if not operator:
             operator = "Nieznany"
         logging.info(f"Numer {phone_number} - Kraj: {country}, Operator: {operator}.")
         return country, operator
-    except phonenumbers.NumberParseException as e:
         logging.error(f"Nie udało się przetworzyć numeru telefonu {phone_number}: {e}")
         return "Nieznany", "Nieznany"
 def simple_checks(message, language):
     """
-    Przeprowadza proste sprawdzenia heurystyczne wiadomości SMS.
     """
     warnings = []
     # Baza słów kluczowych (polski, niemiecki, angielski)
@@ -165,9 +174,9 @@ def simple_checks(message, language):
         warnings.append("Wiadomość zawiera prośbę o poufne informacje.")
     return warnings
-def analyze_message(message, phone_number, additional_info, api_key, language):
     """
-    Analizuje wiadomość SMS za pomocą API SambaNova.
     """
     if not api_key:
         logging.error("Brak klucza API.")
@@ -181,7 +190,7 @@ def analyze_message(message, phone_number, additional_info, api_key, language):
     system_prompts = {
         'Polish': """
-Jesteś zaawansowanym asystentem AI specjalizującym się w identyfikacji fałszywych wiadomości SMS. Twoim zadaniem jest przeprowadzenie szczegółowej analizy wiadomości, wykorzystując głęboki proces myślenia i dostarczając kompleksową ocenę. Twoja odpowiedź powinna być podzielona na trzy sekcje:
 <analysis>
 **Analiza Treści Wiadomości:**
@@ -206,7 +215,7 @@ Jesteś zaawansowanym asystentem AI specjalizującym się w identyfikacji fałsz
 Twoja odpowiedź powinna być sformatowana dokładnie w powyższy sposób, używając znaczników <analysis>, <risk_assessment> i <recommendations>. Upewnij się, że każda sekcja jest wypełniona kompletnie i szczegółowo.
         """,
         'German': """
-Du bist ein fortgeschrittener KI-Assistent, spezialisiert auf die Identifizierung gefälschter SMS-Nachrichten. Deine Aufgabe ist es, eine detaillierte Analyse der Nachricht durchzuführen, indem du einen tiefgreifenden Denkprozess nutzt und eine umfassende Bewertung lieferst. Deine Antwort sollte in drei Abschnitte unterteilt sein:
 <analysis>
 **Nachrichteninhaltsanalyse:**
@@ -231,7 +240,7 @@ Du bist ein fortgeschrittener KI-Assistent, spezialisiert auf die Identifizierun
 Deine Antwort sollte genau nach den oben genannten Richtlinien formatiert sein und die Markierungen <analysis>, <risk_assessment> und <recommendations> verwenden. Stelle sicher, dass jeder Abschnitt vollständig und detailliert ausgefüllt ist.
         """,
         'English': """
-You are an advanced AI assistant specializing in identifying fake SMS messages. Your task is to conduct a detailed analysis of the message, utilizing a deep thinking process and providing a comprehensive assessment. Your response should be divided into three sections:
 <analysis>
 **Message Content Analysis:**
@@ -257,12 +266,12 @@ Your response should be formatted exactly as specified above, using the <analysi
         """
     }
-    system_prompt = system_prompts.get(language, system_prompts['English'])  # Domyślnie angielski, jeśli język nie jest obsługiwany
     user_prompt = f"""Analyze the following message for potential fraud:
-Message: "{message}"
-Sender's Phone Number: "{phone_number}"
 Additional Information:
 {additional_info}
@@ -302,3 +311,52 @@ Provide your analysis and conclusions following the guidelines above."""
     except Exception as e:
         logging.error(f"Błąd połączenia z API: {e}")
         return f"Błąd połączenia z API: {e}", "Błąd analizy.", "Błąd analizy."

 # utils/functions.py
 import phonenumbers
+from phonenumbers import geocoder, carrier, NumberParseException
 import re
 import requests
 import os
 from datetime import datetime
 import logging
 import json
+import whois
+from PIL import Image
+import pytesseract
 # Konfiguracja logowania
 logging.basicConfig(
 HISTORY_FILE = os.path.join(DATA_DIR, 'history.json')
 STATS_FILE = os.path.join(DATA_DIR, 'stats.json')
+# Upewnij się, że katalog 'data' istnieje
+os.makedirs(DATA_DIR, exist_ok=True)
 # Funkcje pomocnicze
 def load_json(file_path):
     """Ładuje dane z pliku JSON. Jeśli plik nie istnieje, zwraca pustą listę lub domyślny obiekt."""
     if not os.path.exists(file_path):
         if file_path.endswith('stats.json'):
+            return {"total": 0, "fraud": 0, "safe": 0}
         else:
             return []
     with open(file_path, 'r', encoding='utf-8') as file:
         except json.JSONDecodeError:
             logging.error(f"Nie można załadować danych z {file_path}. Plik jest uszkodzony.")
             if file_path.endswith('stats.json'):
+                return {"total": 0, "fraud": 0, "safe": 0}
             return []
 def save_json(file_path, data):
     fake_numbers = load_json(FAKE_NUMBERS_FILE)
     return fake_numbers
def add_to_history(content, sender_info, analysis_text, risk_text, recommendations_text):
    """
    Append one analysis record to the history kept in history.json.

    The record stores the current local time (``YYYY-MM-DD HH:MM:SS``), the
    fixed type ``"Analysis"`` and a ``details`` mapping holding the analysed
    content, the sender information and the three result sections.
    """
    entries = load_json(HISTORY_FILE)

    # Assemble the nested payload first so the record itself stays flat.
    details = {
        "content": content,
        "sender_info": sender_info,
        "analysis": analysis_text,
        "risk_assessment": risk_text,
        "recommendations": recommendations_text,
    }
    record = {
        "date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "type": "Analysis",
        "details": details,
    }

    entries.append(record)
    save_json(HISTORY_FILE, entries)
    logging.info(f"Dodano wpis do history.json dla nadawcy {sender_info}.")
 def get_history():
     """
     Aktualizuje statystyki analiz w pliku stats.json.
     """
     stats = load_json(STATS_FILE)
+    stats["total"] += 1
     if fraud_detected:
+        stats["fraud"] += 1
+    else:
+        stats["safe"] += 1
     save_json(STATS_FILE, stats)
+    logging.info(f"Statystyki zostały zaktualizowane: Analiz {stats['total']}, Oszustw {stats['fraud']}.")
 def get_stats():
     """
     """
     try:
         parsed_number = phonenumbers.parse(phone_number, None)
+        country = geocoder.description_for_number(parsed_number, 'pl')
+        operator = carrier.name_for_number(parsed_number, 'pl')
         if not country:
             country = "Nieznany"
         if not operator:
             operator = "Nieznany"
         logging.info(f"Numer {phone_number} - Kraj: {country}, Operator: {operator}.")
         return country, operator
+    except NumberParseException as e:
         logging.error(f"Nie udało się przetworzyć numeru telefonu {phone_number}: {e}")
         return "Nieznany", "Nieznany"
 def simple_checks(message, language):
     """
+    Przeprowadza proste sprawdzenia heurystyczne wiadomości.
     """
     warnings = []
     # Baza słów kluczowych (polski, niemiecki, angielski)
         warnings.append("Wiadomość zawiera prośbę o poufne informacje.")
     return warnings
+def analyze_message(content, sender_info, additional_info, api_key, language):
     """
+    Analizuje treść wiadomości za pomocą modelu AI, wykorzystując system prompts.
     """
     if not api_key:
         logging.error("Brak klucza API.")
     system_prompts = {
         'Polish': """
+Jesteś zaawansowanym asystentem AI specjalizującym się w identyfikacji fałszywych wiadomości. Twoim zadaniem jest przeprowadzenie szczegółowej analizy poniższej wiadomości, wykorzystując głęboki proces myślenia i dostarczając kompleksową ocenę. Twoja odpowiedź powinna być podzielona na trzy sekcje:
 <analysis>
 **Analiza Treści Wiadomości:**
 Twoja odpowiedź powinna być sformatowana dokładnie w powyższy sposób, używając znaczników <analysis>, <risk_assessment> i <recommendations>. Upewnij się, że każda sekcja jest wypełniona kompletnie i szczegółowo.
         """,
         'German': """
+Du bist ein fortgeschrittener KI-Assistent, spezialisiert auf die Identifizierung gefälschter Nachrichten. Deine Aufgabe ist es, eine detaillierte Analyse der folgenden Nachricht durchzuführen, indem du einen tiefgreifenden Denkprozess nutzt und eine umfassende Bewertung lieferst. Deine Antwort sollte in drei Abschnitte unterteilt sein:
 <analysis>
 **Nachrichteninhaltsanalyse:**
 Deine Antwort sollte genau nach den oben genannten Richtlinien formatiert sein und die Markierungen <analysis>, <risk_assessment> und <recommendations> verwenden. Stelle sicher, dass jeder Abschnitt vollständig und detailliert ausgefüllt ist.
         """,
         'English': """
+You are an advanced AI assistant specializing in identifying fake messages. Your task is to conduct a detailed analysis of the following message, utilizing a deep thinking process and providing a comprehensive assessment. Your response should be divided into three sections:
 <analysis>
 **Message Content Analysis:**
         """
     }
+    system_prompt = system_prompts.get(language, system_prompts['English'])
     user_prompt = f"""Analyze the following message for potential fraud:
+Message: "{content}"
+Sender Information: "{sender_info}"
 Additional Information:
 {additional_info}
     except Exception as e:
         logging.error(f"Błąd połączenia z API: {e}")
         return f"Błąd połączenia z API: {e}", "Błąd analizy.", "Błąd analizy."
def get_email_info(email_address):
    """
    Look up WHOIS information for the domain of a sender's e-mail address.

    Args:
        email_address: Sender address; everything after the last ``@`` is
            treated as the domain (a bare domain is accepted as before).

    Returns:
        A dict with the keys ``'domain'``, ``'organization'`` and
        ``'country'``, or ``None`` when no domain can be extracted or the
        WHOIS lookup fails.
    """

    def _as_text(value, default):
        # WHOIS parsers may return a list when a field occurs several times;
        # flatten it so callers always receive a plain string.
        if isinstance(value, (list, tuple, set)):
            value = ", ".join(str(item) for item in value if item)
        return str(value) if value else default

    # Reject unusable input before any network traffic is generated.
    if not isinstance(email_address, str):
        logging.error(f"Nieprawidłowy adres e-mail: {email_address!r}")
        return None
    domain = email_address.rsplit('@', 1)[-1].strip()
    if not domain:
        logging.error(f"Nieprawidłowy adres e-mail: {email_address!r}")
        return None

    try:
        w = whois.whois(domain)
        organization = _as_text(w.org, 'Nieznana')
        country = _as_text(w.country, 'Nieznany')
        logging.info(f"Domena {domain} - Organizacja: {organization}, Kraj: {country}.")
        return {
            'domain': domain,
            'organization': organization,
            'country': country
        }
    except Exception as e:
        # Deliberately broad: the lookup is best-effort and must never break
        # the analysis flow (network errors, parser errors, unknown TLDs...).
        logging.error(f"Nie udało się pobrać informacji WHOIS dla domeny {domain}: {e}")
        return None
def analyze_url(url, timeout=10):
    """
    Fetch the page behind ``url`` and return the beginning of its content.

    Args:
        url: Address to download; only ``http`` and ``https`` URLs with a
            host part are accepted.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot block the caller indefinitely.

    Returns:
        The first 500 characters of the response body on HTTP 200,
        otherwise a human-readable error message (always a string).
    """
    from urllib.parse import urlparse

    invalid_url_message = "Błąd podczas pobierania URL: nieprawidłowy lub nieobsługiwany adres URL"

    # The URL comes from an untrusted message, so validate it before use.
    if not isinstance(url, str) or not url.strip():
        logging.error(f"Nieprawidłowy lub nieobsługiwany adres URL: {url!r}")
        return invalid_url_message
    url = url.strip()
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse raises ValueError for e.g. malformed IPv6 literals.
        logging.error(f"Nieprawidłowy lub nieobsługiwany adres URL: {url!r}")
        return invalid_url_message
    if parsed.scheme.lower() not in ('http', 'https') or not parsed.netloc:
        logging.error(f"Nieprawidłowy lub nieobsługiwany adres URL: {url!r}")
        return invalid_url_message

    # NOTE(review): fetching attacker-supplied URLs from the server is an SSRF
    # vector (internal hosts, metadata endpoints); consider blocking private
    # address ranges if this runs inside a trusted network.
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            logging.info(f"Pobrano zawartość strony {url}.")
            # Return only the first 500 characters of the page content.
            return response.text[:500]
        else:
            logging.error(f"Nie udało się pobrać zawartości strony {url}. Kod statusu HTTP: {response.status_code}")
            return f"Nie udało się pobrać zawartości strony. Kod statusu HTTP: {response.status_code}"
    except Exception as e:
        # Best-effort helper: report every failure as text instead of raising.
        logging.error(f"Błąd podczas pobierania URL {url}: {e}")
        return f"Błąd podczas pobierania URL: {e}"
def extract_text_from_image(image_file, lang='pol'):
    """
    Extract text from an image with Tesseract OCR.

    Args:
        image_file: Path or file-like object accepted by ``Image.open``.
        lang: Tesseract language code(s) used for recognition; defaults to
            Polish. The matching Tesseract language data must be installed.

    Returns:
        The recognised text, or an error message string when the image
        cannot be opened or processed.
    """
    if image_file is None:
        message = "Błąd podczas ekstrakcji tekstu z obrazu: nie podano pliku obrazu"
        logging.error(message)
        return message

    try:
        # The context manager releases the image's file handle even when
        # OCR raises.
        with Image.open(image_file) as image:
            text = pytesseract.image_to_string(image, lang=lang)
        logging.info("Tekst został wyodrębniony z obrazu.")
        return text
    except Exception as e:
        # Best-effort helper: any failure is reported as text, never raised.
        logging.error(f"Błąd podczas ekstrakcji tekstu z obrazu: {e}")
        return f"Błąd podczas ekstrakcji tekstu z obrazu: {e}"