Spaces:
Running
Running
Update utils/functions.py
Browse files- utils/functions.py +329 -163
utils/functions.py
CHANGED
@@ -8,9 +8,9 @@ import os
|
|
8 |
from datetime import datetime
|
9 |
import logging
|
10 |
import json
|
11 |
-
import whois # Upewnij si臋, 偶e modu艂 'python-whois' jest zainstalowany
|
12 |
from PIL import Image
|
13 |
-
import pytesseract
|
14 |
|
15 |
# Konfiguracja logowania
|
16 |
logging.basicConfig(
|
@@ -23,8 +23,8 @@ logging.basicConfig(
|
|
23 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
24 |
DATA_DIR = os.path.join(BASE_DIR, '..', 'data')
|
25 |
FAKE_NUMBERS_FILE = os.path.join(DATA_DIR, 'fake_numbers.json')
|
26 |
-
HISTORY_FILE = os.path.join(DATA_DIR, 'history.json')
|
27 |
-
STATS_FILE = os.path.join(DATA_DIR, 'stats.json')
|
28 |
|
29 |
# Upewnij się, że katalog 'data' istnieje
|
30 |
os.makedirs(DATA_DIR, exist_ok=True)
|
@@ -54,6 +54,8 @@ def save_json(file_path, data):
|
|
54 |
json.dump(data, file, ensure_ascii=False, indent=4)
|
55 |
logging.info(f"Dane zosta艂y zapisane do {file_path}.")
|
56 |
|
|
|
|
|
57 |
def add_fake_number(phone_number):
|
58 |
"""
|
59 |
Dodaje numer telefonu do pliku fake_numbers.json jako fa艂szywy, je艣li jeszcze go tam nie ma.
|
@@ -84,78 +86,7 @@ def get_fake_numbers():
|
|
84 |
fake_numbers = load_json(FAKE_NUMBERS_FILE)
|
85 |
return fake_numbers
|
86 |
|
87 |
-
|
88 |
-
"""
|
89 |
-
Dodaje wpis do historii analiz w pliku history.json.
|
90 |
-
"""
|
91 |
-
history = load_json(HISTORY_FILE)
|
92 |
-
history.append({
|
93 |
-
"timestamp": datetime.now().isoformat(),
|
94 |
-
"message": message,
|
95 |
-
"sender_info": sender_info,
|
96 |
-
"analysis": analysis_text,
|
97 |
-
"risk_assessment": risk_text,
|
98 |
-
"recommendations": recommendations_text
|
99 |
-
})
|
100 |
-
save_json(HISTORY_FILE, history)
|
101 |
-
logging.info(f"Dodano wpis do history.json dla nadawcy {sender_info}.")
|
102 |
-
|
103 |
-
def get_history():
|
104 |
-
"""
|
105 |
-
Pobiera histori臋 analiz z pliku history.json jako list臋 s艂ownik贸w.
|
106 |
-
"""
|
107 |
-
history = load_json(HISTORY_FILE)
|
108 |
-
logging.info("Historia analiz zosta艂a pobrana pomy艣lnie.")
|
109 |
-
return history
|
110 |
-
|
111 |
-
def get_analysis_history():
|
112 |
-
"""
|
113 |
-
Alias funkcji get_history dla zgodno艣ci z innymi modu艂ami.
|
114 |
-
"""
|
115 |
-
return get_history()
|
116 |
-
|
117 |
-
def update_stats(fraud_detected=False):
|
118 |
-
"""
|
119 |
-
Aktualizuje statystyki analiz w pliku stats.json.
|
120 |
-
"""
|
121 |
-
stats = load_json(STATS_FILE)
|
122 |
-
stats["total_analyses"] += 1
|
123 |
-
if fraud_detected:
|
124 |
-
stats["total_frauds_detected"] += 1
|
125 |
-
save_json(STATS_FILE, stats)
|
126 |
-
logging.info(f"Statystyki zosta艂y zaktualizowane: Analiz {stats['total_analyses']}, Oszustw {stats['total_frauds_detected']}.")
|
127 |
-
|
128 |
-
def get_stats():
|
129 |
-
"""
|
130 |
-
Pobiera statystyki analiz z pliku stats.json.
|
131 |
-
"""
|
132 |
-
stats = load_json(STATS_FILE)
|
133 |
-
logging.info("Statystyki zosta艂y pobrane pomy艣lnie.")
|
134 |
-
return stats
|
135 |
-
|
136 |
-
def get_statistics():
|
137 |
-
"""
|
138 |
-
Alias funkcji get_stats dla zgodno艣ci z innymi modu艂ami.
|
139 |
-
"""
|
140 |
-
return get_stats()
|
141 |
-
|
142 |
-
def get_phone_info(phone_number):
|
143 |
-
"""
|
144 |
-
Weryfikuje numer telefonu i zwraca informacje o kraju i operatorze.
|
145 |
-
"""
|
146 |
-
try:
|
147 |
-
parsed_number = phonenumbers.parse(phone_number, None)
|
148 |
-
country = geocoder.description_for_number(parsed_number, 'pl')
|
149 |
-
operator = carrier.name_for_number(parsed_number, 'pl')
|
150 |
-
if not country:
|
151 |
-
country = "Nieznany"
|
152 |
-
if not operator:
|
153 |
-
operator = "Nieznany"
|
154 |
-
logging.info(f"Numer {phone_number} - Kraj: {country}, Operator: {operator}.")
|
155 |
-
return country, operator
|
156 |
-
except NumberParseException as e:
|
157 |
-
logging.error(f"Nie uda艂o si臋 przetworzy膰 numeru telefonu {phone_number}: {e}")
|
158 |
-
return "Nieznany", "Nieznany"
|
159 |
|
160 |
def simple_checks(message, language):
|
161 |
"""
|
@@ -183,7 +114,7 @@ def simple_checks(message, language):
|
|
183 |
|
184 |
def analyze_message(content, sender_info, additional_info, api_key, language):
|
185 |
"""
|
186 |
-
Analizuje tre艣膰 wiadomo艣ci za pomoc膮 modelu AI, wykorzystuj膮c system prompts.
|
187 |
"""
|
188 |
if not api_key:
|
189 |
logging.error("Brak klucza API.")
|
@@ -197,79 +128,79 @@ def analyze_message(content, sender_info, additional_info, api_key, language):
|
|
197 |
|
198 |
system_prompts = {
|
199 |
'Polish': """
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
""",
|
224 |
'German': """
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
""",
|
249 |
'English': """
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
"""
|
274 |
}
|
275 |
|
@@ -319,26 +250,242 @@ Provide your analysis and conclusions following the guidelines above."""
|
|
319 |
logging.error(f"B艂膮d po艂膮czenia z API: {e}")
|
320 |
return f"B艂膮d po艂膮czenia z API: {e}", "B艂膮d analizy.", "B艂膮d analizy."
|
321 |
|
322 |
-
|
|
|
|
|
323 |
"""
|
324 |
-
|
325 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
326 |
try:
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
'
|
334 |
-
'
|
335 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
336 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
337 |
except Exception as e:
|
338 |
-
logging.error(f"
|
339 |
-
return
|
340 |
|
341 |
-
def
|
342 |
"""
|
343 |
Pobiera zawarto艣膰 strony internetowej pod podanym URL.
|
344 |
"""
|
@@ -346,8 +493,7 @@ def analyze_url(url):
|
|
346 |
response = requests.get(url)
|
347 |
if response.status_code == 200:
|
348 |
logging.info(f"Pobrano zawarto艣膰 strony {url}.")
|
349 |
-
#
|
350 |
-
return response.text[:500]
|
351 |
else:
|
352 |
logging.error(f"Nie uda艂o si臋 pobra膰 zawarto艣ci strony {url}. Kod statusu HTTP: {response.status_code}")
|
353 |
return f"Nie uda艂o si臋 pobra膰 zawarto艣ci strony. Kod statusu HTTP: {response.status_code}"
|
@@ -367,4 +513,24 @@ def extract_text_from_image(image_file):
|
|
367 |
except Exception as e:
|
368 |
logging.error(f"B艂膮d podczas ekstrakcji tekstu z obrazu: {e}")
|
369 |
return f"B艂膮d podczas ekstrakcji tekstu z obrazu: {e}"
|
370 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
from datetime import datetime
|
9 |
import logging
|
10 |
import json
|
11 |
+
import whois # Upewnij si臋, 偶e modu艂 'python-whois' jest zainstalowany: pip install python-whois
|
12 |
from PIL import Image
|
13 |
+
import pytesseract # Upewnij si臋, 偶e modu艂 'pytesseract' jest zainstalowany: pip install pytesseract
|
14 |
|
15 |
# Konfiguracja logowania
|
16 |
logging.basicConfig(
|
|
|
23 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
24 |
DATA_DIR = os.path.join(BASE_DIR, '..', 'data')
|
25 |
FAKE_NUMBERS_FILE = os.path.join(DATA_DIR, 'fake_numbers.json')
|
26 |
+
HISTORY_FILE = os.path.join(DATA_DIR, 'history.json') # Je艣li nie u偶ywasz historii, mo偶esz usun膮膰 te linie
|
27 |
+
STATS_FILE = os.path.join(DATA_DIR, 'stats.json') # Je艣li nie u偶ywasz statystyk, mo偶esz usun膮膰 te linie
|
28 |
|
29 |
# Upewnij się, że katalog 'data' istnieje
|
30 |
os.makedirs(DATA_DIR, exist_ok=True)
|
|
|
54 |
json.dump(data, file, ensure_ascii=False, indent=4)
|
55 |
logging.info(f"Dane zosta艂y zapisane do {file_path}.")
|
56 |
|
57 |
+
# Funkcje związane z fałszywymi numerami telefonów
|
58 |
+
|
59 |
def add_fake_number(phone_number):
|
60 |
"""
|
61 |
Dodaje numer telefonu do pliku fake_numbers.json jako fa艂szywy, je艣li jeszcze go tam nie ma.
|
|
|
86 |
fake_numbers = load_json(FAKE_NUMBERS_FILE)
|
87 |
return fake_numbers
|
88 |
|
89 |
+
# Funkcje analizy SMS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
|
91 |
def simple_checks(message, language):
|
92 |
"""
|
|
|
114 |
|
115 |
def analyze_message(content, sender_info, additional_info, api_key, language):
|
116 |
"""
|
117 |
+
Analizuje tre艣膰 wiadomo艣ci SMS za pomoc膮 modelu AI, wykorzystuj膮c system prompts.
|
118 |
"""
|
119 |
if not api_key:
|
120 |
logging.error("Brak klucza API.")
|
|
|
128 |
|
129 |
system_prompts = {
|
130 |
'Polish': """
|
131 |
+
Jeste艣 zaawansowanym asystentem AI specjalizuj膮cym si臋 w identyfikacji fa艂szywych wiadomo艣ci SMS. Twoim zadaniem jest przeprowadzenie szczeg贸艂owej analizy wiadomo艣ci, wykorzystuj膮c g艂臋boki proces my艣lenia i dostarczaj膮c kompleksow膮 ocen臋. Twoja odpowied藕 powinna by膰 podzielona na trzy sekcje:
|
132 |
+
|
133 |
+
<analysis>
|
134 |
+
**Analiza Tre艣ci Wiadomo艣ci:**
|
135 |
+
- Przeprowad藕 szczeg贸艂ow膮 analiz臋 tre艣ci wiadomo艣ci, identyfikuj膮c potencjalne czerwone flagi, takie jak b艂臋dy j臋zykowe, pro艣by o dane osobowe, pilne pro艣by o kontakt itp.
|
136 |
+
- Opisz kontekst j臋zykowy i kulturowy wiadomo艣ci.
|
137 |
+
- Zidentyfikuj wszelkie elementy, kt贸re mog膮 sugerowa膰, 偶e wiadomo艣膰 jest pr贸b膮 wy艂udzenia informacji lub pieni臋dzy.
|
138 |
+
</analysis>
|
139 |
+
|
140 |
+
<risk_assessment>
|
141 |
+
**Ocena Ryzyka Oszustwa:**
|
142 |
+
- Na podstawie analizy tre艣ci i dost臋pnych informacji oce艅 prawdopodobie艅stwo, 偶e wiadomo艣膰 jest oszustwem. U偶yj skali od 1 do 10, gdzie 1 oznacza bardzo niskie ryzyko, a 10 bardzo wysokie ryzyko.
|
143 |
+
- Wyja艣nij, jakie czynniki wp艂ywaj膮 na t臋 ocen臋.
|
144 |
+
</risk_assessment>
|
145 |
+
|
146 |
+
<recommendations>
|
147 |
+
**Zalecenia dla U偶ytkownika:**
|
148 |
+
- Podaj jasne i konkretne zalecenia dotycz膮ce dalszych krok贸w, kt贸re u偶ytkownik powinien podj膮膰.
|
149 |
+
- Uwzgl臋dnij sugestie dotycz膮ce bezpiecze艅stwa, takie jak blokowanie nadawcy, zg艂aszanie wiadomo艣ci do odpowiednich instytucji, czy te偶 ignorowanie wiadomo艣ci.
|
150 |
+
- Je艣li to mo偶liwe, zasugeruj dodatkowe 艣rodki ostro偶no艣ci, kt贸re u偶ytkownik mo偶e podj膮膰, aby chroni膰 swoje dane osobowe i finansowe.
|
151 |
+
</recommendations>
|
152 |
+
|
153 |
+
Twoja odpowied藕 powinna by膰 sformatowana dok艂adnie w powy偶szy spos贸b, u偶ywaj膮c znacznik贸w <analysis>, <risk_assessment> i <recommendations>. Upewnij si臋, 偶e ka偶da sekcja jest wype艂niona kompletnie i szczeg贸艂owo.
|
154 |
""",
|
155 |
'German': """
|
156 |
+
Du bist ein fortgeschrittener KI-Assistent, spezialisiert auf die Identifizierung gef盲lschter SMS-Nachrichten. Deine Aufgabe ist es, eine detaillierte Analyse der Nachricht durchzuf眉hren, indem du einen tiefgreifenden Denkprozess nutzt und eine umfassende Bewertung lieferst. Deine Antwort sollte in drei Abschnitte unterteilt sein:
|
157 |
+
|
158 |
+
<analysis>
|
159 |
+
**Nachrichteninhaltsanalyse:**
|
160 |
+
- F眉hre eine detaillierte Analyse des Nachrichteninhalts durch und identifiziere potenzielle rote Flaggen wie sprachliche Fehler, Aufforderungen zur Preisgabe pers枚nlicher Daten, dringende Kontaktanfragen usw.
|
161 |
+
- Beschreibe den sprachlichen und kulturellen Kontext der Nachricht.
|
162 |
+
- Identifiziere alle Elemente, die darauf hindeuten k枚nnten, dass die Nachricht ein Versuch ist, Informationen oder Geld zu erlangen.
|
163 |
+
</analysis>
|
164 |
+
|
165 |
+
<risk_assessment>
|
166 |
+
**Betrugsrisikobewertung:**
|
167 |
+
- Basierend auf der Inhaltsanalyse und den verf眉gbaren Informationen, bewerte die Wahrscheinlichkeit, dass die Nachricht ein Betrug ist. Verwende eine Skala von 1 bis 10, wobei 1 sehr geringes Risiko und 10 sehr hohes Risiko bedeutet.
|
168 |
+
- Erkl盲re, welche Faktoren diese Bewertung beeinflussen.
|
169 |
+
</risk_assessment>
|
170 |
+
|
171 |
+
<recommendations>
|
172 |
+
**Empfehlungen f眉r den Benutzer:**
|
173 |
+
- Gib klare und konkrete Empfehlungen zu den n盲chsten Schritten, die der Benutzer unternehmen sollte.
|
174 |
+
- Ber眉cksichtige Sicherheitsempfehlungen wie das Blockieren des Absenders, das Melden der Nachricht an entsprechende Beh枚rden oder das Ignorieren der Nachricht.
|
175 |
+
- Wenn m枚glich, schlage zus盲tzliche Vorsichtsma脽nahmen vor, die der Benutzer ergreifen kann, um seine pers枚nlichen und finanziellen Daten zu sch眉tzen.
|
176 |
+
</recommendations>
|
177 |
+
|
178 |
+
Deine Antwort sollte genau nach den oben genannten Richtlinien formatiert sein und die Markierungen <analysis>, <risk_assessment> und <recommendations> verwenden. Stelle sicher, dass jeder Abschnitt vollst盲ndig und detailliert ausgef眉llt ist.
|
179 |
""",
|
180 |
'English': """
|
181 |
+
You are an advanced AI assistant specializing in identifying fake SMS messages. Your task is to conduct a detailed analysis of the message, utilizing a deep thinking process and providing a comprehensive assessment. Your response should be divided into three sections:
|
182 |
+
|
183 |
+
<analysis>
|
184 |
+
**Message Content Analysis:**
|
185 |
+
- Conduct a detailed analysis of the message content, identifying potential red flags such as language errors, requests for personal information, urgent contact requests, etc.
|
186 |
+
- Describe the linguistic and cultural context of the message.
|
187 |
+
- Identify any elements that may suggest the message is an attempt to solicit information or money.
|
188 |
+
</analysis>
|
189 |
+
|
190 |
+
<risk_assessment>
|
191 |
+
**Fraud Risk Assessment:**
|
192 |
+
- Based on the content analysis and available information, assess the likelihood that the message is fraudulent. Use a scale from 1 to 10, where 1 indicates very low risk and 10 indicates very high risk.
|
193 |
+
- Explain the factors that influence this assessment.
|
194 |
+
</risk_assessment>
|
195 |
+
|
196 |
+
<recommendations>
|
197 |
+
**User Recommendations:**
|
198 |
+
- Provide clear and concrete recommendations regarding the next steps the user should take.
|
199 |
+
- Include security suggestions such as blocking the sender, reporting the message to appropriate authorities, or ignoring the message.
|
200 |
+
- If possible, suggest additional precautionary measures the user can take to protect their personal and financial information.
|
201 |
+
</recommendations>
|
202 |
+
|
203 |
+
Your response should be formatted exactly as specified above, using the <analysis>, <risk_assessment>, and <recommendations> tags. Ensure that each section is thoroughly and comprehensively filled out.
|
204 |
"""
|
205 |
}
|
206 |
|
|
|
250 |
logging.error(f"B艂膮d po艂膮czenia z API: {e}")
|
251 |
return f"B艂膮d po艂膮czenia z API: {e}", "B艂膮d analizy.", "B艂膮d analizy."
|
252 |
|
253 |
+
# Funkcje analizy email
|
254 |
+
|
255 |
+
def analyze_email_message(content, sender_info, additional_info, api_key, language):
    """
    Analyze an email message for signs of fraud using a chat-completion model.

    A language-specific system prompt asks the model to answer in three tagged
    sections (<analysis>, <risk_assessment>, <recommendations>). The message
    details are posted to the SambaNova chat-completions endpoint and the
    three sections are extracted from the reply with regular expressions.

    Args:
        content: Body of the email to analyze.
        sender_info: Sender details interpolated into the user prompt.
        additional_info: Extra context interpolated into the user prompt.
        api_key: Bearer token for the API. When falsy, no request is made.
        language: 'Polish', 'German' or 'English'; any other value falls back
            to the English system prompt.

    Returns:
        A 3-tuple of strings: (analysis, risk assessment, recommendations).
        If the key is missing, the API answers with a non-200 status, or any
        exception occurs, the tuple carries error messages instead. The
        function never raises.
    """
    if not api_key:
        logging.error("Brak klucza API.")
        return "Brak klucza API.", "Brak klucza API.", "Brak klucza API."

    url = "https://api.sambanova.ai/v1/chat/completions"  # Make sure this is the correct endpoint URL
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    system_prompts = {
        'Polish': """
Jesteś zaawansowanym asystentem AI specjalizującym się w identyfikacji fałszywych wiadomości email. Twoim zadaniem jest przeprowadzenie szczegółowej analizy poniższej wiadomości email, wykorzystując głęboki proces myślenia i dostarczając kompleksową ocenę. Twoja odpowiedź powinna być podzielona na trzy sekcje:

<analysis>
**Analiza Treści Wiadomości:**
- Przeprowadź szczegółową analizę treści wiadomości email, identyfikując potencjalne czerwone flagi, takie jak błędy językowe, podejrzane linki, prośby o dane osobowe, pilne prośby o kontakt itp.
- Oceń autentyczność adresu email nadawcy.
- Opisz kontekst językowy i kulturowy wiadomości.
- Zidentyfikuj wszelkie elementy, które mogą sugerować, że wiadomość jest próbą phishingu lub oszustwa.
</analysis>

<risk_assessment>
**Ocena Ryzyka Oszustwa:**
- Na podstawie analizy treści i dostępnych informacji oceń prawdopodobieństwo, że wiadomość email jest oszustwem. Użyj skali od 1 do 10, gdzie 1 oznacza bardzo niskie ryzyko, a 10 bardzo wysokie ryzyko.
- Wyjaśnij, jakie czynniki wpływają na tę ocenę.
</risk_assessment>

<recommendations>
**Zalecenia dla Użytkownika:**
- Podaj jasne i konkretne zalecenia dotyczące dalszych kroków, które użytkownik powinien podjąć.
- Uwzględnij sugestie dotyczące bezpieczeństwa, takie jak nieklikanie w podejrzane linki, nieotwieranie załączników, zgłaszanie wiadomości do odpowiednich instytucji itp.
- Jeśli to możliwe, zasugeruj dodatkowe środki ostrożności, które użytkownik może podjąć, aby chronić swoje dane osobowe i finansowe.
</recommendations>

Twoja odpowiedź powinna być sformatowana dokładnie w powyższy sposób, używając znaczników <analysis>, <risk_assessment> i <recommendations>. Upewnij się, że każda sekcja jest wypełniona kompletnie i szczegółowo.
""",
        'German': """
Du bist ein fortgeschrittener KI-Assistent, spezialisiert auf die Identifizierung gefälschter E-Mail-Nachrichten. Deine Aufgabe ist es, eine detaillierte Analyse der folgenden E-Mail-Nachricht durchzuführen, indem du einen tiefgreifenden Denkprozess nutzt und eine umfassende Bewertung lieferst. Deine Antwort sollte in drei Abschnitte unterteilt sein:

<analysis>
**Nachrichteninhaltsanalyse:**
- Führe eine detaillierte Analyse des Nachrichteninhalts durch und identifiziere potenzielle rote Flaggen wie sprachliche Fehler, verdächtige Links, Aufforderungen zur Preisgabe persönlicher Daten, dringende Kontaktanfragen usw.
- Beurteile die Authentizität der E-Mail-Adresse des Absenders.
- Beschreibe den sprachlichen und kulturellen Kontext der Nachricht.
- Identifiziere alle Elemente, die darauf hindeuten könnten, dass die Nachricht ein Versuch von Phishing oder Betrug ist.
</analysis>

<risk_assessment>
**Betrugsrisikobewertung:**
- Basierend auf der Inhaltsanalyse und den verfügbaren Informationen, bewerte die Wahrscheinlichkeit, dass die E-Mail-Nachricht ein Betrug ist. Verwende eine Skala von 1 bis 10, wobei 1 sehr geringes Risiko und 10 sehr hohes Risiko bedeutet.
- Erkläre, welche Faktoren diese Bewertung beeinflussen.
</risk_assessment>

<recommendations>
**Empfehlungen für den Benutzer:**
- Gib klare und konkrete Empfehlungen zu den nächsten Schritten, die der Benutzer unternehmen sollte.
- Berücksichtige Sicherheitsempfehlungen wie das Nicht-Klicken auf verdächtige Links, das Nicht-Öffnen von Anhängen, das Melden der Nachricht an entsprechende Behörden usw.
- Wenn möglich, schlage zusätzliche Vorsichtsmaßnahmen vor, die der Benutzer ergreifen kann, um seine persönlichen und finanziellen Daten zu schützen.
</recommendations>

Deine Antwort sollte genau nach den oben genannten Richtlinien formatiert sein und die Markierungen <analysis>, <risk_assessment> und <recommendations> verwenden. Stelle sicher, dass jeder Abschnitt vollständig und detailliert ausgefüllt ist.
""",
        'English': """
You are an advanced AI assistant specializing in identifying fake email messages. Your task is to conduct a detailed analysis of the following email message, utilizing a deep thinking process and providing a comprehensive assessment. Your response should be divided into three sections:

<analysis>
**Message Content Analysis:**
- Conduct a detailed analysis of the email message content, identifying potential red flags such as language errors, suspicious links, requests for personal information, urgent contact requests, etc.
- Assess the authenticity of the sender's email address.
- Describe the linguistic and cultural context of the message.
- Identify any elements that may suggest the message is an attempt at phishing or fraud.
</analysis>

<risk_assessment>
**Fraud Risk Assessment:**
- Based on the content analysis and available information, assess the likelihood that the email message is fraudulent. Use a scale from 1 to 10, where 1 indicates very low risk and 10 indicates very high risk.
- Explain the factors that influence this assessment.
</risk_assessment>

<recommendations>
**User Recommendations:**
- Provide clear and concrete recommendations regarding the next steps the user should take.
- Include security suggestions such as not clicking on suspicious links, not opening attachments, reporting the message to appropriate authorities, etc.
- If possible, suggest additional precautionary measures the user can take to protect their personal and financial information.
</recommendations>

Your response should be formatted exactly as specified above, using the <analysis>, <risk_assessment>, and <recommendations> tags. Ensure that each section is thoroughly and comprehensively filled out.
"""
    }

    # Unknown languages fall back to the English instructions.
    system_prompt = system_prompts.get(language, system_prompts['English'])

    user_prompt = f"""Analyze the following email message for potential fraud:

Email Content: "{content}"
Sender Information: "{sender_info}"

Additional Information:
{additional_info}

Provide your analysis and conclusions following the guidelines above."""

    payload = {
        "model": "Meta-Llama-3.1-8B-Instruct",  # Make sure this is a model the API actually serves
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        "max_tokens": 1000,
        "temperature": 0.2,
        "top_p": 0.9,
        "stop": ["<|eot_id|>"]
    }

    try:
        # Without a timeout, requests waits forever on a stalled connection.
        response = requests.post(url, headers=headers, json=payload, timeout=60)
        if response.status_code != 200:
            logging.error(f"Błąd API: {response.status_code} - {response.text}")
            return f"Błąd API: {response.status_code} - {response.text}", "Błąd analizy.", "Błąd analizy."

        data = response.json()
        ai_response = data['choices'][0]['message']['content']

        # Pull each tagged section out of the reply; DOTALL lets a section span lines.
        analysis = re.search(r'<analysis>(.*?)</analysis>', ai_response, re.DOTALL)
        risk_assessment = re.search(r'<risk_assessment>(.*?)</risk_assessment>', ai_response, re.DOTALL)
        recommendations = re.search(r'<recommendations>(.*?)</recommendations>', ai_response, re.DOTALL)

        # A section the model omitted is replaced by a placeholder message.
        analysis_text = analysis.group(1).strip() if analysis else "Brak analizy."
        risk_text = risk_assessment.group(1).strip() if risk_assessment else "Brak oceny ryzyka."
        recommendations_text = recommendations.group(1).strip() if recommendations else "Brak zaleceń."

        return analysis_text, risk_text, recommendations_text
    except Exception as e:
        # Boundary handler: network failures and malformed replies both end up
        # here so callers always receive a 3-tuple.
        logging.error(f"Błąd połączenia z API: {e}")
        return f"Błąd połączenia z API: {e}", "Błąd analizy.", "Błąd analizy."
|
395 |
+
|
396 |
+
# Funkcje analizy stron internetowych
|
397 |
+
|
398 |
+
def check_url_with_phishtank(url_to_check, phishtank_api_key):
    """
    Check whether a URL is listed in the PhishTank database.

    Args:
        url_to_check: URL to look up.
        phishtank_api_key: PhishTank application key. When falsy, no request
            is made.

    Returns:
        A tuple (is_phish, error). On success, is_phish is the
        results.valid field of the JSON reply (False when absent) and error
        is None. On a missing key, a non-200 status or any exception,
        is_phish is False and error is a message string. The function never
        raises.
    """
    if not phishtank_api_key:
        logging.error("Brak klucza API PhishTank.")
        return False, "Brak klucza API PhishTank."

    api_url = "https://checkurl.phishtank.com/checkurl/"
    data = {
        'format': 'json',
        'app_key': phishtank_api_key,
        'url': url_to_check
    }

    try:
        # Without a timeout, requests waits forever on a stalled connection.
        response = requests.post(api_url, data=data, timeout=10)
        if response.status_code != 200:
            logging.error(f"Błąd podczas sprawdzania URL w PhishTank: {response.status_code}")
            return False, f"Błąd podczas sprawdzania URL w PhishTank: {response.status_code}"

        result = response.json()
        is_phish = result.get('results', {}).get('valid', False)
        return is_phish, None
    except Exception as e:
        # Boundary handler: transport errors and malformed JSON are both
        # reported through the error element instead of propagating.
        logging.error(f"Błąd podczas sprawdzania URL w PhishTank: {e}")
        return False, f"Błąd podczas sprawdzania URL w PhishTank: {e}"
|
425 |
+
|
426 |
+
def analyze_url(url, phishtank_api_key, google_safe_browsing_api_key):
    """
    Analyze a web page for phishing threats.

    Runs the PhishTank lookup, runs the Google Safe Browsing lookup when a
    key is supplied, and fetches the page content.

    Args:
        url: URL of the page to analyze.
        phishtank_api_key: Key passed to check_url_with_phishtank.
        google_safe_browsing_api_key: Key passed to
            check_url_with_google_safe_browsing. When falsy, that check is
            skipped and its verdict stays False.

    Returns:
        A tuple (threats, content). threats is a dict with the keys
        "PhishTank" and "Google Safe Browsing" holding each service's
        verdict. content is whatever get_page_content(url) returns.
    """
    threats = {
        "PhishTank": False,
        "Google Safe Browsing": False
    }

    # PhishTank check
    is_phish, error = check_url_with_phishtank(url, phishtank_api_key)
    if error:
        # The error text already starts with the PhishTank prefix, so it is
        # logged as-is rather than being prefixed a second time.
        logging.error(error)
    threats["PhishTank"] = is_phish

    # Google Safe Browsing check
    if google_safe_browsing_api_key:
        threats["Google Safe Browsing"] = check_url_with_google_safe_browsing(
            url, google_safe_browsing_api_key
        )
    else:
        logging.warning("Brak klucza API Google Safe Browsing.")

    # Fetch the page content
    content = get_page_content(url)

    return threats, content
|
452 |
+
|
453 |
+
def check_url_with_google_safe_browsing(url_to_check, google_safe_browsing_api_key):
    """
    Check whether a URL is flagged by the Google Safe Browsing v4 Lookup API.

    Args:
        url_to_check: URL to look up.
        google_safe_browsing_api_key: API key sent as the `key` query
            parameter.

    Returns:
        True when the JSON reply contains a non-empty "matches" entry.
        False otherwise, including on a non-200 status or any exception.
        The function never raises.
    """
    api_url = "https://safebrowsing.googleapis.com/v4/threatMatches:find"
    payload = {
        "client": {
            "clientId": "yourcompanyname",  # placeholder identifier; replace with the real client name
            "clientVersion": "1.5.2"
        },
        "threatInfo": {
            "threatTypes": ["MALWARE", "SOCIAL_ENGINEERING"],
            # ANY_PLATFORM instead of WINDOWS only, so a URL flagged for
            # another platform is still reported.
            "platformTypes": ["ANY_PLATFORM"],
            "threatEntryTypes": ["URL"],
            "threatEntries": [
                {"url": url_to_check}
            ]
        }
    }

    try:
        # The key goes through `params` so it is URL-encoded. The timeout
        # keeps a stalled connection from blocking forever.
        response = requests.post(
            api_url,
            params={"key": google_safe_browsing_api_key},
            json=payload,
            timeout=10,
        )
        if response.status_code != 200:
            logging.error(f"Błąd podczas sprawdzania URL w Google Safe Browsing: {response.status_code}")
            return False

        # The lookup reply is an empty object when nothing matched.
        return bool(response.json().get("matches"))
    except Exception as e:
        # Boundary handler: any failure is reported as "not flagged".
        logging.error(f"Błąd podczas sprawdzania URL w Google Safe Browsing: {e}")
        return False
|
487 |
|
488 |
+
def get_page_content(url):
|
489 |
"""
|
490 |
Pobiera zawarto艣膰 strony internetowej pod podanym URL.
|
491 |
"""
|
|
|
493 |
response = requests.get(url)
|
494 |
if response.status_code == 200:
|
495 |
logging.info(f"Pobrano zawarto艣膰 strony {url}.")
|
496 |
+
return response.text[:500] # Zwraca pierwsze 500 znak贸w
|
|
|
497 |
else:
|
498 |
logging.error(f"Nie uda艂o si臋 pobra膰 zawarto艣ci strony {url}. Kod statusu HTTP: {response.status_code}")
|
499 |
return f"Nie uda艂o si臋 pobra膰 zawarto艣ci strony. Kod statusu HTTP: {response.status_code}"
|
|
|
513 |
except Exception as e:
|
514 |
logging.error(f"B艂膮d podczas ekstrakcji tekstu z obrazu: {e}")
|
515 |
return f"B艂膮d podczas ekstrakcji tekstu z obrazu: {e}"
|
516 |
+
|
517 |
+
# Funkcje analizy email
|
518 |
+
|
519 |
+
def get_email_info(email_address):
    """
    Look up WHOIS details for the domain of a sender's email address.

    Args:
        email_address: Address such as "user@example.com". The text after the
            last "@" is used as the domain. A value without "@" is treated as
            the domain itself. Surrounding whitespace and angle brackets are
            ignored and the domain is lower-cased.

    Returns:
        A dict with the keys 'domain', 'organization' and 'country'. The last
        two fall back to 'Nieznana' / 'Nieznany' when WHOIS has no value.
        Returns None when no domain can be extracted or the WHOIS lookup
        fails.
    """
    try:
        domain = email_address.strip().strip('<>').rpartition('@')[2].strip().lower()
    except AttributeError:
        # Non-string input (e.g. None) has no domain to extract.
        domain = ''

    if not domain:
        # Avoid sending an empty query to the WHOIS server.
        logging.error(f"Nie można ustalić domeny z adresu e-mail: {email_address!r}")
        return None

    try:
        w = whois.whois(domain)
        organization = w.org if w.org else 'Nieznana'
        country = w.country if w.country else 'Nieznany'
        logging.info(f"Domena {domain} - Organizacja: {organization}, Kraj: {country}.")
        return {
            'domain': domain,
            'organization': organization,
            'country': country
        }
    except Exception as e:
        # Boundary handler: a failed lookup is reported as None.
        logging.error(f"Nie udało się pobrać informacji WHOIS dla domeny {domain}: {e}")
        return None
|