Spaces:
Running
Running
Update utils/functions.py
Browse files- utils/functions.py +77 -96
utils/functions.py
CHANGED
@@ -23,6 +23,7 @@ logging.basicConfig(
|
|
23 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
24 |
DATA_DIR = os.path.join(BASE_DIR, '..', 'data')
|
25 |
FAKE_NUMBERS_FILE = os.path.join(DATA_DIR, 'fake_numbers.json')
|
|
|
26 |
|
27 |
# Upewnij się, że katalog 'data' istnieje
|
28 |
os.makedirs(DATA_DIR, exist_ok=True)
|
@@ -409,121 +410,101 @@ Provide your analysis and conclusions following the guidelines above."""
|
|
409 |
logging.error(f"B艂膮d po艂膮czenia z API: {e}")
|
410 |
return f"B艂膮d po艂膮czenia z API: {e}", "B艂膮d analizy.", "B艂膮d analizy."
|
411 |
|
412 |
-
# Funkcje analizy
|
413 |
|
414 |
-
def
|
415 |
"""
|
416 |
-
|
417 |
"""
|
418 |
-
if not phishtank_api_key:
|
419 |
-
logging.error("Brak klucza API PhishTank.")
|
420 |
-
return False, "Brak klucza API PhishTank."
|
421 |
-
|
422 |
-
api_url = "https://checkurl.phishtank.com/checkurl/"
|
423 |
-
data = {
|
424 |
-
'format': 'json',
|
425 |
-
'app_key': phishtank_api_key,
|
426 |
-
'url': url_to_check
|
427 |
-
}
|
428 |
-
|
429 |
try:
|
430 |
-
response = requests.
|
431 |
if response.status_code == 200:
|
432 |
-
|
433 |
-
|
434 |
-
return
|
435 |
else:
|
436 |
-
logging.error(f"B艂膮d podczas
|
437 |
-
return
|
438 |
-
except
|
439 |
-
logging.error(f"B艂膮d
|
440 |
-
return
|
441 |
|
442 |
-
def
|
443 |
"""
|
444 |
-
Sprawdza
|
445 |
"""
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
|
457 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
458 |
},
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
{"url": url_to_check}
|
465 |
-
]
|
466 |
}
|
467 |
}
|
468 |
-
|
469 |
try:
|
470 |
-
response = requests.post(
|
|
|
|
|
|
|
|
|
471 |
if response.status_code == 200:
|
472 |
-
|
473 |
-
|
474 |
-
|
475 |
else:
|
476 |
-
logging.error(f"B艂膮d podczas
|
477 |
-
return False, f"B艂膮d podczas sprawdzania URL w Google Safe Browsing: {response.status_code}"
|
478 |
except Exception as e:
|
479 |
logging.error(f"B艂膮d podczas sprawdzania URL w Google Safe Browsing: {e}")
|
480 |
-
|
481 |
|
482 |
-
def
|
483 |
"""
|
484 |
-
|
485 |
"""
|
486 |
-
|
487 |
-
|
488 |
-
if
|
489 |
-
|
490 |
-
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
-
# Sprawdzenie Google Safe Browsing
|
496 |
-
is_threat, error = get_google_safe_browsing(url, google_api_key)
|
497 |
-
if error:
|
498 |
-
google_result = error
|
499 |
-
elif is_threat:
|
500 |
-
google_result = "Zagro偶enie wykryte przez Google Safe Browsing."
|
501 |
-
else:
|
502 |
-
google_result = "Brak zagro偶e艅 wed艂ug Google Safe Browsing."
|
503 |
-
|
504 |
-
# Analiza zawarto艣ci strony
|
505 |
-
try:
|
506 |
-
response = requests.get(url)
|
507 |
-
if response.status_code == 200:
|
508 |
-
content = response.text[:500] # Pobierz pierwsze 500 znak贸w
|
509 |
-
logging.info(f"Pobrano zawarto艣膰 strony {url}.")
|
510 |
-
return phishing_result, google_result, content
|
511 |
-
else:
|
512 |
-
logging.error(f"Nie uda艂o si臋 pobra膰 zawarto艣ci strony {url}. Kod statusu HTTP: {response.status_code}")
|
513 |
-
return f"Nie uda艂o si臋 pobra膰 zawarto艣ci strony. Kod statusu HTTP: {response.status_code}", google_result, ""
|
514 |
-
except Exception as e:
|
515 |
-
logging.error(f"B艂膮d podczas pobierania URL {url}: {e}")
|
516 |
-
return f"B艂膮d podczas pobierania URL: {e}", google_result, ""
|
517 |
-
|
518 |
-
def extract_text_from_image(image_file):
|
519 |
"""
|
520 |
-
|
521 |
"""
|
522 |
-
|
523 |
-
|
524 |
-
|
525 |
-
logging.info("Tekst zosta艂 wyodr臋bniony z obrazu.")
|
526 |
-
return text
|
527 |
-
except Exception as e:
|
528 |
-
logging.error(f"B艂膮d podczas ekstrakcji tekstu z obrazu: {e}")
|
529 |
-
return f"B艂膮d podczas ekstrakcji tekstu z obrazu: {e}"
|
|
|
23 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
24 |
DATA_DIR = os.path.join(BASE_DIR, '..', 'data')
|
25 |
FAKE_NUMBERS_FILE = os.path.join(DATA_DIR, 'fake_numbers.json')
|
26 |
+
STATS_FILE = os.path.join(DATA_DIR, 'stats.json')
|
27 |
|
28 |
# Upewnij się, że katalog 'data' istnieje
|
29 |
os.makedirs(DATA_DIR, exist_ok=True)
|
|
|
410 |
logging.error(f"B艂膮d po艂膮czenia z API: {e}")
|
411 |
return f"B艂膮d po艂膮czenia z API: {e}", "B艂膮d analizy.", "B艂膮d analizy."
|
412 |
|
413 |
+
# Funkcje analizy strony internetowej
|
414 |
|
415 |
+
def analyze_website(url, language):
    """
    Fetch a web page and return its body for threat analysis.

    Content analysis is not implemented yet, so the raw page text is
    returned unchanged on success.

    Args:
        url: Address of the page to download.
        language: Not used by this function at present; kept so existing
            callers keep working.

    Returns:
        The response text when the server answers with HTTP 200. Otherwise a
        short Polish error message: "Błąd podczas pobierania strony." for a
        non-200 status, "Błąd żądania." when the request itself fails.
    """
    try:
        # requests has no default timeout; without one an unresponsive host
        # would block the caller indefinitely. A timeout surfaces as a
        # RequestException subclass and is handled below.
        response = requests.get(url, timeout=10)
    except requests.exceptions.RequestException as e:
        logging.error(f"Błąd żądania: {e}")
        return "Błąd żądania."

    if response.status_code != 200:
        logging.error(f"Błąd podczas pobierania strony: {response.status_code}")
        return "Błąd podczas pobierania strony."

    # TODO: insert the page-content analysis here; for now only the content
    # is returned.
    return response.text
|
431 |
|
432 |
+
def check_urls_with_phishtank(urls):
    """
    Check URLs against the PhishTank database.

    Each URL is submitted to the PhishTank ``checkurl`` endpoint in its own
    POST request. Failures (non-200 status, network errors, malformed
    responses) are logged and that URL is skipped, so one bad lookup does
    not abort the rest of the batch.

    Args:
        urls: Iterable of URL strings to look up. ``None`` or an empty
            iterable yields an empty result.

    Returns:
        List of the URLs for which the response reports both
        ``in_database`` and ``valid`` as true, in input order.
    """
    phishing_urls = []
    for url in urls or ():
        params = {
            'format': 'json',
            'url': url,
        }
        try:
            # A timeout keeps one unresponsive lookup from stalling the
            # whole batch; it is caught by the handler below.
            response = requests.post(
                'https://checkurl.phishtank.com/checkurl/',
                data=params,
                timeout=10,
            )
            if response.status_code != 200:
                logging.warning(f"Błąd podczas sprawdzania URL w PhishTank: {response.status_code}")
                continue
            results = response.json().get('results', {})
            if results.get('in_database', False) and results.get('valid', False):
                phishing_urls.append(url)
        except Exception as e:
            # Best-effort lookup: log and move on to the next URL.
            logging.error(f"Błąd podczas sprawdzania URL w PhishTank: {e}")
    return phishing_urls
|
455 |
+
|
456 |
+
def check_urls_with_safe_browsing(urls):
    """
    Check URLs against the Google Safe Browsing v4 ``threatMatches:find`` API.

    The API key is read from the ``GOOGLE_SAFE_BROWSING_API_KEY``
    environment variable. All URLs are sent in a single request.

    Args:
        urls: Iterable of URL strings to check.

    Returns:
        ``None`` when no API key is configured (the check could not run).
        Otherwise a list with the ``threat.url`` value of every match the
        API returned; the list is empty when there was nothing to check,
        nothing matched, or the request failed (failures are logged).
    """
    api_key = os.getenv('GOOGLE_SAFE_BROWSING_API_KEY')
    if not api_key:
        return None

    threat_entries = [{'url': url} for url in urls or ()]
    if not threat_entries:
        # Nothing to look up, so skip the network round-trip entirely.
        return []

    client_body = {
        'client': {
            # NOTE(review): 'yourcompanyname' looks like a placeholder
            # client ID; confirm the intended value.
            'clientId': 'yourcompanyname',
            'clientVersion': '1.0',
        },
        'threatInfo': {
            'threatTypes': ["MALWARE", "SOCIAL_ENGINEERING", "UNWANTED_SOFTWARE", "POTENTIALLY_HARMFUL_APPLICATION"],
            'platformTypes': ["ANY_PLATFORM"],
            'threatEntryTypes': ["URL"],
            'threatEntries': threat_entries,
        },
    }

    unsafe_urls = []
    try:
        # The timeout prevents an unresponsive endpoint from blocking the
        # caller; it is caught by the handler below.
        response = requests.post(
            f'https://safebrowsing.googleapis.com/v4/threatMatches:find?key={api_key}',
            headers={'Content-Type': 'application/json'},
            json=client_body,
            timeout=10,
        )
        if response.status_code == 200:
            matches = response.json().get('matches', [])
            unsafe_urls = [match['threat']['url'] for match in matches]
        else:
            logging.error(f"Błąd podczas komunikacji z Google Safe Browsing API: {response.status_code}")
    except Exception as e:
        # Best-effort check: log the failure and report no matches.
        logging.error(f"Błąd podczas sprawdzania URL w Google Safe Browsing: {e}")
    return unsafe_urls
|
492 |
|
493 |
+
def update_stats(fraud_detected=False):
    """
    Record one completed analysis in stats.json.

    Increments ``total_analyses`` and, when requested,
    ``total_frauds_detected``, then writes the result back to
    ``STATS_FILE`` and logs the new totals.

    Args:
        fraud_detected: When true, the fraud counter is incremented as well.
    """
    # NOTE(review): load_json is defined elsewhere; this assumes it may
    # return an empty/None value when the stats file does not exist yet.
    stats = load_json(STATS_FILE) or {}

    # Start missing counters from zero instead of raising KeyError. Both keys
    # must exist afterwards because the log line below reads both.
    stats["total_analyses"] = stats.get("total_analyses", 0) + 1
    stats["total_frauds_detected"] = (
        stats.get("total_frauds_detected", 0) + (1 if fraud_detected else 0)
    )

    save_json(STATS_FILE, stats)
    logging.info(f"Statystyki zostały zaktualizowane: Analiz {stats['total_analyses']}, Oszustw {stats['total_frauds_detected']}.")
|
503 |
+
|
504 |
+
def get_stats():
    """
    Load the analysis statistics stored in stats.json.

    Returns:
        Whatever ``load_json`` yields for ``STATS_FILE``; the structure is
        not validated here.
    """
    stats = load_json(STATS_FILE)
    # The log message had mis-encoded Polish diacritics; restored here.
    logging.info("Statystyki zostały pobrane pomyślnie.")
    return stats
|
|
|
|
|
|
|
|
|
|