rafaldembski committed on
Commit
0016b8d
verified
1 Parent(s): 21adf53

Update utils/functions.py

Browse files
Files changed (1) hide show
  1. utils/functions.py +77 -96
utils/functions.py CHANGED
@@ -23,6 +23,7 @@ logging.basicConfig(
23
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
24
  DATA_DIR = os.path.join(BASE_DIR, '..', 'data')
25
  FAKE_NUMBERS_FILE = os.path.join(DATA_DIR, 'fake_numbers.json')
 
26
 
27
  # Upewnij się, że katalog 'data' istnieje
28
  os.makedirs(DATA_DIR, exist_ok=True)
@@ -409,121 +410,101 @@ Provide your analysis and conclusions following the guidelines above."""
409
  logging.error(f"Błąd połączenia z API: {e}")
410
  return f"Błąd połączenia z API: {e}", "Błąd analizy.", "Błąd analizy."
411
 
412
- # Funkcje analizy stron internetowych
413
 
414
- def check_url_with_phishtank(url_to_check, phishtank_api_key):
415
  """
416
- Sprawdza, czy podany URL znajduje się w bazie PhishTank.
417
  """
418
- if not phishtank_api_key:
419
- logging.error("Brak klucza API PhishTank.")
420
- return False, "Brak klucza API PhishTank."
421
-
422
- api_url = "https://checkurl.phishtank.com/checkurl/"
423
- data = {
424
- 'format': 'json',
425
- 'app_key': phishtank_api_key,
426
- 'url': url_to_check
427
- }
428
-
429
  try:
430
- response = requests.post(api_url, data=data)
431
  if response.status_code == 200:
432
- result = response.json()
433
- is_phish = result.get('results', {}).get('valid', False)
434
- return is_phish, None
435
  else:
436
- logging.error(f"Błąd podczas sprawdzania URL w PhishTank: {response.status_code}")
437
- return False, f"Błąd podczas sprawdzania URL w PhishTank: {response.status_code}"
438
- except Exception as e:
439
- logging.error(f"Błąd podczas sprawdzania URL w PhishTank: {e}")
440
- return False, f"Błąd podczas sprawdzania URL w PhishTank: {e}"
441
 
442
- def get_google_safe_browsing(url_to_check, google_api_key):
443
  """
444
- Sprawdza, czy URL jest bezpieczny za pomocą Google Safe Browsing API.
445
  """
446
- if not google_api_key:
447
- logging.error("Brak klucza API Google Safe Browsing.")
448
- return False, "Brak klucza API Google Safe Browsing."
449
-
450
- api_url = "https://safebrowsing.googleapis.com/v4/threatMatches:find"
451
- params = {
452
- 'key': google_api_key
453
- }
454
- payload = {
455
- "client": {
456
- "clientId": "yourcompanyname",
457
- "clientVersion": "1.5.2"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  },
459
- "threatInfo": {
460
- "threatTypes": ["MALWARE", "SOCIAL_ENGINEERING"],
461
- "platformTypes": ["WINDOWS"],
462
- "threatEntryTypes": ["URL"],
463
- "threatEntries": [
464
- {"url": url_to_check}
465
- ]
466
  }
467
  }
468
-
469
  try:
470
- response = requests.post(api_url, params=params, json=payload)
 
 
 
 
471
  if response.status_code == 200:
472
- result = response.json()
473
- is_threat = bool(result)
474
- return is_threat, None
475
  else:
476
- logging.error(f"Błąd podczas sprawdzania URL w Google Safe Browsing: {response.status_code}")
477
- return False, f"Błąd podczas sprawdzania URL w Google Safe Browsing: {response.status_code}"
478
  except Exception as e:
479
  logging.error(f"Błąd podczas sprawdzania URL w Google Safe Browsing: {e}")
480
- return False, f"Błąd podczas sprawdzania URL w Google Safe Browsing: {e}"
481
 
482
- def analyze_url(url, phishtank_api_key, google_api_key):
483
  """
484
- Analizuje stronę internetową pod kątem phishingu i innych zagrożeń.
485
  """
486
- # Sprawdzenie PhishTank
487
- is_phish, error = check_url_with_phishtank(url, phishtank_api_key)
488
- if error:
489
- phishing_result = error
490
- elif is_phish:
491
- phishing_result = "Zagrożenie phishingiem wykryte w PhishTank."
492
- else:
493
- phishing_result = "Brak zagrożeń phishingowych według PhishTank."
494
-
495
- # Sprawdzenie Google Safe Browsing
496
- is_threat, error = get_google_safe_browsing(url, google_api_key)
497
- if error:
498
- google_result = error
499
- elif is_threat:
500
- google_result = "Zagrożenie wykryte przez Google Safe Browsing."
501
- else:
502
- google_result = "Brak zagrożeń według Google Safe Browsing."
503
-
504
- # Analiza zawartości strony
505
- try:
506
- response = requests.get(url)
507
- if response.status_code == 200:
508
- content = response.text[:500] # Pobierz pierwsze 500 znaków
509
- logging.info(f"Pobrano zawartość strony {url}.")
510
- return phishing_result, google_result, content
511
- else:
512
- logging.error(f"Nie udało się pobrać zawartości strony {url}. Kod statusu HTTP: {response.status_code}")
513
- return f"Nie udało się pobrać zawartości strony. Kod statusu HTTP: {response.status_code}", google_result, ""
514
- except Exception as e:
515
- logging.error(f"Błąd podczas pobierania URL {url}: {e}")
516
- return f"Błąd podczas pobierania URL: {e}", google_result, ""
517
-
518
- def extract_text_from_image(image_file):
519
  """
520
- Ekstraktuje tekst z obrazu za pomocą Tesseract OCR.
521
  """
522
- try:
523
- image = Image.open(image_file)
524
- text = pytesseract.image_to_string(image, lang='pol') # Upewnij się, że masz zainstalowane języki dla Tesseract
525
- logging.info("Tekst został wyodrębniony z obrazu.")
526
- return text
527
- except Exception as e:
528
- logging.error(f"Błąd podczas ekstrakcji tekstu z obrazu: {e}")
529
- return f"Błąd podczas ekstrakcji tekstu z obrazu: {e}"
 
23
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
24
  DATA_DIR = os.path.join(BASE_DIR, '..', 'data')
25
  FAKE_NUMBERS_FILE = os.path.join(DATA_DIR, 'fake_numbers.json')
26
+ STATS_FILE = os.path.join(DATA_DIR, 'stats.json')
27
 
28
  # Upewnij się, że katalog 'data' istnieje
29
  os.makedirs(DATA_DIR, exist_ok=True)
 
410
  logging.error(f"Błąd połączenia z API: {e}")
411
  return f"Błąd połączenia z API: {e}", "Błąd analizy.", "Błąd analizy."
412
 
413
+ # Funkcje analizy strony internetowej
414
 
415
+ def analyze_website(url, language):
416
  """
417
+ Analizuje zawartość strony internetowej pod kątem zagrożeń.
418
  """
 
 
 
 
 
 
 
 
 
 
 
419
  try:
420
+ response = requests.get(url)
421
  if response.status_code == 200:
422
+ content = response.text
423
+ # Wstaw tu kod analizy zawartości strony
424
+ return content # Na razie zwracamy tylko zawartość
425
  else:
426
+ logging.error(f"Błąd podczas pobierania strony: {response.status_code}")
427
+ return "Błąd podczas pobierania strony."
428
+ except requests.exceptions.RequestException as e:
429
+ logging.error(f"Błąd żądania: {e}")
430
+ return "Błąd żądania."
431
 
432
+ def check_urls_with_phishtank(urls):
433
  """
434
+ Sprawdza URL w bazie PhishTank.
435
  """
436
+ phishing_urls = []
437
+ for url in urls:
438
+ params = {
439
+ 'format': 'json',
440
+ 'url': url
441
+ }
442
+ try:
443
+ response = requests.post('https://checkurl.phishtank.com/checkurl/', data=params)
444
+ if response.status_code == 200:
445
+ data = response.json()
446
+ in_database = data.get('results', {}).get('in_database', False)
447
+ valid = data.get('results', {}).get('valid', False)
448
+ if in_database and valid:
449
+ phishing_urls.append(url)
450
+ else:
451
+ logging.warning(f"Błąd podczas sprawdzania URL w PhishTank: {response.status_code}")
452
+ except Exception as e:
453
+ logging.error(f"Błąd podczas sprawdzania URL w PhishTank: {e}")
454
+ return phishing_urls
455
+
456
+ def check_urls_with_safe_browsing(urls):
457
+ """
458
+ Sprawdza URL w Google Safe Browsing.
459
+ """
460
+ api_key = os.getenv('GOOGLE_SAFE_BROWSING_API_KEY')
461
+ if not api_key:
462
+ return None
463
+ unsafe_urls = []
464
+ headers = {'Content-Type': 'application/json'}
465
+ client_body = {
466
+ 'client': {
467
+ 'clientId': 'yourcompanyname',
468
+ 'clientVersion': '1.0'
469
  },
470
+ 'threatInfo': {
471
+ 'threatTypes': ["MALWARE", "SOCIAL_ENGINEERING", "UNWANTED_SOFTWARE", "POTENTIALLY_HARMFUL_APPLICATION"],
472
+ 'platformTypes': ["ANY_PLATFORM"],
473
+ 'threatEntryTypes': ["URL"],
474
+ 'threatEntries': [{'url': url} for url in urls]
 
 
475
  }
476
  }
 
477
  try:
478
+ response = requests.post(
479
+ f'https://safebrowsing.googleapis.com/v4/threatMatches:find?key={api_key}',
480
+ headers=headers,
481
+ json=client_body
482
+ )
483
  if response.status_code == 200:
484
+ data = response.json()
485
+ matches = data.get('matches', [])
486
+ unsafe_urls = [match['threat']['url'] for match in matches]
487
  else:
488
+ logging.error(f"Błąd podczas komunikacji z Google Safe Browsing API: {response.status_code}")
 
489
  except Exception as e:
490
  logging.error(f"Błąd podczas sprawdzania URL w Google Safe Browsing: {e}")
491
+ return unsafe_urls
492
 
493
+ def update_stats(fraud_detected=False):
494
  """
495
+ Aktualizuje statystyki analiz w pliku stats.json.
496
  """
497
+ stats = load_json(STATS_FILE)
498
+ stats["total_analyses"] += 1
499
+ if fraud_detected:
500
+ stats["total_frauds_detected"] += 1
501
+ save_json(STATS_FILE, stats)
502
+ logging.info(f"Statystyki zostały zaktualizowane: Analiz {stats['total_analyses']}, Oszustw {stats['total_frauds_detected']}.")
503
+
504
+ def get_stats():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
505
  """
506
+ Pobiera statystyki analiz z pliku stats.json.
507
  """
508
+ stats = load_json(STATS_FILE)
509
+ logging.info("Statystyki zostały pobrane pomyślnie.")
510
+ return stats