gavinzli committed on
Commit
48adbee
·
1 Parent(s): f313f6f

Handle LangDetectException in crawl_by_url function to improve error handling

Browse files
Files changed (1) hide show
  1. controllers/utils.py +3 -2
controllers/utils.py CHANGED
@@ -16,7 +16,7 @@ import pandas as pd
16
  import requests
17
  from dotenv import load_dotenv
18
  from deep_translator import GoogleTranslator, exceptions
19
- from langdetect import detect
20
  from lxml import etree
21
  import PyPDF2
22
  from transformers import pipeline
@@ -682,7 +682,8 @@ def crawl_by_url(url, article):
682
  for element in contentcn.split("。"):
683
  contenteng += translate(element) + '. '
684
  except (requests.exceptions.RequestException, requests.exceptions.ReadTimeout,
685
- PyPDF2.errors.PdfReadError, PyPDF2.errors.DependencyError) as e:
 
686
  print(f"An unexpected error occurred: {e}")
687
  article['content'] = repr(contenteng)[1:-1].strip()
688
  try:
 
16
  import requests
17
  from dotenv import load_dotenv
18
  from deep_translator import GoogleTranslator, exceptions
19
+ from langdetect import detect, lang_detect_exception
20
  from lxml import etree
21
  import PyPDF2
22
  from transformers import pipeline
 
682
  for element in contentcn.split("。"):
683
  contenteng += translate(element) + '. '
684
  except (requests.exceptions.RequestException, requests.exceptions.ReadTimeout,
685
+ PyPDF2.errors.PdfReadError, PyPDF2.errors.DependencyError,
686
+ lang_detect_exception.LangDetectException) as e:
687
  print(f"An unexpected error occurred: {e}")
688
  article['content'] = repr(contenteng)[1:-1].strip()
689
  try: