Handle LangDetectException in crawl_by_url function to improve error handling
Browse files - controllers/utils.py +3 -2
controllers/utils.py
CHANGED
@@ -16,7 +16,7 @@ import pandas as pd
|
|
16 |
import requests
|
17 |
from dotenv import load_dotenv
|
18 |
from deep_translator import GoogleTranslator, exceptions
|
19 |
-
from langdetect import detect
|
20 |
from lxml import etree
|
21 |
import PyPDF2
|
22 |
from transformers import pipeline
|
@@ -682,7 +682,8 @@ def crawl_by_url(url, article):
|
|
682 |
for element in contentcn.split("。"):
|
683 |
contenteng += translate(element) + '. '
|
684 |
except (requests.exceptions.RequestException, requests.exceptions.ReadTimeout,
|
685 |
-
PyPDF2.errors.PdfReadError, PyPDF2.errors.DependencyError) as e:
|
|
|
686 |
print(f"An unexpected error occurred: {e}")
|
687 |
article['content'] = repr(contenteng)[1:-1].strip()
|
688 |
try:
|
|
|
16 |
import requests
|
17 |
from dotenv import load_dotenv
|
18 |
from deep_translator import GoogleTranslator, exceptions
|
19 |
+
from langdetect import detect, lang_detect_exception
|
20 |
from lxml import etree
|
21 |
import PyPDF2
|
22 |
from transformers import pipeline
|
|
|
682 |
for element in contentcn.split("。"):
|
683 |
contenteng += translate(element) + '. '
|
684 |
except (requests.exceptions.RequestException, requests.exceptions.ReadTimeout,
|
685 |
+
PyPDF2.errors.PdfReadError, PyPDF2.errors.DependencyError,
|
686 |
+
lang_detect_exception.LangDetectException) as e:
|
687 |
print(f"An unexpected error occurred: {e}")
|
688 |
article['content'] = repr(contenteng)[1:-1].strip()
|
689 |
try:
|