Spaces:
Running
Running
Update utils.py
Browse files
utils.py
CHANGED
@@ -51,7 +51,14 @@ def extract_text_from_url(url):
|
|
51 |
"""
|
52 |
print("[LOG] Extracting text from URL:", url)
|
53 |
try:
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
if response.status_code != 200:
|
56 |
print(f"[ERROR] Failed to fetch URL: {url} with status code {response.status_code}")
|
57 |
return ""
|
@@ -514,7 +521,7 @@ def _preprocess_text_for_tts(text: str) -> str:
|
|
514 |
filler = random.choice(['hmm,', 'well,', 'let me see,'])
|
515 |
return f"{word}..., {filler}"
|
516 |
else:
|
517 |
-
return f"{word}...,"
|
518 |
|
519 |
keywords_pattern = r"\b(important|significant|crucial|point|topic)\b"
|
520 |
text = re.sub(keywords_pattern, insert_thinking_pause, text, flags=re.IGNORECASE)
|
|
|
51 |
"""
|
52 |
print("[LOG] Extracting text from URL:", url)
|
53 |
try:
|
54 |
+
headers = {
|
55 |
+
"User-Agent": (
|
56 |
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
57 |
+
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
58 |
+
"Chrome/115.0.0.0 Safari/537.36"
|
59 |
+
)
|
60 |
+
}
|
61 |
+
response = requests.get(url, headers=headers)
|
62 |
if response.status_code != 200:
|
63 |
print(f"[ERROR] Failed to fetch URL: {url} with status code {response.status_code}")
|
64 |
return ""
|
|
|
521 |
filler = random.choice(['hmm,', 'well,', 'let me see,'])
|
522 |
return f"{word}..., {filler}"
|
523 |
else:
|
524 |
+
return f"{word}...,"
|
525 |
|
526 |
keywords_pattern = r"\b(important|significant|crucial|point|topic)\b"
|
527 |
text = re.sub(keywords_pattern, insert_thinking_pause, text, flags=re.IGNORECASE)
|