Spaces:
Sleeping
Sleeping
Update scraper.py
Browse files
- scraper.py +3 -3
scraper.py
CHANGED
@@ -6,7 +6,7 @@ import logging
|
|
6 |
import os
|
7 |
|
8 |
|
9 |
-
def get_text(url, n_words=15):
|
10 |
try:
|
11 |
# geckodriver_path ='/home/user/app/geckodriver'
|
12 |
# os.environ['PATH'] += ':' + geckodriver_path
|
@@ -46,13 +46,13 @@ def get_text(url, n_words=15):
|
|
46 |
return "", err_msg
|
47 |
|
48 |
|
49 |
-
def scrape_text(url, n_words=15,max_retries=2):
|
50 |
scraped_text = ""
|
51 |
scrape_error = ""
|
52 |
try:
|
53 |
n_tries = 1
|
54 |
while (n_tries <= max_retries) and (scraped_text == ""):
|
55 |
-
scraped_text, scrape_error = get_text(url=url, n_words=n_words)
|
56 |
n_tries += 1
|
57 |
return scraped_text, scrape_error
|
58 |
except Exception as e:
|
|
|
6 |
import os
|
7 |
|
8 |
|
9 |
+
async def get_text(url, n_words=15):
|
10 |
try:
|
11 |
# geckodriver_path ='/home/user/app/geckodriver'
|
12 |
# os.environ['PATH'] += ':' + geckodriver_path
|
|
|
46 |
return "", err_msg
|
47 |
|
48 |
|
49 |
+
async def scrape_text(url, n_words=15,max_retries=2):
|
50 |
scraped_text = ""
|
51 |
scrape_error = ""
|
52 |
try:
|
53 |
n_tries = 1
|
54 |
while (n_tries <= max_retries) and (scraped_text == ""):
|
55 |
+
scraped_text, scrape_error = await get_text(url=url, n_words=n_words)
|
56 |
n_tries += 1
|
57 |
return scraped_text, scrape_error
|
58 |
except Exception as e:
|