Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -31,12 +31,8 @@ async def scraper(url):
|
|
31 |
|
32 |
async def text_scraper(url):
|
33 |
"""Fetches HTML content using AsyncFetcher and then extracts text."""
|
34 |
-
html = await async_fetcher.get(url)
|
35 |
-
|
36 |
-
text = html
|
37 |
-
text = re.sub(r'\n+', ' ', text)
|
38 |
-
text = re.sub(r'\s+', ' ', text)
|
39 |
-
return text
|
40 |
|
41 |
|
42 |
async def convert_html_to_md(html):
|
|
|
31 |
|
32 |
async def text_scraper(url):
|
33 |
"""Fetches HTML content using AsyncFetcher and then extracts text."""
|
34 |
+
html = await async_fetcher.get(url)
|
35 |
+
return re.sub(r'\s+', ' ', re.sub(r'\n+', ' ', html.get_all_text())).strip()
|
|
|
|
|
|
|
|
|
36 |
|
37 |
|
38 |
async def convert_html_to_md(html):
|