bcci committed on
Commit
a83a227
·
verified ·
1 Parent(s): 056c0e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -6
app.py CHANGED
@@ -31,12 +31,8 @@ async def scraper(url):
31
 
32
  async def text_scraper(url):
33
  """Fetches HTML content using AsyncFetcher and than extract text."""
34
- html = await async_fetcher.get(url).get_all_text() # Use await for async operations
35
- print(html)
36
- text = html
37
- text = re.sub(r'\n+', ' ', text)
38
- text = re.sub(r'\s+', ' ', text)
39
- return text
40
 
41
 
42
  async def convert_html_to_md(html):
 
31
 
32
  async def text_scraper(url):
33
  """Fetches HTML content using AsyncFetcher and than extract text."""
34
+ html = await async_fetcher.get(url)
35
+ return re.sub(r'\s+', ' ', re.sub(r'\n+', ' ', html.get_all_text())).strip()
 
 
 
 
36
 
37
 
38
  async def convert_html_to_md(html):