bcci committed on
Commit
6dc195c
·
verified ·
1 Parent(s): 69ebfb0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -2,23 +2,23 @@ from fastapi import FastAPI, Request, HTTPException
2
  from fastapi.responses import PlainTextResponse
3
  from urllib.parse import unquote
4
  import uvicorn
5
- from scrapling import Fetcher, StealthyFetcher
6
  from markitdown import MarkItDown
7
  import tempfile
8
  import os
9
 
10
  app = FastAPI()
11
 
12
- fetcher = Fetcher(auto_match=True)
13
  stealthy_fetcher = StealthyFetcher()
14
  md = MarkItDown()
15
 
16
  def stealthy_scraper(url):
17
- html = stealthy_fetcher.fetch(url)
18
  return html.html_content
19
 
20
  def scraper(url):
21
- html = fetcher.get(url)
22
  return html.html_content
23
 
24
  def convert_html_to_md(html):
@@ -52,7 +52,7 @@ async def get_markdown_get(request: Request, url: str):
52
 
53
  # GET endpoint to /reader/{url:path} expecting URL in path
54
  @app.get("/reader/{url:path}", response_class=PlainTextResponse)
55
- def get_markdown_get(request: Request, url: str):
56
  try:
57
  # Retrieve the full path from the request
58
  full_url = str(request.url)
 
2
  from fastapi.responses import PlainTextResponse
3
  from urllib.parse import unquote
4
  import uvicorn
5
+ from scrapling import AsyncFetcher, StealthyFetcher
6
  from markitdown import MarkItDown
7
  import tempfile
8
  import os
9
 
10
  app = FastAPI()
11
 
12
+ fetcher = AsyncFetcher(auto_match=True)
13
  stealthy_fetcher = StealthyFetcher()
14
  md = MarkItDown()
15
 
16
  def stealthy_scraper(url):
17
+ html = await stealthy_fetcher.async_fetch(url)
18
  return html.html_content
19
 
20
  def scraper(url):
21
+ html = await fetcher.get(url)
22
  return html.html_content
23
 
24
  def convert_html_to_md(html):
 
52
 
53
  # GET endpoint to /reader/{url:path} expecting URL in path
54
  @app.get("/reader/{url:path}", response_class=PlainTextResponse)
55
+ async def get_markdown_get(request: Request, url: str):
56
  try:
57
  # Retrieve the full path from the request
58
  full_url = str(request.url)