"""Small FastAPI service that fetches a web page and returns it as Markdown.

Two GET routes are exposed; both take the target URL as the remainder of the
request path:

* ``/read/<url>``   -- fetches the page with ``scrapling.Fetcher``.
* ``/reader/<url>`` -- fetches the page with ``scrapling.StealthyFetcher``.

NOTE(review): both routes fetch arbitrary caller-supplied URLs.  If this
service is reachable from untrusted networks, consider restricting the
allowed targets (SSRF) -- confirm against the deployment.
"""

import logging
import os
import subprocess
import tempfile
from typing import Callable
from urllib.parse import unquote

import uvicorn
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import PlainTextResponse
from markitdown import MarkItDown
from scrapling import Fetcher, StealthyFetcher

logger = logging.getLogger(__name__)

app = FastAPI()

# Shared, module-level fetcher/converter instances reused by every request.
fetcher = Fetcher(auto_match=True)
stealthy_fetcher = StealthyFetcher()
md = MarkItDown()


def stealthy_scraper(url):
    """Fetch *url* with the shared ``StealthyFetcher`` and return its HTML."""
    page = stealthy_fetcher.fetch(url)
    return page.html_content


def scraper(url):
    """Fetch *url* with the shared ``Fetcher`` and return its HTML."""
    page = fetcher.get(url)
    return page.html_content


def convert_html_to_md(html):
    """Convert an HTML string to Markdown text.

    The HTML is written to a temporary ``.html`` file because the converter
    is invoked with a file path.  The file is closed before conversion and is
    always removed afterwards, even when the conversion raises.

    Args:
        html: HTML document as a ``str``.

    Returns:
        The ``text_content`` produced by ``MarkItDown.convert``.
    """
    with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as temp_file:
        temp_file.write(html.encode("utf-8"))
        temp_file_path = temp_file.name
    # Leaving the ``with`` block closes (and therefore flushes) the file, so
    # the converter sees the complete document and the path can be removed.
    logger.debug("Converting temporary HTML file %s", temp_file_path)
    try:
        return md.convert(temp_file_path).text_content
    finally:
        os.remove(temp_file_path)


def _target_url_from_request(request: Request, route_prefix: str) -> str:
    """Return the target URL embedded after *route_prefix* in the request URL.

    The full request URL is used instead of the ``url`` path parameter,
    presumably so the target's query string is kept -- TODO confirm.

    Only the first occurrence of *route_prefix* is treated as the separator,
    so a target URL that itself contains e.g. ``/read/`` is not truncated.
    ``http://`` is prepended when the target has no http(s) scheme.

    Raises:
        ValueError: if *route_prefix* does not occur in the request URL.
    """
    _, found, target = str(request.url).partition(route_prefix)
    if not found:
        raise ValueError(f"{route_prefix!r} not found in request URL")
    if not target.startswith(("http://", "https://")):
        target = f"http://{target}"
    return target


def _markdown_response(
    request: Request,
    route_prefix: str,
    scrape: Callable[[str], str],
) -> PlainTextResponse:
    """Fetch the requested page with *scrape* and return it as Markdown.

    Any failure (URL extraction, fetch, conversion) is logged and reported to
    the client as an HTTP 500 whose detail carries the error message.
    """
    try:
        target = _target_url_from_request(request, route_prefix)
        markdown_output = convert_html_to_md(scrape(target))
        return PlainTextResponse(markdown_output)
    except Exception as e:
        logger.exception("Failed to convert %s", request.url)
        raise HTTPException(
            status_code=500, detail=f"Error processing URL: {e}"
        ) from e


# GET endpoint /read/{url:path}: target URL is the rest of the path.
# Declared as a plain ``def`` so FastAPI runs the blocking fetch in its
# threadpool instead of on the event loop.  ``name`` keeps the route name
# (and the derived OpenAPI operation id) this route had before the handler
# was given its own function name.
@app.get(
    "/read/{url:path}",
    response_class=PlainTextResponse,
    name="get_markdown_get",
)
def get_markdown_read(request: Request, url: str):
    """Return the page at the URL following ``/read/`` as Markdown."""
    return _markdown_response(request, "/read/", scraper)


# GET endpoint /reader/{url:path}: same contract as /read/, but the page is
# fetched with the stealthy fetcher.
@app.get("/reader/{url:path}", response_class=PlainTextResponse)
def get_markdown_get(request: Request, url: str):
    """Return the page at the URL following ``/reader/`` as Markdown."""
    return _markdown_response(request, "/reader/", stealthy_scraper)


def _main():
    """Run ``camoufox fetch`` (best effort), then serve the app with uvicorn."""
    # NOTE(review): presumably this downloads the browser the stealthy
    # fetcher needs -- confirm.  A failure here is reported but does not
    # prevent the server from starting.
    try:
        subprocess.run(['camoufox', 'fetch'], check=True)
        print("Command executed successfully!")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

    uvicorn.run(app, host="0.0.0.0", port=7860)


if __name__ == "__main__":
    _main()