Spaces:
Running
Running
from curl_cffi import requests as req
from bs4 import BeautifulSoup
import html2text

# Script: download one web page, drop non-content tags, and print the
# remainder as Markdown on stdout.

# Page to fetch and convert.
url = 'https://www.firecrawl.dev/'

# Fetch HTML content
response = req.get(url)
# Fail loudly on an HTTP error status instead of silently converting an
# error page into Markdown.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Optional: Clean up unwanted tags
# Calling the soup object with a list of tag names finds every matching tag;
# decompose() removes each one (and its contents) from the parse tree.
for tag in soup(['script', 'style', 'noscript', 'svg']):
    tag.decompose()

# Extract cleaned HTML
clean_html = str(soup)

# Convert to Markdown
markdown = html2text.html2text(clean_html)

# Output
print(markdown)