File size: 469 Bytes
f85ff86
 
 
75ddbd7
f85ff86
328de20
f85ff86
 
 
328de20
f85ff86
 
 
328de20
f85ff86
 
328de20
f85ff86
 
328de20
f85ff86
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from curl_cffi import requests as req
from bs4 import BeautifulSoup
import html2text

# Page to fetch and convert to Markdown.
url = 'https://www.firecrawl.dev/'

# Fetch the HTML content. Fail loudly on an HTTP error status so that an
# error page (e.g. 404 or 500) is not silently converted and printed as if it
# were the real page content.
response = req.get(url)
response.raise_for_status()

# Parse the body using the 'html.parser' backend.
soup = BeautifulSoup(response.text, 'html.parser')

# Remove elements that would otherwise leak code, styling, or vector markup
# into the converted text.
for tag in soup(['script', 'style', 'noscript', 'svg']):
    tag.decompose()

# Serialize the pruned tree back to an HTML string.
clean_html = str(soup)

# Convert the cleaned HTML to Markdown.
markdown = html2text.html2text(clean_html)

# Write the result to stdout.
print(markdown)