Spaces:
Sleeping
Sleeping
File size: 1,555 Bytes
53dbd29 d646a51 53dbd29 d646a51 53dbd29 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import streamlit as st
import requests
from bs4 import BeautifulSoup
import re
def scrape_visible_text_from_url(url, timeout=10):
    """Fetch *url* and return its header and paragraph text as a single line.

    The page is parsed with BeautifulSoup's ``html.parser``. Non-content
    elements (scripts, styles, navigation, footers, images, ...) are removed,
    the text of the first ``<header>`` element is captured, and the text of
    every remaining ``<p>`` element is appended. All runs of whitespace are
    collapsed to single spaces.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        The cleaned text (possibly an empty string when the page has neither
        a header nor paragraphs), or ``None`` if anything went wrong; in that
        case the error is reported to the user through ``st.error``.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Strip non-content elements first. "header" is deliberately NOT in
        # this list: removing it here would make the lookup below always
        # return None, silently dropping the header text from the result.
        for tag in soup(["script", "style", "meta", "link", "noscript", "footer", "aside", "nav", "img"]):
            tag.extract()

        header_content = soup.find("header")
        # A space separator keeps adjacent inline elements from fusing
        # into one word; whitespace is normalised further down anyway.
        header_text = header_content.get_text(" ") if header_content else ""

        # Now that the header text is captured, drop every <header> so that
        # paragraphs nested inside one are not repeated in the body text.
        for tag in soup("header"):
            tag.extract()

        paragraph_text = " ".join(p.get_text() for p in soup.find_all("p"))

        visible_text = f"{header_text}\n\n{paragraph_text}"
        visible_text = re.sub(r'\s+', ' ', visible_text)
        return visible_text.strip()
    except Exception as e:
        # UI boundary: any network or parsing failure is shown to the user
        # instead of crashing the Streamlit script run.
        st.error(f"Error occurred while scraping the data: {e}")
        return None
# Streamlit UI
def main():
    """Render the scraper page: a URL field, a load button and the result.

    When the button is pressed, the entered URL is passed to
    ``scrape_visible_text_from_url``. The scraped text is displayed on
    success; a warning is displayed when the URL field is empty or when the
    scraper returns nothing.
    """
    st.title("Web Data Scraper")
    url_input = st.text_input("Enter the URL ๐โ๏ธ:", "")

    clicked = st.button("Load Datum ๐ง")
    if not clicked:
        return

    # Guard clauses: report each failure case and stop early.
    if not url_input:
        st.warning("Please enter a valid URL.")
        return

    data = scrape_visible_text_from_url(url_input)
    if not data:
        st.warning("Failed to load data from the URL.")
        return

    st.success("Data text successfully scraped!")
    st.subheader("Scraped Text :")
    st.write(data)


# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|