File size: 1,555 Bytes
53dbd29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d646a51
53dbd29
 
 
 
d646a51
53dbd29
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import streamlit as st
import requests
from bs4 import BeautifulSoup
import re

def scrape_visible_text_from_url(url, timeout=10):
    """Fetch a web page and return its header and paragraph text as one string.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup's
    ``html.parser``. Non-content elements are removed, then the text of the
    first ``<header>`` element (if any) and of every remaining ``<p>`` element
    is combined, with each run of whitespace collapsed to a single space.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The cleaned text (an empty string when the page has neither header
        nor paragraph text), or ``None`` if fetching or parsing failed. On
        failure the error is shown to the user through ``st.error``.
    """
    try:
        # requests never times out unless told to; without this an
        # unresponsive server would hang the app indefinitely.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Drop non-content elements. <header> is intentionally not in this
        # list: removing it here would make the lookup below always miss.
        for tag in soup(["script", "style", "meta", "link", "noscript", "footer", "aside", "nav", "img"]):
            tag.extract()

        header_content = soup.find("header")
        # A separator keeps text of adjacent child elements from running together.
        header_text = header_content.get_text(separator=" ") if header_content else ""

        # Remove every <header> now that its text has been read, so that
        # paragraphs nested inside one are not collected a second time.
        for tag in soup("header"):
            tag.extract()

        paragraph_text = " ".join(p.get_text() for p in soup.find_all("p"))

        # Collapse newlines, tabs and repeated spaces into single spaces.
        visible_text = re.sub(r'\s+', ' ', f"{header_text}\n\n{paragraph_text}")
        return visible_text.strip()
    except Exception as e:
        # UI boundary: report any failure (network, HTTP status, parsing)
        # to the user instead of crashing the app.
        st.error(f"Error occurred while scraping the data: {e}")
        return None

# Streamlit UI

def main():
    """Render the Streamlit page: a URL field, a load button and the result.

    When the button is pressed, the entered URL is passed to
    ``scrape_visible_text_from_url`` and the outcome is shown as the scraped
    text or as a warning.
    """
    st.title("Web Data Scraper")

    # The emoji in the labels (pointing hand + writing hand, butter) are
    # written as escapes; the previous literals were mis-decoded UTF-8.
    url_input = st.text_input("Enter the URL \U0001F449\u270D\uFE0F:", "")

    if not st.button("Load Datum \U0001F9C8"):
        return

    # Treat whitespace-only input the same as an empty field.
    url = url_input.strip()
    if not url:
        st.warning("Please enter a valid URL.")
        return

    data = scrape_visible_text_from_url(url)
    if data is None:
        # The scraper returns None when fetching or parsing failed.
        st.warning("Failed to load data from the URL.")
    elif not data:
        # The page was fetched, but it yielded an empty string.
        st.warning("The page loaded, but no readable text was found.")
    else:
        st.success("Data text successfully scraped!")
        st.subheader("Scraped Text :")
        st.write(data)

# Call main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()