File size: 790 Bytes
ae4e988
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import gradio as gr
import requests
import bs4

def sitemap(url):
    """Download a web page and list its raw text and hyperlinks.

    Args:
        url: Address of the page to fetch. ``None``, an empty string or a
            whitespace-only string is treated as "no URL given".

    Returns:
        ``None`` when no URL was given. Otherwise a list which is empty if
        the server did not answer with HTTP 200, and on success contains,
        in order:

        * the page text prefixed with ``"RAW TEXT RETURNED: "``,
        * the marker string ``"HTML fragments: "``,
        * one dict per ``<a>`` tag with the keys ``"LINK TITLE"``,
          ``"URL"`` and ``"STRING"`` (each value is ``None`` when the tag
          lacks that attribute / has no single string child).

    Raises:
        requests.RequestException: propagated from ``requests.get`` when
            the request cannot be completed (bad URL, connection error,
            timeout).
    """
    # Guard clause: nothing to fetch for a missing or blank URL.
    if not url or not url.strip():
        return None

    out = []
    # A timeout keeps a stalled server from blocking the UI callback forever.
    source = requests.get(url.strip(), timeout=10)
    if source.status_code != 200:
        return out

    # html.parser is used so that no extra parser package is required.
    soup = bs4.BeautifulSoup(source.content, 'html.parser')

    out.append(f'RAW TEXT RETURNED: {soup.text}')
    out.append("HTML fragments: ")
    out.extend(
        {
            "LINK TITLE": link.get('title'),
            "URL": link.get('href'),
            "STRING": link.string,
        }
        for link in soup.find_all("a")
    )
    # The original built this list but never returned it, so the caller
    # always received None.
    return out
# Assemble the UI: a textbox for the URL, a button to trigger the scrape,
# and a JSON viewer that displays the value returned by the callback.
with gr.Blocks() as app:
    inp = gr.Textbox()
    btn = gr.Button()
    outp = gr.JSON()
    # On click, call sitemap with the textbox value and show its result.
    btn.click(fn=sitemap, inputs=inp, outputs=outp)

# Start the Gradio server when this file is executed.
app.launch()