"""Gradio app that lists the links on a page plus the links one level below."""
from urllib.parse import urljoin, urlparse

import gradio as gr
import requests
import bs4

# Seconds to wait on a server before giving up; without a timeout a stalled
# host would block the UI callback indefinitely.
REQUEST_TIMEOUT = 10


def link_find(url):
    """Fetch *url* and describe every ``<a>`` element in the returned HTML.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        A list with one dict per anchor, keyed by ``"LINK TITLE"`` (the
        ``title`` attribute), ``"URL"`` (the raw ``href`` attribute),
        ``"STRING"`` (the anchor's ``.string``) and ``"TREE"`` (a fresh empty
        list). If the response status is not 200, the marker list
        ``["None"]`` is returned instead.

    Raises:
        requests.RequestException: If the request itself fails (malformed
            URL, connection error, timeout).
    """
    source = requests.get(url, timeout=REQUEST_TIMEOUT)
    if source.status_code != 200:
        return ["None"]

    soup = bs4.BeautifulSoup(source.content, "html.parser")
    return [
        {
            "LINK TITLE": anchor.get("title"),
            "URL": anchor.get("href"),
            "STRING": anchor.string,
            "TREE": [],
        }
        for anchor in soup.find_all("a")
    ]


def _resolve_link(base_url, href):
    """Return *href* as an absolute http(s) URL, or None if it cannot be fetched."""
    if not href:
        return None
    try:
        absolute = urljoin(base_url, href)
        scheme = urlparse(absolute).scheme
    except ValueError:
        # urllib rejects some malformed hrefs (e.g. unbalanced IPv6 brackets).
        return None
    # Skip mailto:, javascript:, tel: and similar links requests cannot fetch.
    if scheme not in ("http", "https"):
        return None
    return absolute


def sitemap(url):
    """Build a two-level link map starting at *url*.

    The links found on *url* are collected with ``link_find``; each of those
    links is then fetched in turn and the links found there are stored under
    that entry's ``"TREE"`` key.

    Args:
        url: Absolute URL of the start page. ``""`` or ``None`` yields ``None``.

    Returns:
        The list produced by ``link_find(url)`` with every ``"TREE"`` filled
        in where the linked page could be fetched, or ``None`` when *url* is
        empty. A link that cannot be resolved or fetched keeps an empty
        ``"TREE"``.

    Raises:
        requests.RequestException: If fetching the start page itself fails.
    """
    if not url:
        return None

    links = link_find(url)
    for entry in links:
        if not isinstance(entry, dict):
            # link_find reports a non-200 response as ["None"]; there is
            # nothing to follow for such a marker.
            continue
        print(entry)

        # hrefs are frequently relative or missing, so resolve them against
        # the page they were found on before fetching.
        child_url = _resolve_link(url, entry["URL"])
        if child_url is None:
            continue
        try:
            children = link_find(child_url)
        except requests.RequestException:
            # One unreachable link must not abort the whole site map.
            continue
        entry["TREE"] = [child for child in children if isinstance(child, dict)]
    return links


# UI: a URL text box, a button that runs sitemap(), and a JSON result view.
with gr.Blocks() as app:
    inp = gr.Textbox()
    btn = gr.Button()
    outp = gr.JSON()
    btn.click(sitemap, inp, outp)
app.launch()