File size: 1,010 Bytes
ae4e988
 
 
 
437cf54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae4e988
6660108
437cf54
2e1cde2
 
 
437cf54
ae4e988
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from urllib.parse import urljoin, urlparse

import bs4
import gradio as gr
import requests

def link_find(url):
    """Fetch *url* and list the page's raw text plus every anchor on it.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        On an HTTP 200 response, a list laid out as:

        * item 0: the string ``"RAW TEXT RETURNED: <page text>"``;
        * item 1: the marker string ``"HTML fragments: "``;
        * remaining items: one dict per ``<a>`` tag with the keys
          ``"LINK TITLE"``, ``"URL"``, ``"STRING"`` and ``"TREE"``
          (``"TREE"`` starts as an empty list for the caller to fill).

        The string ``"None"`` when the request fails or the status code
        is anything other than 200.
    """
    try:
        # A timeout keeps one unresponsive host from hanging the caller.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        # Same failure signal the function uses for a non-200 status.
        return "None"
    if response.status_code != 200:
        return "None"

    soup = bs4.BeautifulSoup(response.content, 'html.parser')
    out = [f'RAW TEXT RETURNED: {soup.text}', "HTML fragments: "]
    out.extend(
        {
            "LINK TITLE": anchor.get('title'),
            "URL": anchor.get('href'),
            "STRING": anchor.string,
            "TREE": [],
        }
        for anchor in soup.find_all("a")
    )
    return out


def sitemap(url):
    """Build a one-level-deep link map for the page at *url*.

    The page is scanned with ``link_find``; every anchor entry it returns
    is then followed once, and the result of scanning the linked page is
    appended to that entry's ``"TREE"`` list.

    Args:
        url: Absolute URL of the page to start from. An empty string or
            ``None`` yields an empty list.

    Returns:
        The list produced by ``link_find(url)`` with each anchor's
        ``"TREE"`` populated where the linked page could be fetched, or
        whatever non-list value ``link_find`` returned (it signals a
        failed fetch with the string ``"None"``).
    """
    if not url:
        return []

    entries = link_find(url)
    if not isinstance(entries, list):
        # Failed fetch: pass the failure marker straight through.
        return entries

    for entry in entries:
        # The list also carries plain strings (raw text, section marker);
        # only the per-anchor dicts have a URL to follow.
        if not isinstance(entry, dict):
            continue
        href = entry.get("URL")
        if not href:
            continue
        # hrefs are frequently relative; resolve them against the page URL.
        target = urljoin(url, href)
        if urlparse(target).scheme not in ("http", "https"):
            # Skip mailto:, javascript:, tel: and similar non-fetchable links.
            continue
        try:
            children = link_find(target)
        except requests.RequestException:
            # One unreachable link must not abort the whole crawl.
            continue
        if isinstance(children, list):
            entry["TREE"].append(children)
    return entries
# Minimal UI: a URL textbox, a button that runs `sitemap` on the textbox
# value, and a JSON viewer that shows what `sitemap` returned.
with gr.Blocks() as app:
    inp=gr.Textbox()
    btn=gr.Button()
    outp=gr.JSON()
    btn.click(sitemap,inp,outp)

# Start the web server only when run as a script, so importing this module
# (e.g. to reuse `link_find` / `sitemap`) does not launch a blocking server.
if __name__ == "__main__":
    app.launch()