Omnibus committed on
Commit
8849abb
·
verified ·
1 Parent(s): 366c803

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -16
app.py CHANGED
@@ -21,10 +21,18 @@ def link_find(url):
21
 
22
  q=("a","p","span","content","article")
23
  for p in soup.find_all("a"):
24
- node1['LINKS'].append(p.get('href'))
25
- node1['TREE'].append({"URL":p.get('href'),"TITLE":p.get('title'),"STRING":p.string,"TEXT":"","LINKS":[],"TREE":[]})
26
- node2['TREE'].append({"URL":p.get('href'),"LINKS":[],"TREE":[]})
27
- node2['LINKS'].append(p.get('href'))
 
 
 
 
 
 
 
 
28
 
29
  #out.append({"URL":p.get('href'),"TITLE":p.get('title'),"STRING":p.string,"TEXT":"","TREE":[]})
30
 
@@ -36,18 +44,19 @@ def link_find(url):
36
 
37
  def sitemap(url,level):
38
  uri=""
 
39
  if url != "" and url != None:
40
  link1,link2=link_find(url)
41
  if level >=2:
42
  for i,ea in enumerate(link1['TREE']):
43
  print(ea)
44
  try:
45
- if not ea['URL'].startswith("http"):
46
- uri1=url.split("//")[0]
47
- uri2=url.split("//")[1]
48
- uri3=uri2.split("/")[0]
49
- uri=f'{uri1}//{uri3}'
50
- print(uri)
51
  out_list1,out_list2=link_find(f"{uri}{ea['URL']}")
52
  link1['TREE'][i]=out_list1
53
  link2['TREE'][i]=out_list2
@@ -57,12 +66,12 @@ def sitemap(url,level):
57
  for n,na in enumerate(link1['TREE'][i]['TREE']):
58
  print(na)
59
  try:
60
- if not na['URL'].startswith("http"):
61
- uri11=url.split("//")[0]
62
- uri22=url.split("//")[1]
63
- uri33=uri22.split("/")[0]
64
- uri0=f'{uri11}//{uri33}'
65
- print(uri0)
66
  out_list1,out_list2=link_find(f"{uri0}{na['URL']}")
67
  link1['TREE'][i]['TREE'][n]=out_list1
68
  link2['TREE'][i]['TREE'][n]=out_list2
 
21
 
22
  q=("a","p","span","content","article")
23
  for p in soup.find_all("a"):
24
+ url0=p.get('href')
25
+ if not url0.startswith("http"):
26
+ uri1=url0.split("//")[0]
27
+ uri2=url0.split("//")[1]
28
+ uri3=uri2.split("/")[0]
29
+ uri=f'{uri1}//{uri3}'
30
+ print(uri)
31
+
32
+ node1['LINKS'].append(uri)
33
+ node1['TREE'].append({"URL":uri,"TITLE":p.get('title'),"STRING":p.string,"TEXT":"","LINKS":[],"TREE":[]})
34
+ node2['TREE'].append({"URL":uri,"LINKS":[],"TREE":[]})
35
+ node2['LINKS'].append(uri)
36
 
37
  #out.append({"URL":p.get('href'),"TITLE":p.get('title'),"STRING":p.string,"TEXT":"","TREE":[]})
38
 
 
44
 
45
  def sitemap(url,level):
46
  uri=""
47
+ uri0=""
48
  if url != "" and url != None:
49
  link1,link2=link_find(url)
50
  if level >=2:
51
  for i,ea in enumerate(link1['TREE']):
52
  print(ea)
53
  try:
54
+ #if not ea['URL'].startswith("http"):
55
+ # uri1=url.split("//")[0]
56
+ # uri2=url.split("//")[1]
57
+ # uri3=uri2.split("/")[0]
58
+ # uri=f'{uri1}//{uri3}'
59
+ # print(uri)
60
  out_list1,out_list2=link_find(f"{uri}{ea['URL']}")
61
  link1['TREE'][i]=out_list1
62
  link2['TREE'][i]=out_list2
 
66
  for n,na in enumerate(link1['TREE'][i]['TREE']):
67
  print(na)
68
  try:
69
+ #if not na['URL'].startswith("http"):
70
+ # uri11=url.split("//")[0]
71
+ # uri22=url.split("//")[1]
72
+ # uri33=uri22.split("/")[0]
73
+ # uri0=f'{uri11}//{uri33}'
74
+ # print(uri0)
75
  out_list1,out_list2=link_find(f"{uri0}{na['URL']}")
76
  link1['TREE'][i]['TREE'][n]=out_list1
77
  link2['TREE'][i]['TREE'][n]=out_list2