Omnibus committed on
Commit
acf5be1
·
verified ·
1 Parent(s): 1a8a73c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -2
app.py CHANGED
@@ -48,7 +48,7 @@ def parse_action(string: str):
48
 
49
  VERBOSE = True
50
  MAX_HISTORY = 100
51
- MAX_DATA = 1000
52
 
53
  def format_prompt(message, history):
54
  prompt = "<s>"
@@ -302,21 +302,27 @@ def find_all(purpose,task,history, url):
302
  #rawp.append([tag.name for tag in soup.find_all()] )
303
  #print([tag.name for tag in soup.find_all()])
304
  rawp=(f'RAW TEXT RETURNED: {soup.text}')
 
 
305
  out.append(rawp)
306
  out.append("HTML fragments: ")
307
  q=("a","p","span","content","article")
308
  for p in soup.find_all(q):
 
 
 
309
  out.append([{"string":p.string,"content":p}])
310
  c=0
311
  out = str(out)
312
  rl = len(out)
 
313
  print(f'rl:: {rl}')
314
  #for ea in out:
315
  for i in str(out):
316
  if i == " " or i=="," or i=="\n":
317
  c +=1
318
  print (f'c:: {c}')
319
- if rl > MAX_DATA:
320
  print("compressing...")
321
  rawp = compress_data(c,purpose,task,out)
322
  print (rawp)
 
48
 
49
  VERBOSE = True
50
  MAX_HISTORY = 100
51
+ MAX_DATA = 20000
52
 
53
  def format_prompt(message, history):
54
  prompt = "<s>"
 
302
  #rawp.append([tag.name for tag in soup.find_all()] )
303
  #print([tag.name for tag in soup.find_all()])
304
  rawp=(f'RAW TEXT RETURNED: {soup.text}')
305
+ cnt=0
306
+ cnt+=len(rawp)
307
  out.append(rawp)
308
  out.append("HTML fragments: ")
309
  q=("a","p","span","content","article")
310
  for p in soup.find_all(q):
311
+ cnt+=len(p.string)
312
+ cnt+=len(p)
313
+
314
  out.append([{"string":p.string,"content":p}])
315
  c=0
316
  out = str(out)
317
  rl = len(out)
318
+ print (cnt)
319
  print(f'rl:: {rl}')
320
  #for ea in out:
321
  for i in str(out):
322
  if i == " " or i=="," or i=="\n":
323
  c +=1
324
  print (f'c:: {c}')
325
+ if c > MAX_DATA:
326
  print("compressing...")
327
  rawp = compress_data(c,purpose,task,out)
328
  print (rawp)