Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -48,7 +48,7 @@ def parse_action(string: str):
|
|
48 |
|
49 |
VERBOSE = True
|
50 |
MAX_HISTORY = 100
|
51 |
-
MAX_DATA =
|
52 |
|
53 |
def format_prompt(message, history):
|
54 |
prompt = "<s>"
|
@@ -302,21 +302,27 @@ def find_all(purpose,task,history, url):
|
|
302 |
#rawp.append([tag.name for tag in soup.find_all()] )
|
303 |
#print([tag.name for tag in soup.find_all()])
|
304 |
rawp=(f'RAW TEXT RETURNED: {soup.text}')
|
|
|
|
|
305 |
out.append(rawp)
|
306 |
out.append("HTML fragments: ")
|
307 |
q=("a","p","span","content","article")
|
308 |
for p in soup.find_all(q):
|
|
|
|
|
|
|
309 |
out.append([{"string":p.string,"content":p}])
|
310 |
c=0
|
311 |
out = str(out)
|
312 |
rl = len(out)
|
|
|
313 |
print(f'rl:: {rl}')
|
314 |
#for ea in out:
|
315 |
for i in str(out):
|
316 |
if i == " " or i=="," or i=="\n":
|
317 |
c +=1
|
318 |
print (f'c:: {c}')
|
319 |
-
if
|
320 |
print("compressing...")
|
321 |
rawp = compress_data(c,purpose,task,out)
|
322 |
print (rawp)
|
|
|
48 |
|
49 |
VERBOSE = True
|
50 |
MAX_HISTORY = 100
|
51 |
+
MAX_DATA = 20000
|
52 |
|
53 |
def format_prompt(message, history):
|
54 |
prompt = "<s>"
|
|
|
302 |
#rawp.append([tag.name for tag in soup.find_all()] )
|
303 |
#print([tag.name for tag in soup.find_all()])
|
304 |
rawp=(f'RAW TEXT RETURNED: {soup.text}')
|
305 |
+
cnt=0
|
306 |
+
cnt+=len(rawp)
|
307 |
out.append(rawp)
|
308 |
out.append("HTML fragments: ")
|
309 |
q=("a","p","span","content","article")
|
310 |
for p in soup.find_all(q):
|
311 |
+
cnt+=len(p.string)
|
312 |
+
cnt+=len(p)
|
313 |
+
|
314 |
out.append([{"string":p.string,"content":p}])
|
315 |
c=0
|
316 |
out = str(out)
|
317 |
rl = len(out)
|
318 |
+
print (cnt)
|
319 |
print(f'rl:: {rl}')
|
320 |
#for ea in out:
|
321 |
for i in str(out):
|
322 |
if i == " " or i=="," or i=="\n":
|
323 |
c +=1
|
324 |
print (f'c:: {c}')
|
325 |
+
if c > MAX_DATA:
|
326 |
print("compressing...")
|
327 |
rawp = compress_data(c,purpose,task,out)
|
328 |
print (rawp)
|