# NOTE: captured from a Hugging Face Spaces page; the Space status banner read "Runtime error" at capture time.
import datetime
import json
import math
import os
import random
import urllib.request
import uuid
#import subprocess

import bs4
import gradio as gr
import lxml
import requests
from huggingface_hub import InferenceClient, HfApi

from prompts import (
    FINDER,
    SAVE_MEMORY,
    COMPRESS_HISTORY_PROMPT,
    COMPRESS_DATA_PROMPT,
    COMPRESS_DATA_PROMPT_SMALL,
    LOG_PROMPT,
    LOG_RESPONSE,
    PREFIX,
    TASK_PROMPT,
)
# Dataset repo that receives saved memories, and the raw-file URL prefix used
# to read files back from it.
reponame = "Omnibus/tmp"
save_data = f'https://huggingface.co/datasets/{reponame}/raw/main/'

# Token used for dataset uploads; a missing HF_TOKEN raises KeyError at import.
token_self = os.environ['HF_TOKEN']
api = HfApi(token=token_self)

# Text-generation client used for every model call in this file.
client = InferenceClient(
    "mistralai/Mixtral-8x7B-Instruct-v0.1"
)

from gradio_client import Client

# The screenshot Space is only needed by get_screenshot(), and this URL pins a
# single replica. Connecting happens at import time, so a failure here would
# otherwise stop the whole app from starting; degrade to "no client" instead.
try:
    client2 = Client("https://omnibus-html-image-current-tab.hf.space/--replicas/strm7/")
except Exception as exc:
    print(f"screenshot client unavailable: {exc}")
    client2 = None
def get_screenshot(chat, height=5000, width=600, chatblock=None, header=True, theme="light", wait=3000):
    """Ask the remote screenshot Space to render `chat` and print the first result.

    All arguments are forwarded unchanged to the remote ``/run_script``
    endpoint. ``chatblock`` defaults to ``[1]``; the default is created per
    call so calls never share one list object.

    Returns None; the first element of the remote result is only printed.
    """
    if chatblock is None:
        chatblock = [1]
    if client2 is None:
        # Nothing to call when no screenshot client exists.
        print("screenshot client unavailable")
        return
    result = client2.predict(chat, height, width, chatblock, header, theme, wait, api_name="/run_script")
    print(result[0])
def parse_action(string: str):
    """Split a model ``action:`` line into ``(action_name, action_input)``.

    Accepted shapes are ``action: NAME`` and ``action: NAME action_input=VALUE``.
    The name and the value are whitespace-trimmed, and the value additionally
    has surrounding single/double quotes removed. ``action_input`` is None when
    the line carries no ``action_input=`` part.

    Raises:
        ValueError: if `string` does not start with ``action:``.
    """
    prefix = "action:"
    marker = "action_input="
    print("PARSING:")
    print(string)
    # Explicit raise instead of assert: asserts are stripped under ``python -O``.
    if not string.startswith(prefix):
        raise ValueError(f"not an action line: {string!r}")

    idx = string.find(marker)
    print(idx)
    if idx == -1:
        print("idx == -1")
        action_name = string[len(prefix):].strip()
        print(action_name)
        return action_name, None

    # Slice relative to the prefix/marker lengths rather than fixed offsets so
    # that missing or extra spaces around the name do not corrupt it.
    action_name = string[len(prefix):idx].strip()
    action_input = string[idx + len(marker):].strip().strip("'\"").strip()
    print("last return:")
    print(action_name)
    print(action_input)
    return action_name, action_input
# When true, prompts, raw model responses and history are printed while running.
VERBOSE = True
# run_action() compresses the history once it has more than this many lines.
MAX_HISTORY = 100
# Size limit used by compress_data() and save_memory() when splitting input.
MAX_DATA = 20000
def format_prompt(message, history):
    """Render a chat history plus a new message as one ``[INST]``-tagged prompt.

    Args:
        message: the new user message, placed in the final ``[INST]`` block.
        history: iterable of ``(user_prompt, bot_response)`` pairs.

    Returns:
        A string that starts with ``<s>``, contains one
        ``[INST] user [/INST] bot</s> `` segment per history pair, and ends
        with ``[INST] message [/INST]``.
    """
    turns = [
        f"[INST] {user_prompt} [/INST] {bot_response}</s> "
        for user_prompt, bot_response in history
    ]
    return "<s>" + "".join(turns) + f"[INST] {message} [/INST]"
def run_gpt(
    prompt_template,
    stop_tokens,
    max_tokens,
    seed,
    purpose,
    **prompt_kwargs,
):
    """Format a prompt, stream a completion from the model and return its text.

    The prompt is ``PREFIX`` (formatted with the current timestamp and
    `purpose`) followed by `prompt_template` formatted with `prompt_kwargs`.

    Args:
        prompt_template: format string filled from `prompt_kwargs`.
        stop_tokens: accepted for interface compatibility.
            NOTE(review): this value is never passed to the model call, so
            generation is not actually stopped on these strings.
        max_tokens: forwarded as ``max_new_tokens``.
        seed: sampling seed forwarded to the model call.
        purpose: inserted into ``PREFIX``.

    Returns:
        The concatenated text of every streamed token.
    """
    timestamp = datetime.datetime.now()
    print(seed)
    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=max_tokens,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=seed,
    )

    content = PREFIX.format(
        timestamp=timestamp,
        purpose=purpose,
    ) + prompt_template.format(**prompt_kwargs)
    if VERBOSE:
        print(LOG_PROMPT.format(content))

    stream = client.text_generation(content, **generate_kwargs, stream=True, details=True, return_full_text=False)
    # Collect the streamed token texts in a single join.
    resp = "".join(response.token.text for response in stream)

    if VERBOSE:
        print(LOG_RESPONSE.format(resp))
    return resp
def compress_data(c, purpose, task, history, result):
    """Summarize `history` chunk by chunk, carrying the running summary forward.

    Each chunk is sent to the model with ``COMPRESS_DATA_PROMPT`` together with
    the previous chunk's response as ``knowledge``; the last response is
    returned.

    Args:
        c: size estimate for `history` (callers pass a count of separator
            characters); decides how many chunks are processed.
        purpose: overall user goal, passed to the prompt.
        task: current task, embedded in the instruction given to the model.
        history: the data to compress; sliced with ``history[s:e]``.
        result: unused; kept so the signature stays unchanged.

    Returns:
        The model response for the final chunk, with surrounding newlines
        stripped.
    """
    seed = random.randint(1, 1000000000)
    print(c)
    count = int(c)
    divr = count / MAX_DATA
    # The chunk size is simply MAX_DATA. Deriving it as c / (c / MAX_DATA)
    # gives the same number with float noise and divides by zero when c is 0.
    chunk = MAX_DATA
    # Always process at least one chunk so a response exists to return.
    divi = max(1, math.ceil(divr))
    print(f'chunk:: {chunk}')
    print(f'divr:: {divr}')
    print(f'divi:: {divi}')
    # NOTE(review): `c` counts separators while the slices below are measured
    # in characters, so only the first divi * MAX_DATA characters of `history`
    # are ever sent to the model — confirm this truncation is intended.

    new_history = ""
    task = f'Compile this data to fulfill the task: {task}, and complete the purpose: {purpose}\n'
    resp = ""
    for z in range(divi):
        s = z * chunk
        e = s + chunk
        print(f's:e :: {s}:{e}')
        resp = run_gpt(
            COMPRESS_DATA_PROMPT,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=2048,
            seed=seed,
            purpose=purpose,
            task=task,
            knowledge=new_history,
            history=history[s:e],
        ).strip('\n')
        # The summary so far becomes the "knowledge" for the next chunk.
        new_history = resp
        print(resp)

    print("final" + resp)
    return resp
def _keywords_from_line(line):
    """Return the cleaned keywords of a ``keywords: [...]`` line, else None."""
    if "keywords" not in line[:16]:
        return None
    _, sep, tail = line.partition(":")
    if not sep or "[" not in tail:
        # No bracketed list on this line; nothing to index.
        return None
    listed = tail.split("[")[1].split("]")[0]
    # Keep only letters, digits and spaces of each comma-separated entry.
    return [
        "".join(ch for ch in item.strip() if ch.isalnum() or ch == " ")
        for item in listed.split(",")
    ]


def save_memory(purpose, history):
    """Index `history` with the model and store the result in the dataset repo.

    Steps:
      1. ``str(history)`` is sent to the model in MAX_DATA-sized chunks using
         ``SAVE_MEMORY``; each response is fed to the next chunk as
         ``knowledge``.
      2. The final response is trimmed to the text from the first ``[{`` up to
         ``</s>`` when a ``[{`` is present, and uploaded as
         ``/mem-test/<timestamp>.json``.
      3. ``mem-test/main.json`` is downloaded (an empty list is used when the
         request does not return 200), one ``{"file_name", "keywords"}`` entry
         is appended per keyword line in the response, and the list is
         uploaded back.

    Args:
        purpose: passed to the prompt.
        history: any object; converted with ``str()`` before chunking.

    Returns:
        A one-element list containing the (possibly trimmed) model response.
    """
    inp = str(history)
    print(f'rl:: {len(inp)}')
    # Rough size estimate: number of separator characters in the text.
    c = sum(ch in " ,\n/.<" for ch in inp)
    print(f'c:: {c}')

    seed = random.randint(1, 1000000000)
    divr = c / MAX_DATA
    # Fixed chunk size and at least one pass: avoids dividing by zero for an
    # empty estimate and guarantees `resp` is set.
    chunk = MAX_DATA
    divi = max(1, math.ceil(divr))
    print(f'chunk:: {chunk}')
    print(f'divr:: {divr}')
    print(f'divi:: {divi}')

    new_history = ""
    task = 'Index this Data\n'
    resp = ""
    for z in range(divi):
        s = z * chunk
        e = s + chunk
        print(f's:e :: {s}:{e}')
        resp = run_gpt(
            SAVE_MEMORY,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=4096,
            seed=seed,
            purpose=purpose,
            task=task,
            knowledge=new_history,
            history=inp[s:e],
        ).strip('\n')
        new_history = resp
        print(resp)
    print("final1" + resp)

    try:
        resp = '[{' + resp.split('[{')[1].split('</s>')[0]
        print("final2\n" + resp)
    except IndexError:
        # No '[{' in the response: keep the raw text.
        pass

    timestamp = str(datetime.datetime.now())
    timename = timestamp.replace(" ", "--").replace(":", "-").replace(".", "-")

    # Upload straight from memory so no temp files accumulate on disk.
    api.upload_file(
        path_or_fileobj=resp.encode("utf-8"),
        path_in_repo=f"/mem-test/{timename}.json",
        repo_id=reponame,
        token=token_self,
        repo_type="dataset",
    )

    r = requests.get(f'{save_data}mem-test/main.json')
    print(f'status code main:: {r.status_code}')
    if r.status_code == 200:
        lod = json.loads(r.text)
        print(f'lod:: {lod}')
    else:
        lod = []

    for line in resp.strip().split("\n"):
        print(f'LINE:: {line}')
        key_box = _keywords_from_line(line)
        if key_box is not None:
            print(key_box)
            lod.append({"file_name": timename, "keywords": key_box})

    api.upload_file(
        path_or_fileobj=json.dumps(lod, indent=4).encode("utf-8"),
        path_in_repo="/mem-test/main.json",
        repo_id=reponame,
        token=token_self,
        repo_type="dataset",
    )
    return [resp]
def compress_history(purpose, task, history):
    """Replace a long history with a single model-written observation line.

    Calls the model with ``COMPRESS_HISTORY_PROMPT`` and a fresh random seed.

    Returns:
        ``"observation: <model response>\\n"``.
    """
    resp = run_gpt(
        COMPRESS_HISTORY_PROMPT,
        stop_tokens=["observation:", "task:", "action:", "thought:"],
        max_tokens=1024,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    return "observation: {}\n".format(resp)
def call_main(purpose, task, history, action_input, result):
    """Ask the model for its next step and turn the reply into an action.

    The reply is scanned line by line: ``thought: `` lines are appended to
    `history`, and the first ``action: `` line is parsed with parse_action()
    and returned immediately.

    Args:
        purpose, task, history: passed to the ``FINDER`` prompt.
        action_input: ignored on entry; replaced by the parsed action input.
        result: passed through unchanged.

    Returns:
        ``(action_name, action_input, history, task, result)``; when the reply
        contains no action line this is ``("MAIN", None, history, task, result)``.
    """
    resp = run_gpt(
        FINDER,
        stop_tokens=["observation:", "task:"],
        max_tokens=2048,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    for raw_line in resp.strip().split("\n"):
        # Trim each line so indented model output is still recognized.
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith("thought: "):
            history += "{}\n".format(line)
        if line.startswith("action: "):
            action_name, action_input = parse_action(line)
            print(f'ACTION::{action_name} -- INPUT :: {action_input}')
            return action_name, action_input, history, task, result

    # Test the VERBOSE flag itself; the string literal "VERBOSE" is always true.
    if VERBOSE:
        print(history)
    return "MAIN", None, history, task, result
def call_set_task(purpose, task, history, action_input, result):
    """Have the model write a new task and record the change in `history`.

    Args:
        purpose, task, history: passed to the ``TASK_PROMPT`` prompt.
        action_input: unused.
        result: passed through unchanged.

    Returns:
        ``("MAIN", None, history, task, result)`` where `task` is the model
        response with surrounding newlines stripped and `history` has an
        observation line describing the update appended.
    """
    task = run_gpt(
        TASK_PROMPT,
        stop_tokens=[],
        max_tokens=1024,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        task=task,
        history=history,
    ).strip("\n")
    history += "observation: task has been updated to: {}\n".format(task)
    return "MAIN", None, history, task, result
########################################################### | |
def search_all(url):
    """Placeholder: ignores `url` and returns an empty string."""
    source = ""
    return source
def find_all(purpose, task, history, url, result):
    """Fetch `url`, summarize the page with compress_data() and record it.

    On a 200 response the page text and every ``<a>`` tag (title, href,
    string) are collected, stringified, compressed by the model, appended to
    `result` and written into `history` as an observation. Any other outcome
    only appends an explanatory observation to `history`.

    Args:
        purpose, task: passed to compress_data().
        history: running agent transcript (str); observations are appended.
        url: address to fetch; empty or None is reported as an error
            observation without any request being made.
        result: accumulated answer text; the compressed page is appended.

    Returns:
        ``("MAIN", None, history, task, result)`` in every case. `task` is
        replaced with ``"compile report or complete?"`` after a successful
        fetch.
    """
    print(url)
    print(f"trying URL:: {url}")
    try:
        if not url:
            history += "observation: An Error occured\nI need to trigger a search using the following syntax:\naction: SCRAPE_WEBSITE action_input=URL\n"
            return "MAIN", None, history, task, result

        # NOTE(review): `url` comes from model output and is fetched as-is.
        # The timeout keeps an unresponsive host from hanging the agent loop;
        # a timeout error is handled by the except clause below.
        source = requests.get(url, timeout=30)
        if source.status_code != 200:
            history += f"observation: That URL string returned an error: {source.status_code}, I should try a different URL string\n"
            return "MAIN", None, history, task, result

        soup = bs4.BeautifulSoup(source.content, 'lxml')
        out = [f'RAW TEXT RETURNED: {soup.text}', "HTML fragments: "]
        for p in soup.find_all("a"):
            out.append([{"LINK TITLE": p.get('title'), "URL": p.get('href'), "STRING": p.string}])
        out = str(out)
        print(f'rl:: {len(out)}')

        # Rough size estimate: number of separator characters in the text.
        c = sum(ch in " ,\n/.<" for ch in out)
        print(f'c:: {c}')

        print("compressing...")
        rawp = compress_data(c, purpose, task, out, result)
        result += rawp
        history += "observation: the search results are:\n {}\n".format(rawp)
        task = "compile report or complete?"
        return "MAIN", None, history, task, result
    except Exception as e:
        # Any failure (bad URL, network error, parsing, model call) becomes a
        # hint in the transcript so the agent can try again.
        print(e)
        history += "observation: I need to trigger a search using the following syntax:\naction: SCRAPE_WEBSITE action_input=URL\n"
        return "MAIN", None, history, task, result
################################# | |
# Dispatch table used by run_action(): action name -> handler. Every handler
# takes (purpose, task, history, action_input, result). Both search actions
# are served by find_all.
NAME_TO_FUNC = {
    "MAIN": call_main,
    "UPDATE-TASK": call_set_task,
    "SEARCH_ENGINE": find_all,
    "SCRAPE_WEBSITE": find_all,
}
def run_action(purpose, task, history, action_name, action_input, result):
    """Execute one agent step and return the next state.

    * An `action_name` containing ``"COMPLETE"`` ends the run.
    * A history longer than MAX_HISTORY lines is compressed first.
    * A name found in NAME_TO_FUNC is dispatched to its handler.
    * Any other name adds an error observation and falls back to ``"MAIN"``.

    Returns:
        ``(action_name, action_input, history, task, result)`` for the next
        step.
    """
    if "COMPLETE" in action_name:
        print("Complete - Exiting")
        return "COMPLETE", None, history, task, result

    # compress the history when it is long
    if len(history.split("\n")) > MAX_HISTORY:
        if VERBOSE:
            print("COMPRESSING HISTORY")
        history = compress_history(purpose, task, history)

    handler = NAME_TO_FUNC.get(action_name)
    if handler is None:
        history += "observation: The TOOL I tried to use returned an error, I need to select a tool from: (UPDATE-TASK, SEARCH_ENGINE, SCRAPE_WEBSITE, COMPLETE)\n"
        return "MAIN", None, history, task, result

    print(f"RUN: {action_name} ACTION_INPUT: {action_input}")
    return handler(purpose, task, history, action_input, result)
def run(purpose, history):
    """Generator that drives the agent loop and streams chat updates.

    Yields a one-pair chat list ``[(purpose, text)]`` after every step: first
    ``"Searching..."``, then ``"More Searching..."`` while `result` is empty,
    otherwise the accumulated `result`.

    Args:
        purpose: the user request.
        history: prior chat content; converted with ``str()`` when truthy,
            otherwise replaced by an empty string.

    NOTE(review): the loop only ends when a step returns ``"COMPLETE"``; there
    is no step limit.
    """
    yield [(purpose, "Searching...")]
    task = None
    result = ""
    history = str(history) if history else ""
    action_name = "MAIN"
    action_input = None
    while True:
        print("")
        print("")
        print("---")
        print("task:", task)
        print("---")
        print("---")
        action_name, action_input, history, task, result = run_action(
            purpose,
            task,
            history,
            action_name,
            action_input,
            result,
        )
        if not result:
            yield [(purpose, "More Searching...")]
        else:
            yield [(purpose, result)]
        if action_name == "COMPLETE":
            break
    return [(purpose, result)]
# Preset prompts offered in the "Choices" dropdown.
examples = [
    "What is the current weather in Florida?",
    "Find breaking news about Texas",
    "Find the best deals on flippers for scuba diving",
    "Teach me to fly a helicopter",
]
def clear_fn():
    """Return ``(None, None)``: one cleared value for each of two outputs."""
    return None, None
# Initial value shown on the seed slider; drawn once at import time.
rand_val = random.randint(1, 99999999999)
def check_rand(inp, val):
    """Return the seed slider, re-rolled when `inp` is set.

    Args:
        inp: state of the "Random" checkbox.
        val: current slider value, kept (as an int) when `inp` is not set.

    Returns:
        A ``gr.Slider`` labelled "Seed" holding the chosen value.
    """
    value = random.randint(1, 99999999999) if inp else int(val)
    return gr.Slider(label="Seed", minimum=1, maximum=99999999999, value=value)
# UI definition and event wiring.
# NOTE(review): the captured source lost all indentation, so the nesting of
# the layout containers below is a reconstruction — confirm it against the
# intended layout. Component arguments and event wiring are unchanged.
with gr.Blocks() as app:
    gr.HTML("""<center><h1>Mixtral 8x7B RPG</h1><h3>Role Playing Game Master</h3>""")
    with gr.Group():
        with gr.Row():
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(show_label=False, show_share_button=True, show_copy_button=True, likeable=True, layout="panel", height="800px")
                with gr.Row():
                    with gr.Column(scale=3):
                        opt = gr.Dropdown(label="Choices", choices=examples, allow_custom_value=True, value="Start a new game", interactive=True)
                    with gr.Column(scale=2):
                        rand = gr.Checkbox(label="Random", value=True)
                        seed = gr.Slider(label="Seed", minimum=1, maximum=99999999999, value=rand_val)
                with gr.Row():
                    button = gr.Button()
                    stop_button = gr.Button("Stop")
                    clear_btn = gr.Button("Clear")
                with gr.Row():
                    tokens = gr.Slider(label="Max new tokens", value=2096, minimum=0, maximum=1048*10, step=64, interactive=False, visible=False, info="The maximum numbers of new tokens")
            with gr.Column(scale=1):
                save_btn = gr.Button("Save Memory")
                # No click handler is attached to this button.
                snap_btn = gr.Button("Take Screenshot")
                char_stats = gr.Textbox()
                json_out = gr.JSON()

    save_btn.click(save_memory, [opt, chatbot], json_out)
    clear_btn.click(clear_fn, None, [opt, chatbot])
    # Refresh the seed slider first, then stream the agent run into the chat.
    go = button.click(check_rand, [rand, seed], seed).then(run, [opt, chatbot], [chatbot])
    stop_button.click(None, None, None, cancels=[go])

app.queue(default_concurrency_limit=20).launch(show_api=False)
# Alternative ChatInterface-based UI, kept as an inert string; never executed.
'''
examples =[
    "What is the current weather in Florida?",
    "Find breaking news about Texas",
    "Find the best deals on flippers for scuba diving",
    "Teach me to fly a helicopter"
]
gr.ChatInterface(
    fn=run,
    chatbot=gr.Chatbot(show_label=False, show_share_button=True, show_copy_button=True, likeable=True, layout="panel", height="800px"),
    title="Mixtral 46.7B Powered <br> Search",
    examples=examples,
    concurrency_limit=20,
).launch()
'''