Text-to-Chart / app.py
Omnibus's picture
Update app.py
ade7b77 verified
raw
history blame contribute delete
16.5 kB
import gradio as gr
import urllib
import base64
import random
import requests
import bs4
import lxml
import os
from huggingface_hub import InferenceClient,HfApi
import random
import json
import datetime
from pypdf import PdfReader
import uuid
from PIL import Image
from screenshot import create_ss
from agent import (
PREFIX,
GET_CHART,
COMPRESS_DATA_PROMPT,
COMPRESS_DATA_PROMPT_SMALL,
LOG_PROMPT,
LOG_RESPONSE,
)
api=HfApi()
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
############ Document Functions #################
def find_all(url):
return_list=[]
print (url)
#if action_input in query.tasks:
print (f"trying URL:: {url}")
try:
if url != "" and url != None:
out = []
source = requests.get(url)
#source = urllib.request.urlopen(url).read()
soup = bs4.BeautifulSoup(source.content,'lxml')
rawp=(f'RAW TEXT RETURNED: {soup.text}')
cnt=0
cnt+=len(rawp)
out.append(rawp)
out.append("HTML fragments: ")
q=("a","p","span","content","article")
for p in soup.find_all("a"):
out.append([{"LINK TITLE":p.get('title'),"URL":p.get('href'),"STRING":p.string}])
print(rawp)
return True, rawp
else:
return False, "Enter Valid URL"
except Exception as e:
print (e)
return False, f'Error: {e}'
return "MAIN", None, history, task
def read_txt(txt_path):
text=""
with open(txt_path,"r") as f:
text = f.read()
f.close()
print (text)
return text
def read_pdf(pdf_path):
text=""
reader = PdfReader(f'{pdf_path}')
number_of_pages = len(reader.pages)
for i in range(number_of_pages):
page = reader.pages[i]
text = f'{text}\n{page.extract_text()}'
print (text)
return text
error_box=[]
def read_pdf_online(url):
uid=uuid.uuid4()
print(f"reading {url}")
response = requests.get(url, stream=True)
print(response.status_code)
text=""
try:
if response.status_code == 200:
with open("test.pdf", "wb") as f:
f.write(response.content)
#f.close()
#out = Path("./data.pdf")
#print (out)
reader = PdfReader("test.pdf")
number_of_pages = len(reader.pages)
print(number_of_pages)
for i in range(number_of_pages):
page = reader.pages[i]
text = f'{text}\n{page.extract_text()}'
print(f"PDF_TEXT:: {text}")
return text
else:
text = response.status_code
error_box.append(url)
print(text)
return text
except Exception as e:
print (e)
return e
VERBOSE = True
MAX_HISTORY = 100
MAX_DATA = 20000
def format_prompt(message, history):
prompt = "<s>"
for user_prompt, bot_response in history:
prompt += f"[INST] {user_prompt} [/INST]"
prompt += f" {bot_response}</s> "
prompt += f"[INST] {message} [/INST]"
return prompt
def run_gpt_no_prefix(
prompt_template,
stop_tokens,
max_tokens,
seed,
**prompt_kwargs,
):
print(seed)
try:
generate_kwargs = dict(
temperature=0.9,
max_new_tokens=max_tokens,
top_p=0.95,
repetition_penalty=1.0,
do_sample=True,
seed=seed,
)
content = prompt_template.format(**prompt_kwargs)
#if VERBOSE:
print(LOG_PROMPT.format(content))
#formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
#formatted_prompt = format_prompt(f'{content}', history)
stream = client.text_generation(content, **generate_kwargs, stream=True, details=True, return_full_text=False)
resp = ""
for response in stream:
resp += response.token.text
#yield resp
#if VERBOSE:
print(LOG_RESPONSE.format(resp))
return resp
except Exception as e:
print(f'no_prefix_error:: {e}')
return "Error"
def run_gpt(
prompt_template,
stop_tokens,
max_tokens,
seed,
**prompt_kwargs,
):
print(seed)
timestamp=datetime.datetime.now()
generate_kwargs = dict(
temperature=0.9,
max_new_tokens=max_tokens,
top_p=0.95,
repetition_penalty=1.0,
do_sample=True,
seed=seed,
)
content = PREFIX.format(
timestamp=timestamp,
purpose="Compile the provided data and complete the users task"
) + prompt_template.format(**prompt_kwargs)
#if VERBOSE:
print(LOG_PROMPT.format(content))
#formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
#formatted_prompt = format_prompt(f'{content}', history)
stream = client.text_generation(content, **generate_kwargs, stream=True, details=True, return_full_text=False)
resp = ""
for response in stream:
resp += response.token.text
#yield resp
if VERBOSE:
print(LOG_RESPONSE.format(resp))
return resp
def compress_data(c, instruct, history, seed):
#seed=random.randint(1,1000000000)
print (c)
#tot=len(purpose)
#print(tot)
divr=int(c)/MAX_DATA
divi=int(divr)+1 if divr != int(divr) else int(divr)
chunk = int(int(c)/divr)
print(f'chunk:: {chunk}')
print(f'divr:: {divr}')
print (f'divi:: {divi}')
out = []
#out=""
s=0
e=chunk
print(f'e:: {e}')
new_history=""
#task = f'Compile this data to fulfill the task: {task}, and complete the purpose: {purpose}\n'
for z in range(divi):
print(f's:e :: {s}:{e}')
hist = history[s:e]
resp = run_gpt(
COMPRESS_DATA_PROMPT_SMALL,
stop_tokens=["observation:", "task:", "action:", "thought:"],
max_tokens=8192,
seed=seed,
direction=instruct,
knowledge="",
history=hist,
).strip("\n")
out.append(resp)
#new_history = resp
print (resp)
#out+=resp
e=e+chunk
s=s+chunk
return out
def get_chart(inp,seed):
#seed=random.randint(1,1000000000)
try:
resp = run_gpt_no_prefix(
GET_CHART,
stop_tokens=[],
max_tokens=8192,
seed=seed,
inp=inp,
).strip("\n")
print(resp)
except Exception as e:
print(f'Error:: {e}')
resp = e
return resp
def format_json(inp):
print("FORMATTING:::")
print(type(inp))
print("###########")
print(inp)
print("###########")
print("###########")
new_str=""
matches=["```","#","//"]
for i,line in enumerate(inp):
line = line.strip()
print(line)
#if not any(x in line for x in matches):
new_str+=line.strip("\n").strip("```").strip("#").strip("//")
print("###########")
print("###########")
#inp = inp.strip("<\s>")
new_str=new_str.strip("</s>")
out_json=eval(new_str)
print(out_json)
print("###########")
print("###########")
return out_json
this=["1.25"]
css="""
#wrap { width: 100%; height: 100%; padding: 0; overflow: auto; }
#frame { width: 100%; border: 1px solid black; }
#frame { zoom: $ZOOM; -moz-transform: scale($ZOOM); -moz-transform-origin: 0 0; }
"""
def mm(graph,zoom):
code_out=""
for ea in graph.split("\n"):
code=ea.strip().strip("\n")
code_out+=code
#out_html=f'''<div><iframe src="https://omnibus-mermaid-script.static.hf.space/index.html?mermaid={code_out}&rand={random.randint(1,1111111111)}" height="500" width="500"></iframe></div>'''
#url=f"https://omnibus-mermaid-script.static.hf.space/index.html?mermaid={code_out}"
url=f"https://omnibus-mermaid-script.static.hf.space/index.html?mermaid={urllib.parse.quote_plus(code_out)}"
out_html=f'''<div id="wrap" style="width: 100%; height: 100%;max-height:600px; padding: 0; overflow: auto;"><iframe id="frame" src="{url}" style="width:100%; height:600px; border: 1px solid black; zoom: {str(zoom)}; -moz-transform: scale({str(zoom)}); -moz-transform-origin: 0 0;" allow="accelerometer; ambient-light-sensor; autoplay; battery; camera; document-domain; encrypted-media; fullscreen; geolocation; gyroscope; layout-animations; legacy-image-formats; magnetometer; microphone; midi; oversized-images; payment; picture-in-picture; publickey-credentials-get; sync-xhr; usb; vr ; wake-lock; xr-spatial-tracking" sandbox="allow-forms allow-modals allow-popups allow-popups-to-escape-sandbox allow-same-origin allow-scripts allow-downloads"></iframe></div>'''
return out_html,url
def summarize(inp,history,seed,data=None,files=None,directory=None,url=None,pdf_url=None,pdf_batch=None):
json_box=[]
chart_out=""
if inp == "":
inp = "Process this data"
history.clear()
history = [(inp,"Working on it...")]
yield "",history,chart_out,chart_out,json_box,""
if pdf_batch.startswith("http"):
lab="PDF Batch"
c=0
data=""
for i in str(pdf_batch):
if i==",":
c+=1
print (f'c:: {c}')
try:
for i in range(c+1):
batch_url = pdf_batch.split(",",c)[i]
bb = read_pdf_online(batch_url)
data=f'{data}\nFile Name URL ({batch_url}):\n{bb}'
except Exception as e:
print(e)
#data=f'{data}\nError reading URL ({batch_url})'
if directory:
lab="Directory"
for ea in directory:
print(ea)
if pdf_url.startswith("http"):
lab="PDF URL"
print("PDF_URL")
out = read_pdf_online(pdf_url)
data=out
if url.startswith("http"):
lab="Raw HTML"
val, out = find_all(url)
if not val:
data="Error"
rawp = str(out)
else:
data=out
if files:
lab="Files"
for i, file in enumerate(files):
try:
print (file)
if file.endswith(".pdf"):
zz=read_pdf(file)
print (zz)
data=f'{data}\nFile Name ({file}):\n{zz}'
elif file.endswith(".txt"):
zz=read_txt(file)
print (zz)
data=f'{data}\nFile Name ({file}):\n{zz}'
except Exception as e:
data=f'{data}\nError opening File Name ({file})'
print (e)
if data != "Error" and data != "":
history.clear()
history = [(inp,f"Data: Loaded, processing...")]
yield "",history,chart_out,chart_out,json_box,""
print(inp)
out = str(data)
rl = len(out)
print(f'rl:: {rl}')
c=1
for i in str(out):
if i == " " or i=="," or i=="\n":
c +=1
print (f'c:: {c}')
json_out = compress_data(c,inp,out,seed)
out = str(json_out)
try:
json_out=format_json(json_out)
except Exception as e:
print (e)
history.clear()
history = [(inp,"Building Chart...")]
yield "",history,chart_out,chart_out,json_out,""
chart_out = get_chart(str(json_out),seed)
chart_list=chart_out.split("\n")
go=True
cnti=1
line_out=""
for ii, line in enumerate(chart_list):
if go:
line=line.strip().replace('"',"")
if "```" in chart_list[ii]:
while True:
line_out+=chart_list[ii+cnti].strip().replace("\n"," ").replace('"',"").replace("/"," ").replace("."," ").replace(":"," ").replace("#","")
if not line_out.strip().endswith(";"):
line_out+=";"
line_out+="\n"
cnti+=1
if "```" in chart_list[ii+cnti]:
go=False
break
chart_html,chart_url=mm(line_out,1)
#print(chart_out)
else:
rawp = "Provide a valid data source"
history.clear()
history.append((inp,chart_out))
yield "", history,chart_html,line_out,json_out,chart_url
#################################
def clear_fn():
return "",[(None,None)]
def create_image(url):
print(url)
with open("tmp.svg","w") as svg:
svg.write(url)
with open("tmp.svg", "rb") as f:
encoded_image = base64.b64encode(f.read())
this = Image.open("tmp.svg")
out_im = this.save("tmp.png")
with open("image.png","wb") as file:
#file.write(eval(encoded_image))
file.write(encoded_image)
#output = cairosvg.svg2png(
# bytestring=open('tmp.svg').read().encode('utf-8'), write_to="output.png")
return "tmp.png"
score_js="""
function(text_input) {
console.log(text_input);
const iframe = document.getElementById("frame").contentWindow.document.getElementById('chart').innerHTML;
console.log(iframe);
return [iframe];
}
"""
def zoom_update(inp):
this.clear()
this.append(str(inp))
return gr.update()
with gr.Blocks() as app:
gr.HTML("""<center><h1>Text -to- Chart</h1><h3>Mixtral 8x7B</h3>""")
chatbot = gr.Chatbot(label="Mixtral 8x7B Chatbot",show_copy_button=True)
with gr.Row():
with gr.Column(scale=3):
prompt=gr.Textbox(label = "Instructions (optional)")
with gr.Column(scale=1):
button=gr.Button()
#models_dd=gr.Dropdown(choices=[m for m in return_list],interactive=True)
with gr.Row():
stop_button=gr.Button("Stop")
clear_btn = gr.Button("Clear")
with gr.Row():
with gr.Tab("Text"):
data=gr.Textbox(label="Input Data (paste text)", lines=6)
with gr.Tab("File"):
file=gr.Files(label="Input File(s) (.pdf .txt)")
with gr.Tab("Folder"):
directory=gr.File(label="Folder", file_count='directory')
with gr.Tab("Raw HTML"):
url = gr.Textbox(label="URL")
with gr.Tab("PDF URL"):
pdf_url = gr.Textbox(label="PDF URL")
with gr.Tab("PDF Batch"):
pdf_batch = gr.Textbox(label="PDF URL Batch (comma separated)")
m_box=gr.HTML()
e_box=gr.Textbox(label="Graph Code",interactive=True)
with gr.Row():
upd_button=gr.Button("Update Chart")
create_im=gr.Button("Create Image")
with gr.Row():
with gr.Column(scale=3):
svg_img=gr.Image()
with gr.Column(scale=1):
wid=gr.Number(label="Width",value=1000)
hgt=gr.Number(label="Height",value=4000)
seed_slider=gr.Slider(label="Seed",step=1,minimum=1,maximum=9999999999999999999,value=1,interactive=True)
zoom_btn=gr.Slider(label="Zoom",step=0.01,minimum=0.1,maximum=20,value=1,interactive=True)
url_box=gr.Textbox(label="Graph URL",interactive=True)
json_out=gr.JSON()
#text=gr.JSON()
#get_score.click(return_score,score,[score],_js=score_js)
score=gr.Textbox()
def return_score(text):
print(text)
return text
create_im.click(create_ss,[e_box,wid,hgt],svg_img)
#create_im.click(return_score,score,[score],_js=score_js).then(create_image,score,svg_img)
#zoom_btn.change(zoom_update,zoom_btn,None)
upd_button.click(mm,[e_box,zoom_btn],[m_box,url_box])
#inp_query.change(search_models,inp_query,models_dd)
clear_btn.click(clear_fn,None,[prompt,chatbot])
#go=button.click(summarize,[prompt,chatbot,report_check,chart_check,data,file,directory,url,pdf_url,pdf_batch],[prompt,chatbot,e_box,json_out])
go=button.click(summarize,[prompt,chatbot,seed_slider,data,file,directory,url,pdf_url,pdf_batch],[prompt,chatbot,m_box,e_box,json_out,url_box])
stop_button.click(None,None,None,cancels=[go])
#app.queue(default_concurrency_limit=20).launch(show_api=False)
app.queue(default_concurrency_limit=20).launch(show_api=False)