Lakshay121 committed on
Commit
8e00884
·
verified ·
1 Parent(s): a22b418

requirements.txt

Browse files

gradio
torch
transformers

Files changed (1) hide show
  1. app.py +49 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import torch
3
+ import gradio as gr
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer
5
+
6
def predict_NuExtract(model, tokenizer, text, template, batch_size=1, max_length=10_000, max_new_tokens=4_000):
    """Run the extraction model on one text and return its raw completion.

    Args:
        model: Causal language model exposing ``device`` and ``generate``.
        tokenizer: Tokenizer paired with ``model``; called on the prompt and
            used to decode the generated ids.
        text: Document to extract information from.
        template: JSON string describing the fields to extract.
        batch_size: Unused by this single-text implementation; kept so
            existing callers passing it keep working.
        max_length: Token cap passed to the tokenizer (prompt is truncated
            beyond it).
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded text that follows the prompt's ``<|output|>`` marker, or
        the whole decoded output when the marker is absent.

    Raises:
        json.JSONDecodeError: If ``template`` is not valid JSON.
    """
    # Round-trip through json to validate the template and re-indent it.
    template = json.dumps(json.loads(template), indent=4)
    prompt = f"""<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"""

    with torch.no_grad():
        encoding = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True, max_length=max_length).to(model.device)
        pred_ids = model.generate(**encoding, max_new_tokens=max_new_tokens)
        output = tokenizer.decode(pred_ids[0], skip_special_tokens=True)

    # Split on the LAST marker: the prompt always ends with "<|output|>", so a
    # marker that happens to appear inside the user's text must not be
    # mistaken for the start of the completion.
    _, marker, completion = output.rpartition("<|output|>")
    return completion if marker else output
16
+
17
def generate_response(extracted_data):
    """Build the canned support reply from the model's JSON output.

    Args:
        extracted_data: JSON text expected to decode to an object with an
            optional ``"Entities"`` object holding ``"App version"``,
            ``"Issue"``, ``"Date"``, ``"Reason"`` and ``"Action"``.

    Returns:
        The formatted reply. Missing or empty fields fall back to generic
        wording. If the input is not JSON, or does not have the expected
        object shape, a fixed error message is returned instead.
    """
    error_message = "Error in processing extracted data. Please check the input format."

    try:
        data = json.loads(extracted_data)
    except (json.JSONDecodeError, TypeError):
        return error_message

    # Valid JSON that is not an object (list, number, ...) cannot carry fields.
    if not isinstance(data, dict):
        return error_message
    entities = data.get("Entities") or {}
    if not isinstance(entities, dict):
        return error_message

    def field(key, default):
        # A filled-in template can carry "" / [] / null for fields with no
        # value; treat those as missing so the fallback text is used.
        value = entities.get(key)
        if value is None or value == "" or value == [] or value == {}:
            return default
        return value

    actions = field("Action", ["no action required"])
    # A bare string must not be joined character by character.
    if not isinstance(actions, (list, tuple)):
        actions = [actions]
    action_parts = [str(item) for item in actions if item is not None and item != ""]
    if not action_parts:
        action_parts = ["no action required"]

    response = (f"I checked the logs for the user. This user was accessing the app through our {field('App version', 'Unknown')} app "
                f"(Wind Creek Casino app). {field('Issue', 'an issue occurred')} on {field('Date', 'an unknown date')} "
                f"because {field('Reason', 'no specific reason provided')}. This is working as designed, "
                f"{', '.join(action_parts)}.")
    return response
28
+
29
# Hugging Face checkpoint used for extraction.
model_name = "numind/NuExtract-v1.5"

# Run on the GPU when torch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the weights in bfloat16, move them to the chosen device and switch the
# module to evaluation mode.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
model = model.to(device).eval()

# Tokenizer that belongs to the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
33
+
34
def extract_information(text, template):
    """Gradio callback: extract fields from ``text`` and word them as a reply.

    Args:
        text: Free text to run the extraction on.
        template: JSON extraction template typed by the user.

    Returns:
        The reply produced by ``generate_response``, or a short message
        asking for a valid template when ``template`` is not valid JSON.
    """
    # Validate the user-typed template before any inference: without this,
    # malformed JSON surfaces as an unhandled exception in the UI.
    try:
        json.loads(template)
    except (json.JSONDecodeError, TypeError):
        return "Invalid template. Please provide a valid JSON extraction template."

    extracted_data = predict_NuExtract(model, tokenizer, text, template)
    return generate_response(extracted_data)
37
+
38
# Web UI: two text inputs (source text and JSON template) wired to
# extract_information, with the generated reply shown in a single text box.
demo = gr.Interface(
    title="NuExtract Information Extractor",
    description="Enter a text and a JSON template to extract structured information and generate a response using NuExtract.",
    fn=extract_information,
    inputs=[
        gr.Textbox(
            label="Enter Text",
            placeholder="Enter text to extract information from...",
            lines=5,
        ),
        gr.Textbox(
            label="Enter Template",
            placeholder="Enter JSON extraction template...",
            lines=10,
        ),
    ],
    outputs=gr.Textbox(label="Generated Response"),
)

# Start the app, requesting a public share link.
demo.launch(share=True)