import json

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


def predict_NuExtract(model, tokenizer, text, template, batch_size=1, max_length=10_000, max_new_tokens=4_000):
    """Run one extraction prompt through the model and return the raw output.

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``.
        text: Free text to extract information from.
        template: JSON string describing the fields to extract. It is
            re-serialized with a 4-space indent before being placed in the
            prompt.
        batch_size: Accepted for interface compatibility; not used here,
            a single prompt is processed per call.
        max_length: Maximum number of prompt tokens; longer prompts are
            truncated by the tokenizer.
        max_new_tokens: Maximum number of tokens to generate.

    Returns:
        The decoded text that follows the first ``<|output|>`` marker (up to
        the next marker, if any), or the whole decoded text when the marker
        does not appear in it.

    Raises:
        json.JSONDecodeError: If ``template`` is not valid JSON.
    """
    # Normalize the template formatting so the prompt layout does not depend
    # on how the caller indented it.
    template = json.dumps(json.loads(template), indent=4)
    prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"

    # Inference only: no gradients are needed.
    with torch.no_grad():
        encoding = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=max_length,
        ).to(model.device)
        pred_ids = model.generate(**encoding, max_new_tokens=max_new_tokens)

    output = tokenizer.decode(pred_ids[0], skip_special_tokens=True)
    marker = "<|output|>"
    if marker in output:
        return output.split(marker)[1]
    return output


def _field_or_default(entities, key, default):
    """Return ``entities[key]``, or ``default`` when it is missing or empty."""
    value = entities.get(key)
    if value is None:
        return default
    if isinstance(value, str) and not value.strip():
        return default
    if isinstance(value, (list, tuple, dict)) and not value:
        return default
    return value


def _format_actions(raw_actions, default="no action required"):
    """Render the ``Action`` entry as a comma-separated string."""
    if raw_actions is None:
        items = []
    elif isinstance(raw_actions, (list, tuple)):
        items = list(raw_actions)
    else:
        # A bare string (or other scalar) is a single action; joining it
        # directly would split a string into individual characters.
        items = [raw_actions]
    # str() guards against non-string items, which str.join would reject.
    rendered = [str(item).strip() for item in items if item is not None]
    rendered = [item for item in rendered if item]
    return ", ".join(rendered) if rendered else default


def generate_response(extracted_data):
    """Turn the extracted JSON into a human-readable support reply.

    Args:
        extracted_data: JSON text expected to hold an object with an
            ``"Entities"`` object containing ``"App version"``, ``"Issue"``,
            ``"Date"``, ``"Reason"`` and ``"Action"``.

    Returns:
        The formatted reply. Fields that are missing *or empty* are replaced
        by a generic fallback phrase. If ``extracted_data`` is not valid
        JSON, is not a JSON object, or its ``"Entities"`` entry is not an
        object, a fixed error message is returned instead.
    """
    error_message = "Error in processing extracted data. Please check the input format."

    try:
        data = json.loads(extracted_data)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers a non-string payload such as None.
        return error_message

    if not isinstance(data, dict):
        return error_message

    entities = data.get("Entities")
    if entities is None:
        entities = {}
    if not isinstance(entities, dict):
        return error_message

    # dict.get's default only applies to absent keys; a key that is present
    # but empty ("" or []) would otherwise leave a hole in the sentence.
    app_version = _field_or_default(entities, "App version", "Unknown")
    issue = _field_or_default(entities, "Issue", "an issue occurred")
    date = _field_or_default(entities, "Date", "an unknown date")
    reason = _field_or_default(entities, "Reason", "no specific reason provided")
    actions = _format_actions(entities.get("Action"))

    return (
        f"I checked the logs for the user. This user was accessing the app through our {app_version} app "
        f"(Wind Creek Casino app). {issue} on {date} "
        f"because {reason}. This is working as designed, "
        f"{actions}."
    )
model_name = "numind/NuExtract-v1.5"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model once at start-up, in bfloat16 and in eval mode, on the
# selected device.
model = (
    AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    .to(device)
    .eval()
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)


def extract_information(text, template):
    """Gradio callback: extract entities from ``text`` and build the reply.

    Args:
        text: Free text entered by the user.
        template: JSON extraction template entered by the user.

    Returns:
        The generated response string, or a short error message when the
        template is not valid JSON.
    """
    # Validate the template up front so that a typo in the form produces a
    # readable message instead of an unhandled exception in the UI.
    try:
        json.loads(template)
    except (json.JSONDecodeError, TypeError) as err:
        return f"Invalid JSON template: {err}"

    extracted_data = predict_NuExtract(model, tokenizer, text, template)
    return generate_response(extracted_data)


demo = gr.Interface(
    fn=extract_information,
    inputs=[
        gr.Textbox(label="Enter Text", lines=5, placeholder="Enter text to extract information from..."),
        gr.Textbox(label="Enter Template", lines=10, placeholder="Enter JSON extraction template..."),
    ],
    outputs=gr.Textbox(label="Generated Response"),
    title="NuExtract Information Extractor",
    description="Enter a text and a JSON template to extract structured information and generate a response using NuExtract.",
)


# Patch Phi-3.5-mini-instruct's `prepare_inputs_for_generation`
def patched_prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwargs):
    """Forward to the model's original ``prepare_inputs_for_generation``.

    The wrapper is a plain pass-through. It deliberately does not inspect
    ``past_key_values``: the value is only handed on, so the wrapper works
    whatever cache representation the library passes in.
    """
    return self._default_prepare_inputs_for_generation(input_ids, past_key_values, **kwargs)


# Apply the patch dynamically. The second condition keeps the patch from being
# applied twice, which would make the wrapper call itself recursively.
if hasattr(model, "prepare_inputs_for_generation") and not hasattr(
    model, "_default_prepare_inputs_for_generation"
):
    model._default_prepare_inputs_for_generation = model.prepare_inputs_for_generation
    # Bind the function to this model instance so ``self`` is supplied.
    model.prepare_inputs_for_generation = patched_prepare_inputs_for_generation.__get__(model)

demo.launch(share=True)