Spaces:

Lakshay121
/

NeuroReply

Runtime error

App Files Files Community

NeuroReply / app.py

Lakshay121

made changes in max_cache_length

6f67f6d verified 2 months ago

raw

history blame contribute delete

3.15 kB

	import json
	import torch
	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoTokenizer

	def predict_NuExtract(model, tokenizer, text, template, batch_size=1, max_length=10_000, max_new_tokens=4_000):
	    """Run a single NuExtract extraction and return the generated text.

	    Args:
	        model: Causal LM exposing ``device`` and
	            ``generate(**encoding, max_new_tokens=...)``.
	        tokenizer: Callable tokenizer whose result supports ``.to(device)``
	            and which provides ``decode``.
	        text: Source text to extract information from.
	        template: JSON string describing the fields to extract.
	        batch_size: Unused; kept so existing callers keep working.
	        max_length: Truncation limit handed to the tokenizer.
	        max_new_tokens: Generation budget handed to ``model.generate``.

	    Returns:
	        The segment that follows the first ``<|output|>`` marker in the
	        decoded sequence (up to the next marker, if any), or the whole
	        decoded sequence when the marker is absent.

	    Raises:
	        json.JSONDecodeError: If ``template`` is not valid JSON.
	    """
	    output_marker = "<|output|>"

	    # Round-trip through json to validate the template and normalise its layout.
	    template = json.dumps(json.loads(template), indent=4)

	    # The markers must be the literal "<|input|>" / "<|output|>". Writing them
	    # with "\|" is an invalid escape that leaves a backslash in the prompt.
	    prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n{output_marker}"

	    with torch.no_grad():
	        encoding = tokenizer(
	            prompt,
	            return_tensors="pt",
	            truncation=True,
	            padding=True,
	            max_length=max_length,
	        ).to(model.device)
	        pred_ids = model.generate(**encoding, max_new_tokens=max_new_tokens)
	        output = tokenizer.decode(pred_ids[0], skip_special_tokens=True)

	    # The decoded sequence echoes the prompt; keep only what follows the marker.
	    return output.split(output_marker)[1] if output_marker in output else output

	def generate_response(extracted_data):
	    """Turn NuExtract's JSON output into a canned support reply.

	    Args:
	        extracted_data: JSON text expected to hold an object with an
	            ``"Entities"`` object whose keys may include ``"App version"``,
	            ``"Issue"``, ``"Date"``, ``"Reason"`` and ``"Action"``.

	    Returns:
	        The formatted reply, with a placeholder phrase for every missing key.
	        A fixed error message is returned when the text is not valid JSON or
	        when the root or ``"Entities"`` value is not a JSON object.
	    """
	    error_message = "Error in processing extracted data. Please check the input format."

	    # Only the parse can raise JSONDecodeError, so keep the try body minimal.
	    try:
	        data = json.loads(extracted_data)
	    except json.JSONDecodeError:
	        return error_message

	    # Valid JSON is not necessarily an object (it may be a list, string, null...).
	    if not isinstance(data, dict):
	        return error_message
	    entities = data.get("Entities", {})
	    if not isinstance(entities, dict):
	        return error_message

	    # "Action" may arrive as a list or as a single value. Joining a bare
	    # string would interleave its characters, so wrap scalars in a list.
	    actions = entities.get("Action", ["no action required"])
	    if actions is None:
	        actions = ["no action required"]
	    elif not isinstance(actions, list):
	        actions = [actions]
	    action_text = ", ".join(str(action) for action in actions)

	    return (
	        f"I checked the logs for the user. This user was accessing the app through our {entities.get('App version', 'Unknown')} app "
	        f"(Wind Creek Casino app). {entities.get('Issue', 'an issue occurred')} on {entities.get('Date', 'an unknown date')} "
	        f"because {entities.get('Reason', 'no specific reason provided')}. This is working as designed, "
	        f"{action_text}."
	    )

	# Checkpoint used for both the model weights and the tokenizer.
	model_name = "numind/NuExtract-v1.5"

	# Use the GPU when torch reports one; otherwise stay on the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Load the weights in bfloat16, then move them to the chosen device and
	# switch to eval mode. trust_remote_code=True is passed to both loaders.
	model = AutoModelForCausalLM.from_pretrained(
	    model_name,
	    torch_dtype=torch.bfloat16,
	    trust_remote_code=True,
	)
	model = model.to(device).eval()
	tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

	def extract_information(text, template):
	    """Extract entities from ``text`` and phrase them as a support reply.

	    The module-level ``model`` and ``tokenizer`` are passed to
	    ``predict_NuExtract`` together with ``text`` and the JSON ``template``.
	    Its raw output is then handed to ``generate_response``, whose result is
	    returned unchanged.
	    """
	    return generate_response(predict_NuExtract(model, tokenizer, text, template))

	# Input widgets: free text to analyse and the JSON extraction template.
	_text_box = gr.Textbox(
	    label="Enter Text",
	    lines=5,
	    placeholder="Enter text to extract information from...",
	)
	_template_box = gr.Textbox(
	    label="Enter Template",
	    lines=10,
	    placeholder="Enter JSON extraction template...",
	)

	# Output widget showing the string returned by extract_information.
	_response_box = gr.Textbox(label="Generated Response")

	# Gradio interface wiring the two inputs to extract_information.
	demo = gr.Interface(
	    fn=extract_information,
	    inputs=[_text_box, _template_box],
	    outputs=_response_box,
	    title="NuExtract Information Extractor",
	    description="Enter a text and a JSON template to extract structured information and generate a response using NuExtract.",
	)


	# Patch Phi-3.5-mini-instruct's `prepare_inputs_for_generation`
	def patched_prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwargs):
	    """Forward to the model's original ``prepare_inputs_for_generation``.

	    Args:
	        self: Object the patch is bound to; it must expose the saved original
	            as ``_default_prepare_inputs_for_generation``.
	        input_ids: Forwarded unchanged as the first positional argument.
	        past_key_values: Forwarded unchanged as the second positional argument.
	        **kwargs: Forwarded unchanged.

	    Returns:
	        Whatever the saved original implementation returns.
	    """
	    # Pure pass-through. No cache length is probed here: the value was never
	    # consumed, and indexing past_key_values[0][0] raises on an empty cache
	    # container before the original implementation is reached.
	    return self._default_prepare_inputs_for_generation(input_ids, past_key_values, **kwargs)

	# Install the patch on this model instance only, keeping a handle to the
	# original bound method so the patched function can delegate to it.
	if hasattr(model, "prepare_inputs_for_generation"):
	    _original_prepare = model.prepare_inputs_for_generation
	    _bound_patch = patched_prepare_inputs_for_generation.__get__(model)
	    model._default_prepare_inputs_for_generation = _original_prepare
	    model.prepare_inputs_for_generation = _bound_patch

	# Start the Gradio app with share=True.
	demo.launch(share=True)