import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer


class EndpointHandler:
    """Custom Hugging Face Inference Endpoints handler for a seq2seq model.

    Loads a tokenizer/model pair from ``path`` and serves generation
    requests of the form ``{"inputs": str, "parameters": {...}}``.
    """

    def __init__(self, path=""):
        """Load the tokenizer and model from *path* and switch to eval mode."""
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(path)
        self.model.eval()

    def __call__(self, data):
        """Handle one inference request.

        Args:
            data: Request payload. ``"inputs"`` is the prompt string;
                ``"parameters"`` optionally carries ``max_new_tokens``,
                ``do_sample``, ``temperature``, and ``top_p``.

        Returns:
            A list with a single dict: ``[{"generated_text": str}]`` on
            success, or ``[{"error": str}]`` when ``inputs`` is missing.
            (Inference Endpoints requires a list-shaped response.)
        """
        inputs = data.get("inputs", "")
        # Guard against an explicit `"parameters": null` in the payload,
        # which would make .get(...) calls below crash on None.
        parameters = data.get("parameters", {}) or {}
        if not inputs:
            return [{"error": "Missing 'inputs' in payload"}]

        # tokenizer(...) (unlike tokenizer.encode) also returns the
        # attention_mask, which generate() needs for correct results.
        encoded = self.tokenizer(inputs, return_tensors="pt")

        # no_grad: inference only — avoids building an autograd graph
        # and the associated memory overhead during generation.
        with torch.no_grad():
            outputs = self.model.generate(
                **encoded,
                max_new_tokens=parameters.get("max_new_tokens", 128),
                # Sampling remains the default, but callers can now
                # request greedy decoding with {"do_sample": false}.
                do_sample=parameters.get("do_sample", True),
                temperature=parameters.get("temperature", 0.7),
                top_p=parameters.get("top_p", 0.9),
            )

        result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return [{"generated_text": result}]  # ✅ Must return a list