import gradio as gr
from paddlenlp.transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and weights once at startup (float32 keeps CPU inference simple).
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct", dtype="float32")


def inference(input_text):
    # Tokenize the prompt into Paddle tensors ("pd").
    input_features = tokenizer(input_text, return_tensors="pd")
    # PaddleNLP's generate() returns a (ids, scores) tuple; cap the response length.
    outputs = model.generate(**input_features, max_new_tokens=128)
    # Decode the first (and only) sequence in the batch.
    output_text = tokenizer.batch_decode(outputs[0], skip_special_tokens=True)[0]
    return output_text


title = 'PaddlePaddle Meets LLM'
description = '''
- The underlying execution framework is based on [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) and [PaddleNLP](https://github.com/PaddlePaddle/PaddleNLP).
- PaddleNLP supports a wide range of open-source LLMs. Check out the [full model list](https://github.com/PaddlePaddle/PaddleNLP?tab=readme-ov-file#%E6%A8%A1%E5%9E%8B%E6%94%AF%E6%8C%81).
- We chose Qwen2-0.5B-Instruct as the model for this use case due to limited computational resources.
- [ERNIE 4.5](https://yiyan.baidu.com/) was trained with PaddlePaddle, [give it a try](https://huggingface.co/spaces/PaddlePaddle/ernie_demo)!
'''
examples = ['请自我介绍一下。']  # "Please introduce yourself."

demo = gr.Interface(
    inference,
    inputs="text",
    outputs="text",
    title=title,
    description=description,
    examples=examples,
)

if __name__ == "__main__":
    demo.launch()
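
# Note: Qwen2-0.5B-Instruct is a chat-tuned model, so wrapping the raw prompt
# with the model's chat template usually yields better answers than feeding
# plain text as above. A minimal sketch, assuming the PaddleNLP tokenizer
# exposes apply_chat_template() (available in recent PaddleNLP releases):
#
#     def inference(input_text):
#         input_features = tokenizer.apply_chat_template(
#             input_text, return_tensors="pd"
#         )
#         outputs = model.generate(**input_features, max_new_tokens=128)
#         return tokenizer.batch_decode(outputs[0], skip_special_tokens=True)[0]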