import gradio as gr
from paddlenlp.transformers import AutoModelForCausalLM, AutoTokenizer

# Load the tokenizer and model once at startup. float32 keeps the demo
# runnable on CPU-only hardware.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct", dtype="float32")
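# Optional (an assumption about deployment hardware, not part of the original
# demo): on a GPU, loading in half precision reduces memory use, e.g.
# model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct", dtype="float16")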
def inference(input_text):
    # Tokenize the prompt into Paddle tensors ("pd").
    input_features = tokenizer(input_text, return_tensors="pd")
    # generate() returns a (token_ids, scores) tuple; take the ids and
    # decode the first sequence in the batch.
    outputs = model.generate(**input_features, max_new_tokens=128)
    output_text = tokenizer.batch_decode(outputs[0], skip_special_tokens=True)[0]
    return output_text
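# A hedged alternative (not in the original demo): Qwen2-*-Instruct models are
# chat-tuned, so wrapping the raw prompt in the model's chat template usually
# improves responses. This assumes the PaddleNLP tokenizer exposes
# `apply_chat_template`, as its chat-capable tokenizers document.
def chat_inference(input_text):
    # Apply the chat template and tokenize into Paddle tensors in one step.
    input_features = tokenizer.apply_chat_template(input_text, return_tensors="pd")
    outputs = model.generate(**input_features, max_new_tokens=128)
    return tokenizer.batch_decode(outputs[0], skip_special_tokens=True)[0]
# To try it, pass chat_inference instead of inference to gr.Interface below.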
title = 'PaddlePaddle Meets LLM'
description = '''
- The underlying execution framework is [PaddlePaddle](https://github.com/PaddlePaddle/Paddle), together with [PaddleNLP](https://github.com/PaddlePaddle/PaddleNLP).
- PaddleNLP supports a wide range of open-source LLMs. Check out the [full model list](https://github.com/PaddlePaddle/PaddleNLP?tab=readme-ov-file#%E6%A8%A1%E5%9E%8B%E6%94%AF%E6%8C%81).
- We chose Qwen2-0.5B-Instruct for this demo due to limited computational resources.
- [ERNIE 4.5](https://yiyan.baidu.com/) was trained with PaddlePaddle. [Give it a try](https://huggingface.co/spaces/PaddlePaddle/ernie_demo)!
'''
# Example prompt (Chinese for "Please introduce yourself.").
examples = ['请自我介绍一下。']
demo = gr.Interface(
    inference,
    inputs="text",
    outputs="text",
    title=title,
    description=description,
    examples=examples,
)
if __name__ == "__main__":
    demo.launch()