import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

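# Pythia-1b fine-tuned on an NVIDIA FAQ dataset, pulled from the Hugging Face Hub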
model_path_hf = 'paacamo/EleutherAI-pythia-1b-finetuned-nvidia-faq'

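# Load the tokenizer and model weights from the Hub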
tokenizer = AutoTokenizer.from_pretrained(model_path_hf)
model = AutoModelForCausalLM.from_pretrained(model_path_hf)

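# Wrap model and tokenizer in a generation pipeline; use GPU 0 if available, else CPU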
text_generation = pipeline('text-generation',
                           model=model,
                           tokenizer=tokenizer,
                           device=0 if torch.cuda.is_available() else -1)


def respond_chat(message):
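    # Rebuild the ###Question/###Answer prompt template the model was fine-tuned on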
prompt = f"###Question: {message} \n###Answer:" |
|
|
|
|
|
    response = text_generation(prompt, max_new_tokens=100, do_sample=True)
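    # The pipeline echoes the prompt, so keep only the text after '###Answer:'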
    return response[0]['generated_text'].split('###Answer:')[1].strip()


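# Minimal Gradio UI: one text box in, the generated answer out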
demo = gr.Interface(
    fn=respond_chat,
    inputs='text',
    outputs='text',
    title="NVIDIA FAQ Chatbot",
    description="Ask your question about NVIDIA products and services."
)

demo.launch(debug=True)