import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Model info
# model_path_local = '/Users/heykalsayid/Desktop/skill-academy/projects/ai-porto/deployment/app/model/eleutherai-finetuned'
model_path_hf = 'paacamo/EleutherAI-pythia-1b-finetuned-nvidia-faq'  # model hosted on Hugging Face

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_path_hf)
model = AutoModelForCausalLM.from_pretrained(model_path_hf)

# Build a text-generation pipeline; use the GPU (device 0) when available, otherwise the CPU
text_generation = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,
)


def respond_chat(message):
    # Wrap the question in the prompt template used during fine-tuning for a better response
    prompt = f"###Question: {message} \n###Answer:"

    # Generate a completion; the pipeline output includes the prompt itself
    response = text_generation(prompt, max_new_tokens=100, do_sample=True)

    # Keep only the text that follows the answer marker
    return response[0]['generated_text'].split('###Answer:')[1].strip()


# Build the Gradio interface
demo = gr.Interface(
    fn=respond_chat,
    inputs='text',
    outputs='text',
    title="NVIDIA FAQ Chatbot",
    description="Ask your question about NVIDIA products and services.",
)

# Launch the app
demo.launch(debug=True)
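# --- Optional client-side check (a sketch, not part of the app itself) ---
# Assumes the app above is already running locally on Gradio's default port
# 7860 and that the gradio_client package is installed; "/predict" is the
# default endpoint name Gradio assigns to a single-function gr.Interface.
# Kept commented out so this file still launches the app when executed.
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860")
# answer = client.predict("What is CUDA?", api_name="/predict")
# print(answer)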