phi2-qlora / app.py
kishkath's picture
Create app.py
1a2a056 verified
raw
history blame
2.18 kB
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
# Directory where your fine-tuned Phi-2 model and associated files are stored.
# This directory should include files like:
#   - adapter_config.json, adapter_model.safetensors,
#   - tokenizer_config.json, tokenizer.json, merges.txt,
#   - special_tokens_map.json, vocab.json, added_tokens.json, etc.
# The same path is reused below for the tokenizer, the base model and the
# adapter, so all three are expected to be resolvable from this one location.
model_dir = "./phi2-finetune"

# Load the tokenizer from the fine-tune directory.
tokenizer = AutoTokenizer.from_pretrained(model_dir)

# Load the base model. (Assumes the base model files are in model_dir.)
# NOTE(review): the file list above names only adapter and tokenizer files;
# confirm that model_dir also contains (or can resolve) the base model
# weights and config -- TODO confirm.
# device_map="auto" presumably delegates device placement to the library;
# verify against the transformers documentation.
base_model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto")

# Load the adapter (PEFT) weights from model_dir on top of the base model.
# `model` is the object the generation function below calls.
model = PeftModel.from_pretrained(base_model, model_dir)
def generate_response(prompt, max_new_tokens=200, temperature=0.7):
    """Generate a response from the fine-tuned Phi-2 model given a prompt.

    Args:
        prompt: Text to feed to the model. ``None`` or an empty string
            short-circuits to an empty response without calling the model.
        max_new_tokens: Upper bound on the number of newly generated tokens.
            Coerced to ``int`` because UI sliders may deliver floats.
        temperature: Sampling temperature. A value above zero enables
            sampling with that temperature; zero or below falls back to
            greedy decoding, because sampling needs a strictly positive
            temperature.

    Returns:
        The text decoded from the first output sequence, with special
        tokens skipped.
    """
    # Nothing to condition on: skip tokenization and generation entirely.
    if not prompt:
        return ""

    # Normalize UI-provided numbers before handing them to generate().
    generation_kwargs = {"max_new_tokens": int(max_new_tokens)}
    temperature = float(temperature)
    if temperature > 0.0:
        generation_kwargs["do_sample"] = True
        generation_kwargs["temperature"] = temperature
    else:
        # The slider allows 0.0, which is not a valid sampling temperature;
        # treat it as a request for deterministic (greedy) decoding.
        generation_kwargs["do_sample"] = False

    # Tokenize the prompt and move tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(**inputs, **generation_kwargs)

    # Decode the generated tokens and return the response.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Assemble the Gradio UI: one prompt box plus two sliders feed
# generate_response, and the result is shown in a single text box.
_input_components = [
    gr.Textbox(lines=4, label="Input Prompt"),
    gr.Slider(50, 500, value=200, label="Max New Tokens"),
    gr.Slider(0.0, 1.0, value=0.7, label="Temperature"),
]
_output_component = gr.Textbox(label="Response")

# Each example row is (prompt, max new tokens, temperature), matching the
# order of the input components above.
_example_rows = [
    ["Hello, how are you today?", 150, 0.7],
    ["Translate this sentence from English to French: I love programming.", 200, 0.8],
    ["Tell me a joke about artificial intelligence.", 180, 0.6],
]

demo = gr.Interface(
    fn=generate_response,
    inputs=_input_components,
    outputs=_output_component,
    title="Phi-2 Fine-tuned Chat",
    description="A Hugging Face Space app serving the fine-tuned Phi-2 model trained on OpenAssistant/oasst1 data.",
    examples=_example_rows,
)
# Launch the Gradio app only when this file is run as a script, so that
# importing the module does not start the interface.
if __name__ == "__main__":
    demo.launch()