# eysflowllm / app.py
# agilan1102's picture
# Update app.py
# 2d79dbb verified
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from peft import PeftModel
import gradio as gr
from huggingface_hub import login
import os
# Authenticate against the Hugging Face Hub. The token is read from the
# HF_TOKEN environment variable; startup is aborted with an explicit error when
# the variable is absent rather than continuing without credentials.
hf_token = os.getenv("HF_TOKEN")
if hf_token is None:
    raise ValueError("HF_TOKEN environment variable not found. Please check your Space secrets.")

login(token=hf_token)
# Define model paths.
# Hub identifier of the base checkpoint the adapter is applied to.
base_model_name = "meta-llama/Llama-3.2-3B-Instruct"
# Hub identifier of the adapter weights loaded on top of the base model.
lora_adapter_path = "agilan1102/eysflow_adapters"

# Load tokenizer and models.
# The credential is passed as `token=hf_token` instead of the deprecated
# `use_auth_token=True`. It is the same token that was handed to `login()`
# above, so the credentials used are unchanged while the deprecation warning
# (and the breakage once the old keyword is removed) is avoided.
tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=hf_token)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",  # device placement is delegated to the library
    token=hf_token,
)

# Wrap the base model with the adapter; text generation in this file goes
# through this wrapped model.
model_with_adapter = PeftModel.from_pretrained(
    base_model,
    lora_adapter_path,
    token=hf_token,
)
def generate_text_adapter(prompt):
    """Generate text for *prompt* with the adapter-wrapped model.

    The prompt is tokenized into PyTorch tensors, moved to the device reported
    by ``model_with_adapter``, and passed to ``generate`` with a budget of 500
    new tokens. The first returned sequence is then decoded with special
    tokens stripped.

    Args:
        prompt: Text to feed to the model. The Gradio interface in this file
            supplies it from a "text" input.

    Returns:
        The decoded text of the first generated sequence.

    Note:
        NOTE(review): with a causal LM, ``generate`` usually returns the prompt
        tokens followed by the continuation, so the result presumably starts
        with the prompt itself -- confirm this is the intended output.
    """
    encoded = tokenizer(prompt, return_tensors="pt")
    # Inputs must live on the same device as the model before generation.
    encoded = encoded.to(model_with_adapter.device)
    generated = model_with_adapter.generate(**encoded, max_new_tokens=500)
    # Only the first sequence in the returned batch is decoded.
    return tokenizer.decode(generated[0], skip_special_tokens=True)
# Build the web UI: one text input is passed to generate_text_adapter and its
# return value is shown in one text output.
demo = gr.Interface(
    title="My Finetuned LLM API",
    fn=generate_text_adapter,
    inputs="text",
    outputs="text",
)

# Start serving the interface.
demo.launch()