import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Load the base model and tokenizer (CPU, full precision)
base_model = "vilsonrodrigues/falcon-7b-instruct-sharded"
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(
    base_model, device_map="cpu", torch_dtype=torch.float32
)

# Load the fine-tuned LoRA adapter on top of the base model
adapter_path = "./model"
model = PeftModel.from_pretrained(model, adapter_path)
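# Optional: merging the LoRA weights into the base model can speed up CPU
# inference, since it removes the adapter indirection on every forward pass.
# A sketch using PEFT's standard merge_and_unload() method:
# model = model.merge_and_unload()
model.eval()  # explicit, although models load in eval mode by default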
def generate_response(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    with torch.no_grad():
        # max_new_tokens bounds only the generated continuation;
        # max_length would also count the prompt tokens against the limit
        outputs = model.generate(**inputs, max_new_tokens=200)
    # Return only the newly generated tokens, not the echoed prompt
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
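# Quick local smoke test before wiring up the UI (the prompt is a
# hypothetical example, not from the original app):
# print(generate_response("What is compound interest?"))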
# Gradio interface
interface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Ask AI"),
    outputs=gr.Textbox(label="Answer"),
    title="Financial AI Chatbot",
    description="Fine-tuned Falcon 7B model for financial Q&A.",
)
if __name__ == "__main__":
    interface.launch()
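# Once the app is running, it can also be queried programmatically with
# gradio_client (a sketch; the local URL and the auto-generated "/predict"
# endpoint name assume Gradio's defaults, not anything in the original app):
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   print(client.predict("What is compound interest?", api_name="/predict"))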