import gradio as gr
import torch
import spaces  # provides the @spaces.GPU decorator used on Hugging Face ZeroGPU Spaces
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the base model, LoRA adapter, and tokenizer if a GPU is available
if torch.cuda.is_available():
    model_id = "allenai/OLMo-7B-hf"
    adapters_name = "yilunzhao/olmo-finetuned"
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True)
    model = PeftModel.from_pretrained(model, adapters_name)
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
else:
    raise EnvironmentError("CUDA device not available. Please run on a GPU-enabled environment.")

# Generate a short answer to a question about the given passage
@spaces.GPU
def generate_response(passage: str, question: str) -> str:
    # Combine the passage and question into a single prompt
    message = [f"Passage: {passage}\nQuestion: {question}\nAnswer:"]
    inputs = tokenizer(message, return_tensors='pt', return_token_type_ids=False).to('cuda')

    # Generate, decode, strip the prompt, and keep only the first answer line
    response = model.generate(**inputs, max_new_tokens=100)
    response = tokenizer.batch_decode(response, skip_special_tokens=True)[0]
    response = response[len(message[0]):].strip().split('\n')[0]
    return response

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Passage and Question Response Generator")
    passage_input = gr.Textbox(label="Passage", placeholder="Enter the passage here", lines=5)
    question_input = gr.Textbox(label="Question", placeholder="Enter the question here", lines=2)
    output_box = gr.Textbox(label="Response", placeholder="Model's response will appear here")
    submit_button = gr.Button("Generate Response")
    submit_button.click(fn=generate_response, inputs=[passage_input, question_input], outputs=output_box)

# Run the app
if __name__ == "__main__":
    demo.launch()