import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig

# Load the adapter config, the quantized base model, then attach the fine-tuned PEFT adapter
config = PeftConfig.from_pretrained("AliEssa555/latest-podcast-model-ft")
base_model = AutoModelForCausalLM.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.2-GPTQ")
model = PeftModel.from_pretrained(base_model, "AliEssa555/latest-podcast-model-ft")

# Load the tokenizer from the base model referenced by the adapter config
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
if torch.cuda.is_available():
    model = model.to("cuda")
# Generate a response based on user input
def generate_response(user_input):
    # Format the input as an instructional prompt
    prompt = f"[INST] User: {user_input} [/INST] Assistant:"

    # Tokenize the prompt and move it to the same device as the model
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")

    # Generate a response with sampling
    output_tokens = model.generate(**inputs, max_length=512, temperature=0.7, top_p=0.9, do_sample=True)

    # Decode and return only the assistant's part of the output
    response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    return response.split("Assistant:")[-1].strip()
# Define the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## LLM Podcast Response Generator")
    with gr.Row():
        user_input = gr.Textbox(label="Enter your question related to the podcast:", placeholder="Type your question here...")
    with gr.Row():
        response_output = gr.Textbox(label="Model's Response")
    submit_button = gr.Button("Generate Response")

    # Connect the button to the generation function
    submit_button.click(fn=generate_response, inputs=user_input, outputs=response_output)

# Launch the Gradio app
demo.launch()