import gradio as gr

from inference_fine_tune import generate_response  # your generator-based inference code


# Stream the response: Gradio streams output when the handler is a generator
# function, so re-yield what generate_response produces. This assumes
# generate_response yields progressively complete text; if it yields deltas,
# accumulate them here before yielding.
def chat_interface(prompt):
    yield from generate_response(prompt)


with gr.Blocks() as demo:
    gr.Markdown("## Chat with the Model")
    with gr.Row():
        inp = gr.Textbox(label="Your Prompt", placeholder="Enter your message...", lines=3)
        out = gr.Textbox(label="Model Response", lines=10)  # streamed response appears here
    btn = gr.Button("Send")
    btn.click(chat_interface, inputs=inp, outputs=out)

# Launch the app. share=True creates a temporary public link for local runs;
# it is ignored on Hugging Face Spaces, which serve the app directly.
demo.launch(share=True)
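
# For reference, a minimal sketch of what this app assumes about
# inference_fine_tune.generate_response: a generator that yields the response
# text as it is produced. This is a hypothetical stand-in for local testing,
# not the actual implementation:
#
# def generate_response(prompt: str):
#     response = ""
#     for token in ["Hello", ", ", "world", "!"]:  # placeholder token stream
#         response += token
#         yield response  # yield the progressively completed text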