import gradio as gr

from inference_fine_tune import generate_response  # your generator-based inference code


# Stream the response: Gradio streams output when the handler is a generator
# function, so re-yield what generate_response produces. This assumes
# generate_response yields progressively complete text; if it yields deltas,
# accumulate them here before yielding.
def chat_interface(prompt):
    yield from generate_response(prompt)


with gr.Blocks() as demo:
    gr.Markdown("## Chat with the Model")
    with gr.Row():
        inp = gr.Textbox(label="Your Prompt", placeholder="Enter your message...", lines=3)
        out = gr.Textbox(label="Model Response", lines=10)  # streamed response appears here
    btn = gr.Button("Send")
    btn.click(chat_interface, inputs=inp, outputs=out)

# Launch the app. share=True creates a temporary public link for local runs;
# it is ignored on Hugging Face Spaces, which serve the app directly.
demo.launch(share=True)
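
# For reference, a minimal sketch of what this app assumes about
# inference_fine_tune.generate_response: a generator that yields the response
# text as it is produced. This is a hypothetical stand-in for local testing,
# not the actual implementation:
#
# def generate_response(prompt: str):
#     response = ""
#     for token in ["Hello", ", ", "world", "!"]:  # placeholder token stream
#         response += token
#         yield response  # yield the progressively completed text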