Spaces:

sagar007
/

Multimodal_App

Running on Zero

sagar007 committed on Aug 25, 2024

Commit

dceec72

verified ·

1 Parent(s): 2b390ac

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -222,9 +222,10 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Base().set(
                          inputs=[msg, chatbot, system_prompt, temperature, max_new_tokens, top_p, top_k],
                          outputs=[chatbot, audio_output])
         clear_btn.click(lambda: (None, None), None, [chatbot, audio_output], queue=False)
     with gr.Tab("Vision Model with TTS (Phi-3.5-vision)"):
-    with gr.Row():
         with gr.Column(scale=1):
             vision_input_img = gr.Image(label="Upload an Image", type="pil")
             vision_text_input = gr.Textbox(label="Ask a question about the image", placeholder="What do you see in this image?")
@@ -236,6 +237,9 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Base().set(
     vision_submit_btn.click(process_vision_query,
                             inputs=[vision_input_img, vision_text_input],
                             outputs=[vision_output_text, vision_output_audio])
     with gr.Tab("Text-to-Speech (Parler-TTS)"):

                          inputs=[msg, chatbot, system_prompt, temperature, max_new_tokens, top_p, top_k],
                          outputs=[chatbot, audio_output])
         clear_btn.click(lambda: (None, None), None, [chatbot, audio_output], queue=False)
     with gr.Tab("Vision Model with TTS (Phi-3.5-vision)"):
+        with gr.Row():
         with gr.Column(scale=1):
             vision_input_img = gr.Image(label="Upload an Image", type="pil")
             vision_text_input = gr.Textbox(label="Ask a question about the image", placeholder="What do you see in this image?")
     vision_submit_btn.click(process_vision_query,
                             inputs=[vision_input_img, vision_text_input],
                             outputs=[vision_output_text, vision_output_audio])
     with gr.Tab("Text-to-Speech (Parler-TTS)"):