aimeri committed on
Commit
f0ab3ba
·
1 Parent(s): f14a0ac

Refactor process_input function in app.py to handle multimodal inputs (image, audio, video, text) and update demo creation logic accordingly.

Browse files
Files changed (1) hide show
  1. app.py +11 -3
app.py CHANGED
@@ -33,7 +33,15 @@ VOICE_OPTIONS = {
33
  }
34
 
35
  @spaces.GPU
36
- def process_input(user_input, chat_history, voice_type, enable_audio_output):
 
 
 
 
 
 
 
 
37
  # Prepare conversation history
38
  conversation = [SYSTEM_PROMPT]
39
 
@@ -213,7 +221,7 @@ def create_demo():
213
  queue=False
214
  ).then(
215
  fn=process_input,
216
- inputs=[text_input, chatbot, voice_type, enable_audio_output],
217
  outputs=[chatbot, text_output, audio_output]
218
  )
219
 
@@ -233,7 +241,7 @@ def create_demo():
233
  queue=False
234
  ).then(
235
  fn=process_input,
236
- inputs=[{"image": image_input, "audio": audio_input, "video": video_input, "text": additional_text},
237
  chatbot, voice_type, enable_audio_output],
238
  outputs=[chatbot, text_output, audio_output]
239
  )
 
33
  }
34
 
35
  @spaces.GPU
36
+ def process_input(image, audio, video, text, chat_history, voice_type, enable_audio_output):
37
+ # Combine multimodal inputs
38
+ user_input = {
39
+ "text": text,
40
+ "image": image,
41
+ "audio": audio,
42
+ "video": video
43
+ }
44
+
45
  # Prepare conversation history
46
  conversation = [SYSTEM_PROMPT]
47
 
 
221
  queue=False
222
  ).then(
223
  fn=process_input,
224
+ inputs=[None, None, None, text_input, chatbot, voice_type, enable_audio_output],
225
  outputs=[chatbot, text_output, audio_output]
226
  )
227
 
 
241
  queue=False
242
  ).then(
243
  fn=process_input,
244
+ inputs=[image_input, audio_input, video_input, additional_text,
245
  chatbot, voice_type, enable_audio_output],
246
  outputs=[chatbot, text_output, audio_output]
247
  )