Qwen2.5-Omni-7B-Demo

Runtime error

App Files Files Community

aimeri committed on Mar 28

Commit

9e14c66

1 Parent(s): 513e0c6

Improve chat history formatting in process_input and create_demo functions in app.py to enhance user experience with multimodal inputs, ensuring clear display of uploaded content types.

Browse files

Files changed (1) hide show

app.py +26 -10

app.py CHANGED Viewed

@@ -42,7 +42,7 @@ def process_input(image, audio, video, text, chat_history, voice_type, enable_au
         "video": video if video is not None else None
     }
-    # Prepare conversation history
     conversation = [SYSTEM_PROMPT]
     # Add previous chat history
@@ -103,8 +103,17 @@ def process_input(image, audio, video, text, chat_history, voice_type, enable_au
     # Clean up text response
     text_response = text_response.strip()
-    # Update chat history
-    chat_history.append((user_input, text_response))
     # Prepare output
     if enable_audio_output and audio_path:
@@ -221,7 +230,7 @@ def create_demo():
         # Text input handling
         text_submit.click(
-            fn=lambda text: {"text": text},
             inputs=text_input,
             outputs=[chatbot],
             queue=False
@@ -233,12 +242,19 @@ def create_demo():
         # Multimodal input handling
         def prepare_multimodal_input(image, audio, video, text):
-            return {
-                "text": text,
-                "image": image,
-                "audio": audio,
-                "video": video
-            }
         multimodal_submit.click(
             fn=prepare_multimodal_input,

         "video": video if video is not None else None
     }
+    # Prepare conversation history for model processing
     conversation = [SYSTEM_PROMPT]
     # Add previous chat history
     # Clean up text response
     text_response = text_response.strip()
+    # Format user message for chat history display
+    user_message_for_display = text
+    if image is not None:
+        user_message_for_display = (user_message_for_display or "Image uploaded") + " [Image]"
+    if audio is not None:
+        user_message_for_display = (user_message_for_display or "Audio uploaded") + " [Audio]"
+    if video is not None:
+        user_message_for_display = (user_message_for_display or "Video uploaded") + " [Video]"
+    # Update chat history with properly formatted entries
+    chat_history.append((user_message_for_display, text_response))
     # Prepare output
     if enable_audio_output and audio_path:
         # Text input handling
         text_submit.click(
+            fn=lambda text: text,
             inputs=text_input,
             outputs=[chatbot],
             queue=False
         # Multimodal input handling
         def prepare_multimodal_input(image, audio, video, text):
+            # Create a display message that indicates what was uploaded
+            display_message = text or ""
+            if image is not None:
+                display_message = (display_message + " " if display_message else "") + "[Image]"
+            if audio is not None:
+                display_message = (display_message + " " if display_message else "") + "[Audio]"
+            if video is not None:
+                display_message = (display_message + " " if display_message else "") + "[Video]"
+            if not display_message:
+                display_message = "Multimodal content"
+            return display_message
         multimodal_submit.click(
             fn=prepare_multimodal_input,