aimeri committed on
Commit
9e14c66
·
1 Parent(s): 513e0c6

Improve chat history formatting in the process_input and create_demo functions in app.py: the user message shown in the chat now carries [Image], [Audio], and [Video] tags that indicate which content types were uploaded alongside the text, making multimodal inputs easier to follow.

Browse files
Files changed (1) hide show
  1. app.py +26 -10
app.py CHANGED
@@ -42,7 +42,7 @@ def process_input(image, audio, video, text, chat_history, voice_type, enable_au
42
  "video": video if video is not None else None
43
  }
44
 
45
- # Prepare conversation history
46
  conversation = [SYSTEM_PROMPT]
47
 
48
  # Add previous chat history
@@ -103,8 +103,17 @@ def process_input(image, audio, video, text, chat_history, voice_type, enable_au
103
  # Clean up text response
104
  text_response = text_response.strip()
105
 
106
- # Update chat history
107
- chat_history.append((user_input, text_response))
 
 
 
 
 
 
 
 
 
108
 
109
  # Prepare output
110
  if enable_audio_output and audio_path:
@@ -221,7 +230,7 @@ def create_demo():
221
 
222
  # Text input handling
223
  text_submit.click(
224
- fn=lambda text: {"text": text},
225
  inputs=text_input,
226
  outputs=[chatbot],
227
  queue=False
@@ -233,12 +242,19 @@ def create_demo():
233
 
234
  # Multimodal input handling
235
  def prepare_multimodal_input(image, audio, video, text):
236
- return {
237
- "text": text,
238
- "image": image,
239
- "audio": audio,
240
- "video": video
241
- }
 
 
 
 
 
 
 
242
 
243
  multimodal_submit.click(
244
  fn=prepare_multimodal_input,
 
42
  "video": video if video is not None else None
43
  }
44
 
45
+ # Prepare conversation history for model processing
46
  conversation = [SYSTEM_PROMPT]
47
 
48
  # Add previous chat history
 
103
  # Clean up text response
104
  text_response = text_response.strip()
105
 
106
+ # Format user message for chat history display
107
+ user_message_for_display = text
108
+ if image is not None:
109
+ user_message_for_display = (user_message_for_display or "Image uploaded") + " [Image]"
110
+ if audio is not None:
111
+ user_message_for_display = (user_message_for_display or "Audio uploaded") + " [Audio]"
112
+ if video is not None:
113
+ user_message_for_display = (user_message_for_display or "Video uploaded") + " [Video]"
114
+
115
+ # Update chat history with properly formatted entries
116
+ chat_history.append((user_message_for_display, text_response))
117
 
118
  # Prepare output
119
  if enable_audio_output and audio_path:
 
230
 
231
  # Text input handling
232
  text_submit.click(
233
+ fn=lambda text: text,
234
  inputs=text_input,
235
  outputs=[chatbot],
236
  queue=False
 
242
 
243
  # Multimodal input handling
244
  def prepare_multimodal_input(image, audio, video, text):
245
+ # Create a display message that indicates what was uploaded
246
+ display_message = text or ""
247
+ if image is not None:
248
+ display_message = (display_message + " " if display_message else "") + "[Image]"
249
+ if audio is not None:
250
+ display_message = (display_message + " " if display_message else "") + "[Audio]"
251
+ if video is not None:
252
+ display_message = (display_message + " " if display_message else "") + "[Video]"
253
+
254
+ if not display_message:
255
+ display_message = "Multimodal content"
256
+
257
+ return display_message
258
 
259
  multimodal_submit.click(
260
  fn=prepare_multimodal_input,