First_agent_uasername

Running

App Files Community

uasername committed on Feb 21

Commit

4d68d72

verified ·

1 Parent(s): f9c1999

Update Gradio_UI.py

Browse files

Files changed (1) hide show

Gradio_UI.py +25 -4

Gradio_UI.py CHANGED Viewed

@@ -26,6 +26,15 @@ from smolagents.utils import _is_package_available
 from Code_Functions import speak_text
 #from jokes import gradio_search_jokes
@@ -185,11 +194,16 @@ def stream_to_gradio(
             content={"path": final_answer.to_string(), "mime_type": "image/png"},
         )
     elif isinstance(final_answer, AgentAudio):
         yield gr.ChatMessage(
             role="assistant",
-            content={"path": final_answer.to_string(), "mime_type": "audio/mpeg"},
-            #content={"data": final_answer.value, "mime_type": "audio/mpeg"},
         )
     else:
         yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}")
@@ -313,7 +327,14 @@ class GradioUI:
                 if isinstance(msg.content, dict):
                     mime = msg.content.get("mime_type", "")
                     if mime.startswith("audio"):
-                        return msg.content.get("path")
             return None
         with gr.Blocks(fill_height=True) as demo:
@@ -331,7 +352,7 @@ class GradioUI:
             )
             # NEW: Add a dedicated audio player component below the chatbot.
-            audio_player = gr.Audio(label="Audio Pronunciation", type="filepath")
             if self.file_upload_folder is not None:
                 upload_file = gr.File(label="Upload a file")

 from Code_Functions import speak_text
+import io
+import librosa
+import numpy as np
+def mp3_bytes_to_numpy(audio_bytes, sr=None):
+    # Load audio from the MP3 bytes; sr=None preserves the original sample rate.
+    audio_np, sr = librosa.load(io.BytesIO(audio_bytes), sr=sr)
+    return audio_np
 #from jokes import gradio_search_jokes
             content={"path": final_answer.to_string(), "mime_type": "image/png"},
         )
     elif isinstance(final_answer, AgentAudio):
+        # Assuming your AgentAudio object stores the raw MP3 bytes in an attribute called "value"
+        audio_bytes = final_answer.value
+        # Convert MP3 bytes to a numpy array using our helper function
+        audio_np = mp3_bytes_to_numpy(audio_bytes)
         yield gr.ChatMessage(
             role="assistant",
+            content={"data": audio_np, "mime_type": "audio/mpeg"},
         )
+        print("DEBUG AgentAudio attributes:", vars(final_answer))
     else:
         yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}")
                 if isinstance(msg.content, dict):
                     mime = msg.content.get("mime_type", "")
                     if mime.startswith("audio"):
+                       # If the audio data is already provided under "data", return it.
+                        if "data" in msg.content:
+                            return msg.content["data"]
+                        # Otherwise, if a file path is provided (fallback), load and convert it.
+                        elif "path" in msg.content:
+                            with open(msg.content["path"], "rb") as f:
+                                audio_bytes = f.read()
+                            return mp3_bytes_to_numpy(audio_bytes)
             return None
         with gr.Blocks(fill_height=True) as demo:
             )
             # NEW: Add a dedicated audio player component below the chatbot.
+            audio_player = gr.Audio(label="Audio Pronunciation", type="numpy")
             if self.file_upload_folder is not None:
                 upload_file = gr.File(label="Upload a file")