Update app.py
app.py
CHANGED
@@ -39,13 +39,6 @@ def process_text_input(api_key, text_prompt, selected_voice):
     except Exception as e:
         return f"Error: {str(e)}", None
 
-def get_audio_format(audio_path):
-    """Determine audio format from file extension"""
-    _, ext = os.path.splitext(audio_path)
-    if ext.lower() == '.m4a':
-        return 'm4a'
-    return 'wav'  # Default to wav for all other formats
-
 def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
     """Process audio input and generate a response"""
     try:
@@ -60,9 +53,6 @@ def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
             audio_data = audio_file.read()
             encoded_audio = base64.b64encode(audio_data).decode('utf-8')
 
-        # Determine audio format
-        audio_format = get_audio_format(audio_path)
-
         # Create message content with both text and audio
         message_content = []
 
@@ -76,7 +66,7 @@ def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
             "type": "input_audio",
             "input_audio": {
                 "data": encoded_audio,
-                "format": audio_format
+                "format": "wav"
             }
         })
 
@@ -202,7 +192,6 @@ with gr.Blocks(title="OpenAI Audio Chat App") as app:
                 label="Audio Input",
                 type="filepath",
                 sources=["microphone", "upload"]
-                # Removed the invalid parameter 'file_types'
             )
             example_btn = gr.Button("Use Example Audio")
 
@@ -310,7 +299,7 @@ with gr.Blocks(title="OpenAI Audio Chat App") as app:
     ## Notes:
     - You must provide your OpenAI API key in the field above
     - The model used is `gpt-4o-audio-preview` for conversation and `gpt-4o-transcribe` for transcriptions
-    - Audio inputs
+    - Audio inputs should be in WAV format
     - Available voices: alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, and verse
     - Each audio response is automatically transcribed for verification
     """)
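Because the m4a branch of `get_audio_format` is gone and the notes now state that audio inputs should be WAV, anything recorded or uploaded in another container has to be converted first. A minimal sketch of one way to do that with pydub; pydub is not a dependency of this Space (an assumption here) and needs ffmpeg available to decode non-WAV files.

```python
import os
from pydub import AudioSegment  # assumed extra dependency, not in this Space; requires ffmpeg

def to_wav(path: str) -> str:
    """Convert any file pydub/ffmpeg can decode into a sibling .wav file and return its path."""
    root, ext = os.path.splitext(path)
    if ext.lower() == ".wav":
        return path  # already WAV, nothing to do
    wav_path = root + ".wav"
    AudioSegment.from_file(path).export(wav_path, format="wav")
    return wav_path

# e.g. process_audio_input(api_key, to_wav("recording.m4a"), text_prompt, selected_voice)
```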