Update app.py
app.py
CHANGED
@@ -39,13 +39,6 @@ def process_text_input(api_key, text_prompt, selected_voice):
     except Exception as e:
         return f"Error: {str(e)}", None
 
-def get_audio_format(audio_path):
-    """Determine audio format from file extension"""
-    _, ext = os.path.splitext(audio_path)
-    if ext.lower() == '.m4a':
-        return 'm4a'
-    return 'wav'  # Default to wav for all other formats
-
 def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
     """Process audio input and generate a response"""
     try:
@@ -60,9 +53,6 @@ def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
             audio_data = audio_file.read()
             encoded_audio = base64.b64encode(audio_data).decode('utf-8')
 
-        # Determine audio format
-        audio_format = get_audio_format(audio_path)
-
         # Create message content with both text and audio
         message_content = []
 
@@ -76,7 +66,7 @@ def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
             "type": "input_audio",
             "input_audio": {
                 "data": encoded_audio,
-                "format": audio_format
+                "format": "wav"
             }
         })
 
@@ -202,7 +192,6 @@ with gr.Blocks(title="OpenAI Audio Chat App") as app:
                 label="Audio Input",
                 type="filepath",
                 sources=["microphone", "upload"]
-                # Removed the invalid parameter 'file_types'
             )
             example_btn = gr.Button("Use Example Audio")
 
@@ -310,7 +299,7 @@ with gr.Blocks(title="OpenAI Audio Chat App") as app:
     ## Notes:
     - You must provide your OpenAI API key in the field above
     - The model used is `gpt-4o-audio-preview` for conversation and `gpt-4o-transcribe` for transcriptions
-    - Audio inputs
+    - Audio inputs should be in WAV format
     - Available voices: alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, and verse
     - Each audio response is automatically transcribed for verification
     """)
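Because the m4a branch of `get_audio_format` is gone and the notes now state that audio inputs should be WAV, anything recorded or uploaded in another container has to be converted first. A minimal sketch of one way to do that with pydub; pydub is not a dependency of this Space (an assumption here) and needs ffmpeg available to decode non-WAV files.

```python
import os
from pydub import AudioSegment  # assumed extra dependency, not in this Space; requires ffmpeg

def to_wav(path: str) -> str:
    """Convert any file pydub/ffmpeg can decode into a sibling .wav file and return its path."""
    root, ext = os.path.splitext(path)
    if ext.lower() == ".wav":
        return path  # already WAV, nothing to do
    wav_path = root + ".wav"
    AudioSegment.from_file(path).export(wav_path, format="wav")
    return wav_path

# e.g. process_audio_input(api_key, to_wav("recording.m4a"), text_prompt, selected_voice)
```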