Update app.py
Browse files
app.py
CHANGED
@@ -4,49 +4,54 @@ from google import genai
|
|
4 |
from google.genai import types
|
5 |
import gradio as gr
|
6 |
|
7 |
-
# Gemini API key, supplied through the environment (configure it via
# Hugging Face Secrets). The value is None when the variable is unset.
API_KEY = os.environ.get("GEMINI_API_KEY")

# Module-level Gemini client built from that key.
client = genai.Client(api_key=API_KEY)
|
11 |
|
12 |
async def generate_audio(text):
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
-
|
|
|
|
|
39 |
|
40 |
def tts(text):
    """Convert *text* to speech and return the path of the generated WAV file.

    Args:
        text: The text to synthesize. ``None``, empty and whitespace-only
            input is treated as "nothing to do".

    Returns:
        ``"output.wav"`` after ``generate_audio`` has completed, or ``None``
        when there is no text to synthesize.

    Raises:
        gr.Error: If audio generation fails, so the failure is reported to
            the user instead of the message being returned as if it were a
            file path.
    """
    # Guard against None as well as blank strings before calling .strip().
    if not text or not text.strip():
        return None
    try:
        asyncio.run(generate_audio(text))
    except Exception as e:
        # Returning the message would hand a non-path string to the caller
        # in place of an audio file; raise a Gradio error instead.
        raise gr.Error(f"Error: {e}") from e
    return "output.wav"
|
48 |
|
49 |
-
# Gradio Interface
|
50 |
iface = gr.Interface(
|
51 |
fn=tts,
|
52 |
inputs=gr.Textbox(label="Enter Text", placeholder="Type here..."),
|
|
|
4 |
from google.genai import types
|
5 |
import gradio as gr
|
6 |
|
|
|
7 |
# Gemini API key read from the GEMINI_API_KEY environment variable
# (None when the variable is unset).
API_KEY = os.environ.get("GEMINI_API_KEY")

# Module-level Gemini client; generate_audio() opens its live session on it.
client = genai.Client(api_key=API_KEY)
|
9 |
|
10 |
async def generate_audio(text):
    """Synthesize *text* to speech with the Gemini Live API and save it as WAV.

    Opens a live session configured for audio responses with the prebuilt
    "Puck" voice, sends ``text`` as one completed turn, and collects every
    audio chunk the session yields before writing them to ``output.wav``.

    Args:
        text: The text to be spoken.

    Returns:
        The path of the written file, ``"output.wav"``.

    Raises:
        RuntimeError: If the session ended without returning any audio.
        Exception: Any error raised by the Gemini client or while writing
            the file is printed and then re-raised unchanged.
    """
    # Imported locally so this function does not depend on the file's
    # top-level import block providing ``wave``.
    import wave

    try:
        config = types.LiveConnectConfig(
            response_modalities=["audio"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
                        voice_name="Puck"
                    )
                )
            ),
            system_instruction=types.Content(
                # Build the Part from its ``text`` field directly: newer
                # google-genai releases make ``Part.from_text`` keyword-only,
                # so the positional call fails there.
                parts=[
                    types.Part(
                        text="Repeat user input exactly without explanation"
                    )
                ],
                role="user",
            ),
        )

        # Collect chunks in a list and join once; repeated ``bytes +=``
        # re-copies the whole buffer for every chunk received.
        chunks = []
        async with client.aio.live.connect(
            model="models/gemini-2.0-flash-exp", config=config
        ) as session:
            await session.send(input=text, end_of_turn=True)
            async for response in session.receive():
                if data := response.data:
                    chunks.append(data)
        audio_data = b"".join(chunks)

        if not audio_data:
            # Without this check an empty, unplayable file would be returned.
            raise RuntimeError("No audio data received from the model")

        # Let the ``wave`` module emit the header so the chunk sizes and
        # byte-rate fields are correct. Format: mono, 16-bit PCM at 24 kHz
        # (the format the original header comment states for this audio).
        with wave.open("output.wav", "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)  # 2 bytes per sample = 16-bit
            wav_file.setframerate(24000)
            wav_file.writeframes(audio_data)

        return "output.wav"

    except Exception as e:
        print(f"Error: {str(e)}")
        raise  # Re-raise so the caller decides how to report the failure
|
46 |
|
47 |
def tts(text):
    """Gradio callback: convert *text* to speech and return the audio file path.

    Args:
        text: The text entered by the user. ``None``, empty and
            whitespace-only input is treated as "nothing to do".

    Returns:
        The file path returned by ``generate_audio``, or ``None`` when there
        is no text to synthesize.

    Raises:
        gr.Error: If audio generation fails. Raising (rather than returning
            the message) makes Gradio report the error to the user; a
            returned string would be handed to the output component as if
            it were the path of an audio file.
    """
    # Guard against None as well as blank strings before calling .strip().
    if not text or not text.strip():
        return None
    try:
        return asyncio.run(generate_audio(text))
    except Exception as e:
        raise gr.Error(f"Error: {e}") from e
|
54 |
|
|
|
55 |
iface = gr.Interface(
|
56 |
fn=tts,
|
57 |
inputs=gr.Textbox(label="Enter Text", placeholder="Type here..."),
|