Athspi committed on
Commit
43ac355
·
verified ·
1 Parent(s): b8a34b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -31
app.py CHANGED
@@ -4,49 +4,54 @@ from google import genai
4
  from google.genai import types
5
  import gradio as gr
6
 
7
- # Set your Gemini API key (configure via Hugging Face Secrets)
8
  API_KEY = os.getenv("GEMINI_API_KEY")
9
-
10
  client = genai.Client(api_key=API_KEY)
11
 
12
  async def generate_audio(text):
13
- config = types.LiveConnectConfig(
14
- response_modalities=["audio"],
15
- speech_config=types.SpeechConfig(
16
- voice_config=types.VoiceConfig(
17
- prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Puck")
18
- )
19
- ),
20
- system_instruction=types.Content(
21
- parts=[types.Part.from_text("Repeat user input exactly without explanation")],
22
- role="user"
23
- ),
24
- )
25
-
26
- audio_data = b""
27
- async with client.aio.live.connect(model="models/gemini-2.0-flash-exp", config=config) as session:
28
- await session.send(input=text, end_of_turn=True)
29
- async for response in session.receive():
30
- if data := response.data:
31
- audio_data += data
32
-
33
- # Save as WAV file (16-bit PCM, 24kHz)
34
- with open("output.wav", "wb") as f:
35
- f.write(b"RIFF\x00\x00\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x01\x00\x00\x7d\x00\x00\x02\x00\x10\x00data\x00\x00\x00\x00")
36
- f.write(audio_data)
 
 
 
 
 
 
 
37
 
38
- return "output.wav"
 
 
39
 
40
  def tts(text):
41
  if not text.strip():
42
  return None
43
  try:
44
- asyncio.run(generate_audio(text))
45
- return "output.wav"
46
  except Exception as e:
47
- return f"Error: {str(e)}"
48
 
49
- # Gradio Interface
50
  iface = gr.Interface(
51
  fn=tts,
52
  inputs=gr.Textbox(label="Enter Text", placeholder="Type here..."),
 
4
  from google.genai import types
5
  import gradio as gr
6
 
 
7
  API_KEY = os.getenv("GEMINI_API_KEY")
 
8
  client = genai.Client(api_key=API_KEY)
9
 
10
  async def generate_audio(text):
11
+ try:
12
+ config = types.LiveConnectConfig(
13
+ response_modalities=["audio"],
14
+ speech_config=types.SpeechConfig(
15
+ voice_config=types.VoiceConfig(
16
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(
17
+ voice_name="Puck"
18
+ )
19
+ )
20
+ ),
21
+ # Fixed Part.from_text() usage (positional argument)
22
+ system_instruction=types.Content(
23
+ parts=[types.Part.from_text("Repeat user input exactly without explanation")],
24
+ role="user"
25
+ ),
26
+ )
27
+
28
+ audio_data = b""
29
+ async with client.aio.live.connect(model="models/gemini-2.0-flash-exp", config=config) as session:
30
+ await session.send(input=text, end_of_turn=True)
31
+ async for response in session.receive():
32
+ if data := response.data:
33
+ audio_data += data
34
+
35
+ # Save audio with proper WAV header
36
+ with open("output.wav", "wb") as f:
37
+ # WAV header for 24kHz 16-bit PCM
38
+ f.write(b'RIFF\x00\x00\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x01\x00\x00\x7d\x00\x00\x02\x00\x10\x00data\x00\x00\x00\x00')
39
+ f.write(audio_data)
40
+
41
+ return "output.wav"
42
 
43
+ except Exception as e:
44
+ print(f"Error: {str(e)}")
45
+ raise # Re-raise to trigger Gradio's error handling
46
 
47
  def tts(text):
48
  if not text.strip():
49
  return None
50
  try:
51
+ return asyncio.run(generate_audio(text))
 
52
  except Exception as e:
53
+ return f"Error: {str(e)}" # Now Gradio will show this message instead of trying to play it
54
 
 
55
  iface = gr.Interface(
56
  fn=tts,
57
  inputs=gr.Textbox(label="Enter Text", placeholder="Type here..."),