Spaces:

Athspi
/

Whshhs

Runtime error

File size: 2,018 Bytes

bdfd7a5
b8a34b4
 
 
 
bdfd7a5
b8a34b4
 
bdfd7a5
b8a34b4
43ac355
 
 
 
 
8bdf1fa
43ac355
 
22938aa
43ac355
22938aa
43ac355
 
 
 
 
 
 
 
 
 
 
22938aa
43ac355
 
 
 
 
b8a34b4
43ac355
 
22938aa
b8a34b4
 
 
 
22938aa
b8a34b4
 
 
 
 
 
 
 
 
bdfd7a5
5f3d5cb
b8a34b4

import asyncio
import os
import wave

import gradio as gr
from google import genai
from google.genai import types

# The API key comes from the environment; it is None when the variable is unset.
API_KEY = os.environ.get("GEMINI_API_KEY")
# Module-level Gemini client shared by every request handled in this process.
client = genai.Client(api_key=API_KEY)

async def generate_audio(text):
    """Speak *text* through a Gemini live session and save the audio as a WAV file.

    Opens a live session configured for audio responses with the prebuilt
    "Puck" voice, sends *text* as a single completed turn, and collects every
    audio payload the session yields. The collected bytes are written to
    ``output.wav`` in the current working directory.

    Args:
        text: The text to send to the model.

    Returns:
        The string ``"output.wav"``, the path of the file that was written.

    Raises:
        Exception: Any error raised while configuring, streaming, or writing
            is printed and then re-raised unchanged.
    """
    try:
        config = types.LiveConnectConfig(
            response_modalities=["audio"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Puck")
                )
            ),
            # Part.from_text() is called with the explicit ``text=`` keyword.
            system_instruction=types.Content(
                parts=[types.Part.from_text(text="Repeat user input exactly without explanation")],
                role="user"
            ),
        )

        # Gather chunks in a list and join once instead of repeated bytes +=.
        chunks = []
        async with client.aio.live.connect(model="models/gemini-2.0-flash-exp", config=config) as session:
            await session.send(input=text, end_of_turn=True)
            async for response in session.receive():
                # Responses whose ``data`` is empty/None carry no audio; skip them.
                if data := response.data:
                    chunks.append(data)
        audio_data = b"".join(chunks)

        # Let the stdlib ``wave`` writer build the header so the chunk sizes
        # and format fields are filled in correctly. The previous hand-written
        # header was four bytes short, left both size fields at zero, and
        # encoded a 32000 Hz sample rate.
        # NOTE(review): the stream is treated as 24 kHz, 16-bit, mono PCM as
        # the original comment stated -- confirm against the Live API docs.
        with wave.open("output.wav", "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)  # bytes per sample, i.e. 16-bit
            wav_file.setframerate(24000)
            wav_file.writeframes(audio_data)

        return "output.wav"

    except Exception as e:
        print(f"Error: {str(e)}")
        raise

def tts(text):
    """Synchronous wrapper that turns *text* into a speech file path.

    Args:
        text: The text to synthesize. May be ``None`` or blank.

    Returns:
        The value returned by ``generate_audio(text)``, or ``None`` when
        *text* is ``None``, empty, or only whitespace.
    """
    # Check for None/empty before calling .strip(), which would raise
    # AttributeError on None.
    if not text or not text.strip():
        return None
    # Drive the coroutine to completion on a fresh event loop.
    return asyncio.run(generate_audio(text))

# Components are built first (input before output, as in the original call)
# and then wired into a single Interface around ``tts``.
text_input = gr.Textbox(label="Enter Text", placeholder="Type here...")
audio_output = gr.Audio(label="TTS Output", type="filepath")
sample_prompts = ["Hello, this is a test.", "How are you today?"]

iface = gr.Interface(
    fn=tts,
    inputs=text_input,
    outputs=audio_output,
    examples=sample_prompts,
    title="Gemini TTS Demo",
    description="Convert text to speech using Google's Gemini 2.0 Flash model",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()