Spaces:

Athspi
/

Whshhs

Runtime error

App Files Files Community

Whshhs / app.py

Athspi

Update app.py

22938aa verified about 2 months ago

raw

history blame

2.02 kB

	import os
	import asyncio
	from google import genai
	from google.genai import types
	import gradio as gr

	# API key for the Gemini client, read from the environment; None when the
	# GEMINI_API_KEY variable is not set.
	API_KEY = os.environ.get("GEMINI_API_KEY")

	# Module-level client used by generate_audio() to open live sessions.
	client = genai.Client(api_key=API_KEY)

	def _write_pcm_wav(path, pcm, sample_rate=24000):
	    """Write raw mono 16-bit PCM bytes to *path* as a well-formed WAV file."""
	    # Function-scope import so this block does not depend on the file's
	    # top-level import list.
	    import wave

	    # The wave module computes the byte rate, block align and the RIFF/data
	    # chunk sizes itself, so the header always matches the payload.
	    with wave.open(path, "wb") as wav_file:
	        wav_file.setnchannels(1)
	        wav_file.setsampwidth(2)  # 2 bytes per sample == 16-bit
	        wav_file.setframerate(sample_rate)
	        wav_file.writeframes(pcm)


	async def generate_audio(text):
	    """Ask the Gemini live session to speak *text* and save it as a WAV file.

	    Opens a live session configured for audio output with the "Puck"
	    prebuilt voice, sends *text* as a single completed turn, concatenates
	    every non-empty ``response.data`` payload received, and writes the
	    result to ``output.wav`` in the current working directory.

	    Args:
	        text: The text to send to the model.

	    Returns:
	        The string ``"output.wav"``, the path of the file that was written.

	    Raises:
	        Exception: Any error raised while configuring, talking to the
	            service, or writing the file is printed and then re-raised.
	    """
	    try:
	        config = types.LiveConnectConfig(
	            response_modalities=["audio"],
	            speech_config=types.SpeechConfig(
	                voice_config=types.VoiceConfig(
	                    prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Puck")
	                )
	            ),
	            # Part.from_text() is called with an explicit ``text=`` keyword.
	            system_instruction=types.Content(
	                parts=[types.Part.from_text(text="Repeat user input exactly without explanation")],
	                role="user",
	            ),
	        )

	        # bytearray grows in place; repeated ``bytes +=`` would copy the
	        # whole buffer on every received chunk.
	        audio_data = bytearray()
	        async with client.aio.live.connect(model="models/gemini-2.0-flash-exp", config=config) as session:
	            await session.send(input=text, end_of_turn=True)
	            async for response in session.receive():
	                if data := response.data:
	                    audio_data += data

	        # The previous hand-written header was malformed: it omitted the
	        # byte-rate field, encoded 32000 Hz instead of the intended 24 kHz,
	        # and left the RIFF/data sizes at zero.
	        # NOTE(review): 24 kHz mono 16-bit PCM is taken from the original
	        # comment -- confirm against the service's documented output format.
	        _write_pcm_wav("output.wav", bytes(audio_data), sample_rate=24000)

	        return "output.wav"

	    except Exception as e:
	        # Log for the server console, then let the caller see the failure.
	        print(f"Error: {str(e)}")
	        raise

	def tts(text):
	    """Synchronous wrapper around generate_audio() used as the UI callback.

	    Args:
	        text: The text to convert to speech. May be ``None``.

	    Returns:
	        ``None`` when *text* is ``None``, empty, or only whitespace;
	        otherwise whatever ``generate_audio(text)`` returns.
	    """
	    # Guard against None as well as blank strings: calling .strip() on None
	    # would raise AttributeError instead of simply producing no audio.
	    if text is None or not text.strip():
	        return None
	    # generate_audio is a coroutine; run it to completion on a fresh loop.
	    return asyncio.run(generate_audio(text))

	# Gradio UI: one text box in, one audio player out, wired to tts().
	iface = gr.Interface(
	    title="Gemini TTS Demo",
	    description="Convert text to speech using Google's Gemini 2.0 Flash model",
	    fn=tts,
	    inputs=gr.Textbox(label="Enter Text", placeholder="Type here..."),
	    # tts() returns a file path, so the audio component is configured
	    # with type="filepath".
	    outputs=gr.Audio(label="TTS Output", type="filepath"),
	    examples=["Hello, this is a test.", "How are you today?"],
	)

	# Start the web app only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()