Spaces:

Athspi
/

Whshhs

Runtime error

App Files Files Community

Whshhs / app.py

Athspi

Update app.py

e8ee7fe verified about 1 month ago

raw

history blame

3.63 kB

	import gradio as gr
	import asyncio
	import numpy as np
	from google import genai
	from google.genai import types
	import soundfile as sf
	import io

	# --- Module configuration ---
	# Sample rate, in Hz, for the audio this app synthesises and encodes.
	SAMPLE_RATE = 24_000
	# Gemini model identifier (note the "-exp" suffix in the name).
	MODEL = "models/gemini-2.0-flash-exp"

	class GeminiTTS:
	    """Send text to Gemini and return a placeholder audio clip plus the reply.

	    No real speech synthesis happens here: the model's text reply is only
	    used to size a 220 Hz sine tone, which is returned WAV-encoded. Replace
	    the tone generation with a real TTS call to get spoken output.
	    """

	    # Bounds, in seconds, for the placeholder tone. The upper bound comes
	    # from the original "Max 10 seconds" intent; the lower bound is an
	    # assumption that keeps the clip from being empty or inaudibly short.
	    MIN_DURATION_S = 1.0
	    MAX_DURATION_S = 10.0

	    def __init__(self, api_key):
	        """Create the Gemini client and the generation settings.

	        Args:
	            api_key: Gemini API key; must be non-empty.

	        Raises:
	            ValueError: If ``api_key`` is empty or ``None``.
	        """
	        if not api_key:
	            raise ValueError("API key cannot be empty")
	        self.client = genai.Client(api_key=api_key)
	        # generate_content() takes its settings as a GenerateContentConfig.
	        self.config = types.GenerateContentConfig(
	            candidate_count=1,
	            max_output_tokens=2048,
	            temperature=0.9,
	        )

	    async def text_to_speech(self, text):
	        """Query Gemini with ``text`` and build the placeholder audio.

	        Args:
	            text: Prompt text to send to the model.

	        Returns:
	            ``(wav_bytes, reply_text)`` on success, where ``wav_bytes`` is a
	            complete WAV file held in memory. On any failure returns
	            ``(None, "Error: ...")`` so the UI can show the message instead
	            of crashing.
	        """
	        try:
	            # The async surface of the google-genai client lives under
	            # ``client.aio``; the model name is a required argument.
	            response = await self.client.aio.models.generate_content(
	                model=MODEL,
	                contents=text,
	                config=self.config,
	            )

	            # ``response.text`` can be None when the reply has no text part.
	            text_response = response.text or ""

	            # Placeholder audio: 0.1 s of tone per reply character, clamped
	            # to [MIN_DURATION_S, MAX_DURATION_S].
	            duration = min(
	                max(len(text_response) * 0.1, self.MIN_DURATION_S),
	                self.MAX_DURATION_S,
	            )
	            t = np.linspace(0, duration, int(SAMPLE_RATE * duration), False)
	            audio_data = np.sin(2 * np.pi * 220 * t) * 0.5  # half-amplitude sine

	            # Encode to an in-memory WAV file.
	            with io.BytesIO() as wav_buffer:
	                sf.write(wav_buffer, audio_data, SAMPLE_RATE, format='WAV')
	                return wav_buffer.getvalue(), text_response

	        except Exception as e:
	            # Deliberate catch-all at the UI boundary: report, don't raise.
	            return None, f"Error: {str(e)}"

	def create_interface():
	    """Build the Gradio Blocks app for the Gemini TTS demo.

	    The UI has an API-key box with an "Initialize" button, a text input
	    with a "Generate Speech" button, an audio player and a message box.

	    Returns:
	        The assembled ``gr.Blocks`` app (not yet launched).
	    """
	    # NOTE: a single engine is held in this closure, so every session served
	    # by this app instance shares the most recently initialised API key.
	    tts_engine = None

	    def init_engine(api_key):
	        """Create the engine from ``api_key`` and return a status string."""
	        nonlocal tts_engine
	        try:
	            tts_engine = GeminiTTS(api_key)
	            return "✅ TTS Initialized Successfully"
	        except Exception as e:
	            # UI boundary: surface the failure in the status box.
	            return f"❌ Initialization Failed: {str(e)}"

	    async def generate_speech(text):
	        """Run the engine and return ``(audio, message)`` for the outputs."""
	        if not tts_engine:
	            raise gr.Error("Please initialize the TTS first")

	        wav_bytes, message = await tts_engine.text_to_speech(text)
	        if not wav_bytes:
	            return None, message

	        # The engine returns an encoded WAV file, but the tuple form of
	        # gr.Audio needs (sample_rate, ndarray), so decode the samples
	        # first. int16 is read because sf.write stores WAV as 16-bit PCM by
	        # default.
	        with io.BytesIO(wav_bytes) as wav_buffer:
	            samples, sample_rate = sf.read(wav_buffer, dtype="int16")
	        return (sample_rate, samples), message

	    with gr.Blocks(title="Gemini TTS") as app:
	        gr.Markdown("# 🎤 Gemini Text-to-Speech")

	        with gr.Row():
	            api_key = gr.Textbox(
	                label="API Key",
	                type="password",
	                placeholder="Enter your Gemini API key"
	            )
	            init_btn = gr.Button("Initialize")

	        init_status = gr.Textbox(label="Status", interactive=False)
	        init_btn.click(init_engine, inputs=api_key, outputs=init_status)

	        with gr.Group():
	            text_input = gr.Textbox(
	                label="Input Text",
	                lines=3,
	                placeholder="Type something to speak..."
	            )
	            generate_btn = gr.Button("Generate Speech")

	        audio_output = gr.Audio(label="Output Audio")
	        text_output = gr.Textbox(label="Response Message", interactive=False)

	        generate_btn.click(
	            generate_speech,
	            inputs=text_input,
	            outputs=[audio_output, text_output]
	        )

	    return app

	if __name__ == "__main__":
	    # Script entry point: build the UI, then serve it on all network
	    # interfaces at port 7860.
	    app = create_interface()
	    app.launch(
	        server_name="0.0.0.0",
	        server_port=7860,
	    )