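"""TTS-Conv-Darija Space (app.py).

Generates a short Moroccan Darija conversation about a user-supplied topic with
Google Gemini, then voices each line with the darija-arabic-tts Space, using the
two uploaded reference voices.
"""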
import gradio as gr
import google.generativeai as genai
from gradio_client import Client, handle_file
import tempfile
import os
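
# Gemini is configured from the GOOGLE_API_KEY environment variable
# (e.g. set as a secret in the Hugging Face Space).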
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
model = genai.GenerativeModel('gemini-2.0-flash')
# Load the TTS client for the Darija Arabic TTS Space
tts_client = Client("https://medmac01-darija-arabic-tts.hf.space/")
def generate_conversation(subject, speaker1_audio, speaker2_audio):
    try:
        prompt = f"""
        Generate a natural Moroccan Darija conversation in Arabic script only between two people about: "{subject}".
        Rules:
        - Use only Arabic script for Darija
        - Do not include any transliterations or translations
        - Do not include any Latin characters or parentheses
        - Use "Speaker 1" and "Speaker 2" as the speaker names
        Format:
        Speaker 1: [Arabic Darija text only]
        Speaker 2: [Arabic Darija text only]
        Speaker 1: [Arabic Darija text only]
        Speaker 2: [Arabic Darija text only]
        Keep it short and casual (4 lines).
        """

        print("Sending prompt to Gemini API...")
        response = model.generate_content(prompt)
        print(f"Gemini API Response: {response}")

        if not response or not response.text:
            print("No response text received from Gemini API")
            return ["Error: No response from the model"] + [None] * 4

        result = response.text
        print(f"Generated text: {result}")

        # Split the response into lines, keep only "Speaker X: ..." lines, and
        # relabel them by position so the dialogue alternates 1, 2, 1, 2
        lines = []
        for line in result.split('\n'):
            line = line.strip()
            if ':' in line:
                # Extract the text after the colon
                text = line.split(':', 1)[1].strip()
                # Even indices become Speaker 1, odd indices Speaker 2
                if len(lines) % 2 == 0:
                    lines.append(f"Speaker 1: {text}")
                else:
                    lines.append(f"Speaker 2: {text}")

        print(f"Processed lines: {lines}")
        if not lines:
            print("No valid lines found in the response")
            return ["Error: No valid conversation generated"] + [None] * 4

        # Generate one audio clip per line with the TTS Space
        audio_paths = []
        for line in lines:
            # Pick the reference voice that matches the speaker label
            speaker_audio = speaker1_audio if line.startswith("Speaker 1") else speaker2_audio
            text = line.split(":", 1)[1].strip()

            # Create the TTS audio via the Space's API
            result = tts_client.predict(
                text=text,
                speaker_audio_path=handle_file(speaker_audio),
                temperature=0.75,
                api_name="/infer_EGTTS"
            )

            # Copy the returned file into a temporary .wav that Gradio can serve
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
                with open(result, "rb") as f:
                    tmp.write(f.read())
                tmp.flush()
                audio_paths.append(tmp.name)

        # Format the conversation text
        conversation_text = "\n".join(lines)

        # Pad to exactly 4 audio paths so the outputs always match the UI
        while len(audio_paths) < 4:
            audio_paths.append(None)

        # Return all outputs in the correct order: text first, then the 4 clips
        return [conversation_text] + audio_paths[:4]

    except Exception as e:
        print(f"Error occurred: {str(e)}")
        return [f"Error: {str(e)}"] + [None] * 4
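
# Build the Gradio UI: a topic textbox, two reference-voice uploads, the
# generated conversation text, and one audio player per line.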
with gr.Blocks() as demo:
    gr.Markdown("# 🗣️ Moroccan Darija Conversation Generator")
    gr.Markdown("Enter a discussion topic and upload 2 speaker voices. We'll generate a Darija conversation!")

    with gr.Row():
        subject = gr.Textbox(label="Subject of the discussion", placeholder="e.g. Going to the souk")
    with gr.Row():
        speaker1 = gr.Audio(label="Speaker 1 Reference (4-5 sec)", type="filepath")
        speaker2 = gr.Audio(label="Speaker 2 Reference (4-5 sec)", type="filepath")

    btn = gr.Button("🎤 Generate Conversation")

    # Text output for the generated conversation
    conversation_output = gr.Textbox(label="Generated Conversation", lines=6)
    # Audio outputs, one per conversation line
    audio_outputs = [gr.Audio(label=f"Line {i+1}") for i in range(4)]

    btn.click(
        generate_conversation,
        inputs=[subject, speaker1, speaker2],
        outputs=[conversation_output] + audio_outputs
    )
demo.launch()
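
# To run locally (assuming gradio, google-generativeai and gradio_client are
# installed and GOOGLE_API_KEY is set in the environment): python app.py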