Spaces:
Running
Running
# Standard library
import os
import tempfile

# Third-party
import google.generativeai as genai
import gradio as gr
from gradio_client import Client, handle_file

# Gemini is configured with the key read from the GOOGLE_API_KEY environment
# variable (None is passed through if the variable is unset).
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
model = genai.GenerativeModel("gemini-2.0-flash")

# Client for the remote Darija/Arabic TTS Space used to voice each line.
tts_client = Client("https://medmac01-darija-arabic-tts.hf.space/")
# Number of conversation lines voiced per request; the function always returns
# one text slot followed by this many audio slots.
_MAX_LINES = 4

# Prompt sent to the model; ``{subject}`` is filled in per request.
_PROMPT_TEMPLATE = """
Generate a natural Moroccan Darija conversation in Arabic script only between two people about: "{subject}".
Rules:
- Use only Arabic script for Darija
- Do not include any transliterations or translations
- Do not include any Latin characters or parentheses
- Use "Speaker 1" and "Speaker 2" as the speaker names
Format:
Speaker 1: [Arabic Darija text only]
Speaker 2: [Arabic Darija text only]
Speaker 1: [Arabic Darija text only]
Speaker 2: [Arabic Darija text only]
Keep it short and casual (4 lines).
"""


def _error_outputs(message):
    """Return the five-slot output list carrying only an error message."""
    return [f"Error: {message}"] + [None] * _MAX_LINES


def _parse_conversation_lines(raw_text, limit=_MAX_LINES):
    """Extract at most *limit* ``"Speaker N: text"`` lines from model output.

    Speakers are assigned by strict alternation starting with Speaker 1,
    regardless of the label the model wrote before the colon. Lines without a
    colon, or with nothing after it (e.g. a "Here is the conversation:"
    preamble), are skipped so they neither reach the TTS service nor shift
    the alternation.
    """
    lines = []
    for raw_line in raw_text.split("\n"):
        _, separator, text = raw_line.strip().partition(":")
        text = text.strip()
        if not separator or not text:
            continue
        speaker_number = 1 if len(lines) % 2 == 0 else 2
        lines.append(f"Speaker {speaker_number}: {text}")
        if len(lines) == limit:
            # Only `limit` clips are returned, so voicing more is wasted work.
            break
    return lines


def _synthesize_line(text, speaker_audio):
    """Voice *text* with the TTS service and return the path of a .wav copy."""
    generated_path = tts_client.predict(
        text=text,
        speaker_audio_path=handle_file(speaker_audio),
        temperature=0.75,
        api_name="/infer_EGTTS",
    )
    # delete=False: the file must outlive this function because only its path
    # is handed back to the caller.
    with open(generated_path, "rb") as source, tempfile.NamedTemporaryFile(
        delete=False, suffix=".wav"
    ) as tmp:
        tmp.write(source.read())
    return tmp.name


def generate_conversation(subject, speaker1_audio, speaker2_audio):
    """Generate a short Darija conversation and voice each line.

    Args:
        subject: Topic of the conversation; must not be empty or blank.
        speaker1_audio: File path of the reference voice used for Speaker 1.
        speaker2_audio: File path of the reference voice used for Speaker 2.

    Returns:
        A list of five items: the conversation text (or an ``"Error: ..."``
        message) followed by four audio file paths. Unused audio slots, and
        all audio slots on error, are ``None``.

    No exception propagates to the caller: any failure while calling the
    model or the TTS service is printed and reported in the first slot.
    """
    # Validate before spending a model call on a request that cannot succeed.
    if not subject or not subject.strip():
        return _error_outputs("Please enter a subject for the discussion")
    if not speaker1_audio or not speaker2_audio:
        return _error_outputs(
            "Please upload a reference audio clip for both speakers"
        )

    try:
        prompt = _PROMPT_TEMPLATE.format(subject=subject)

        print("Sending prompt to Gemini API...")
        response = model.generate_content(prompt)
        print(f"Gemini API Response: {response}")

        if not response or not response.text:
            print("No response text received from Gemini API")
            return _error_outputs("No response from the model")

        generated_text = response.text
        print(f"Generated text: {generated_text}")

        lines = _parse_conversation_lines(generated_text)
        print(f"Processed lines: {lines}")

        if not lines:
            print("No valid lines found in the response")
            return _error_outputs("No valid conversation generated")

        audio_paths = []
        for line in lines:
            reference_audio = (
                speaker1_audio if line.startswith("Speaker 1") else speaker2_audio
            )
            spoken_text = line.split(":", 1)[1].strip()
            audio_paths.append(_synthesize_line(spoken_text, reference_audio))

        conversation_text = "\n".join(lines)
        # Pad so the result always has exactly _MAX_LINES audio slots.
        padding = [None] * (_MAX_LINES - len(audio_paths))
        return [conversation_text] + audio_paths + padding
    except Exception as e:
        # Boundary handler: report the failure in the text slot instead of
        # letting it propagate to the caller.
        print(f"Error occurred: {str(e)}")
        return _error_outputs(str(e))
# Gradio UI: one topic box, two reference-voice uploads, and five outputs
# (the conversation text plus one audio player per generated line).
with gr.Blocks() as demo:
    gr.Markdown("# 🗣️ Moroccan Darija Conversation Generator")
    gr.Markdown("Enter a discussion topic and upload 2 speaker voices. We'll generate a Darija conversation!")

    with gr.Row():
        subject = gr.Textbox(label="Subject of the discussion", placeholder="e.g. Going to the souk")

    with gr.Row():
        # type="filepath" hands the handler a path on disk for each upload.
        speaker1 = gr.Audio(label="Speaker 1 Reference (4-5 sec)", type="filepath")
        speaker2 = gr.Audio(label="Speaker 2 Reference (4-5 sec)", type="filepath")

    # NOTE(review): the button emoji was mis-encoded in the source; a
    # microphone is assumed here — confirm against the original file.
    btn = gr.Button("🎤 Generate Conversation")

    # Text output for the generated conversation.
    conversation_output = gr.Textbox(label="Generated Conversation", lines=6)

    # One audio player per conversation line.
    audio_outputs = [gr.Audio(label=f"Line {i+1}") for i in range(4)]

    # Output order must match the list returned by generate_conversation:
    # conversation text first, then the four audio paths.
    btn.click(
        generate_conversation,
        inputs=[subject, speaker1, speaker2],
        outputs=[conversation_output] + audio_outputs,
    )

demo.launch()