|
from gtts import gTTS |
|
import io |
|
|
|
|
|
def text_to_speech(text, target_lang):
    """Synthesize speech for ``text`` with gTTS and return it in memory.

    Args:
        text: The text to speak. Surrounding whitespace is stripped and only
            the first 200 characters are synthesized.
        target_lang: Key into the supported-language table (``en``, ``fr``,
            ``es``, ``de``, ``hi``, ``zh``, ``ar``, ``ru``, ``ja``). Any
            other value falls back to English.

    Returns:
        An ``io.BytesIO`` rewound to position 0 containing the bytes written
        by gTTS, or ``None`` when the text is empty or not a string, when
        synthesis raises, or when no audio bytes were produced.
    """
    # Reject non-strings up front so they get the "invalid text" message
    # instead of surfacing as an AttributeError from .strip().
    if not isinstance(text, str) or not text.strip():
        print("Error: Empty or invalid text for speech synthesis")
        return None

    # Best-effort boundary: any failure below (bad language key, network
    # error inside gTTS, ...) is reported and turned into a None result.
    try:
        cleaned_text = text.strip()[:200]
        print(f"Attempting to synthesize: '{cleaned_text}' for lang: {target_lang}")

        lang_map = {
            "en": "en",
            "fr": "fr",
            "es": "es",
            "de": "de",
            "hi": "hi",
            "zh": "zh-cn",
            "ar": "ar",
            "ru": "ru",
            "ja": "ja",
        }
        # Unknown keys fall back to English. The lookup can therefore never
        # yield a code outside the table, so no further validation is needed.
        if target_lang not in lang_map:
            print(f"Unsupported language '{target_lang}', falling back to 'en'")
        lang_code = lang_map.get(target_lang, "en")

        tts = gTTS(text=cleaned_text, lang=lang_code, slow=False)
        audio_buffer = io.BytesIO()
        tts.write_to_fp(audio_buffer)
        # Rewind so the caller can read the audio from the start.
        audio_buffer.seek(0)

        size = audio_buffer.getbuffer().nbytes
        if size == 0:
            print("Error: Audio buffer is empty after synthesis")
            return None

        print(f"Audio generated successfully, size: {size} bytes")
        return audio_buffer
    except Exception as e:
        print(f"Audio error: {e}")
        return None