Cheng Jed
initial commit
c005bf8
raw
history blame contribute delete
4.35 kB
import requests
import os
import io
from os import path
from typing import Dict, Literal, TypedDict, Optional
import argparse
import asyncio
import base64
# Credentials and endpoint of the TTS service, read from the environment.
# The two CF-Access values are sent as request headers with every API call;
# TTS_API_URL is the base URL that the "/api/tts" paths are appended to.
TTS_CLIENT_ID = os.environ.get('TTS_CLIENT_ID')
TTS_CLIENT_SECRET = os.environ.get('TTS_CLIENT_SECRET')
TTS_API_URL = os.environ.get('TTS_API_URL')

# Fail fast at import time and name every variable that is unset or empty,
# so the configuration problem can be fixed without reading the source.
_missing_env = [
    env_name
    for env_name, env_value in (
        ('TTS_CLIENT_ID', TTS_CLIENT_ID),
        ('TTS_CLIENT_SECRET', TTS_CLIENT_SECRET),
        ('TTS_API_URL', TTS_API_URL),
    )
    if not env_value
]
if _missing_env:
    raise ValueError(
        f"Missing environment variables: {', '.join(_missing_env)}"
    )
class TaskResult(TypedDict):
    """Shape of the JSON document describing the state of a TTS task."""

    # Identifier of the task this result belongs to.
    task_id: str
    # Message accompanying the result; printed when generation fails.
    message: str
    # 'PENDING' means the task is still being processed; polling stops on
    # any other value.
    status: Literal["PENDING", "SUCCESS", "FAILED"]
    # Base64 encoded wav audio. Consumers strip everything up to the first
    # comma before decoding (presumably a data-URI prefix -- TODO confirm).
    audio_url: str
class Voice(TypedDict):
    """Configuration of one selectable voice preset."""

    # Human-readable label shown to the user.
    name: str
    # Text sent to the service alongside the prompt audio
    # (presumably the transcript of that recording -- TODO confirm).
    promptText: str
    # Filesystem path of the prompt audio file that gets uploaded.
    promptAudio: str
# Voice presets keyed by the identifier accepted by the --voice option.
# Each row is (key, display name, prompt text, audio path relative to this
# file); the audio path is resolved against this module's directory.
voices: Dict[str, Voice] = {
    preset_key: {
        "name": display_name,
        "promptText": prompt_text,
        "promptAudio": path.join(path.dirname(__file__), audio_file),
    }
    for preset_key, display_name, prompt_text, audio_file in (
        (
            "mk_girl",
            "👧 凱婷",
            "我決定咗啦,我要做一件到目前為止又或者永遠都唔會再見到我做嘅事。",
            "./voices/mk_girl.wav",
        ),
        (
            "doraemon",
            "🥸 全叔",
            "各位觀眾大家好,我叮噹呢又同你哋見面啦。好多謝咁多年嚟各位嘅捧場同支持。",
            "./voices/doraemon3.wav",
        ),
        (
            "周星馳",
            "😈 星爺",
            "大家好啊,想唔想同我做好朋友啊。",
            "./voices/sing.mp3",
        ),
    )
}
async def tts(input_text: str, voice: Voice) -> str:
    """
    Send TTS request with voice information

    The text, the voice's prompt text and its prompt audio are sent as one
    multipart form POST to ``{TTS_API_URL}/api/tts``.

    Args:
        input_text: Text to be converted to speech
        voice: Voice configuration; ``promptText`` and ``promptAudio`` are read
    Returns:
        task_id: ID of the TTS task, taken from the JSON response
    Raises:
        OSError: If the prompt audio file cannot be opened
        requests.HTTPError: If the service answers with an error status
    """
    headers = {
        'CF-Access-Client-Id': TTS_CLIENT_ID,
        'CF-Access-Client-Secret': TTS_CLIENT_SECRET
    }
    # Open the prompt audio in a ``with`` block so the file handle is closed
    # as soon as the upload is done, including when the request raises.
    with open(voice['promptAudio'], 'rb') as prompt_audio:
        files = {
            # A (None, value) tuple sends a plain form field with no filename.
            'input_text': (None, input_text),
            'prompt_text': (None, voice['promptText']),
            # NOTE(review): the upload is always named 'prompt.wav', whatever
            # the extension of the actual prompt file is.
            'audio': ('prompt.wav', prompt_audio),
            'speed': (None, '1.0')
        }
        # NOTE(review): requests.post is a blocking call, so this coroutine
        # does not yield to the event loop while the request is in flight.
        response = requests.post(f"{TTS_API_URL}/api/tts",
                                 files=files,
                                 headers=headers)
    response.raise_for_status()
    return response.json()['task_id']
async def get_task_result(task_id: str) -> TaskResult:
    """
    Fetch the current state of a TTS task

    Issues a GET to ``{TTS_API_URL}/api/tts/{task_id}`` with the CF-Access
    credentials and returns the decoded JSON body.

    Args:
        task_id: ID of the TTS task
    Returns:
        Task result information
    Raises:
        requests.HTTPError: If the service answers with an error status
    """
    endpoint = f"{TTS_API_URL}/api/tts/{task_id}"
    request_headers = {
        'Content-Type': 'application/json',
        'CF-Access-Client-Id': TTS_CLIENT_ID,
        'CF-Access-Client-Secret': TTS_CLIENT_SECRET,
    }
    reply = requests.get(endpoint, headers=request_headers)
    # Turn 4xx/5xx answers into exceptions before touching the body.
    reply.raise_for_status()
    task_state = reply.json()
    return task_state
async def main():
    """
    Command-line entry point: synthesize speech and save it to a file.

    Parses ``--text``, ``--voice`` and ``--output``, submits the text with
    ``tts()``, polls ``get_task_result()`` once per second until the task
    is no longer PENDING, then base64-decodes the returned audio into the
    output file.

    Raises:
        SystemExit: With status 1 when submitting, polling or saving raises,
            or when the task does not finish with status SUCCESS. argparse
            also exits on invalid command-line arguments.
    """
    import sys  # local import: only needed here for stderr reporting

    parser = argparse.ArgumentParser(description='Text-to-Speech with CosyVoice')
    # --text is mandatory: without it args.text would be None and there
    # would be nothing to synthesize.
    parser.add_argument('--text', required=True,
                        help='Text to convert to speech')
    parser.add_argument('--voice', '-v', choices=list(voices.keys()), default='mk_girl',
                        help='Voice to use for synthesis')
    parser.add_argument('--output', '-o', default='output.wav',
                        help='Output audio file path')
    args = parser.parse_args()

    voice = voices[args.voice]
    print(f"Converting text to speech using voice: {voice['name']}")
    print(f"Text: {args.text}")

    try:
        task_id = await tts(args.text, voice)
        print(f"TTS request submitted. Task ID: {task_id}")

        # Poll until the service reports anything other than PENDING.
        while True:
            result = await get_task_result(task_id)
            if result['status'] != 'PENDING':
                break
            print("Waiting for TTS processing...")
            await asyncio.sleep(1)

        if result['status'] == 'SUCCESS':
            audio_data = result['audio_url']
            # Keep only what follows the first comma when one is present
            # (presumably a data-URI prefix -- TODO confirm).
            if ',' in audio_data:
                audio_data = audio_data.split(',')[1]
            # Decode before opening the output so that invalid base64 does
            # not leave an empty file behind.
            audio_bytes = base64.b64decode(audio_data)
            with open(args.output, 'wb') as f:
                f.write(audio_bytes)
            print(f"Audio saved to {args.output}")
            return

        print(f"TTS generation failed: {result['message']}", file=sys.stderr)
    except Exception as e:
        # Top-level boundary of the CLI: report the error instead of a traceback.
        print(f"Error: {str(e)}", file=sys.stderr)

    # Reached only on failure: signal it to the calling shell.
    raise SystemExit(1)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())