# sesame_openai/app/api/schemas.py
# karumati's picture
# yo
# 01115c6
# app/api/schemas.py
from enum import Enum
from typing import Optional, List, Dict, Any, Union
from pydantic import BaseModel, Field
# Voice options as a non-restrictive string
class Voice(str):
    """Voice identifier for the CSM model.

    A plain ``str`` subclass that adds no validation or behavior of its
    own, so any string value is accepted as a voice.
    """
class ResponseFormat(str, Enum):
    """Closed set of audio formats a speech response may be requested in.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value (for example ``ResponseFormat.mp3 == "mp3"``).
    """

    # Member names deliberately mirror their string values one-to-one.
    mp3 = "mp3"
    opus = "opus"
    aac = "aac"
    flac = "flac"
    wav = "wav"
# Create SpeechRequest for compatibility with our new code
class SpeechRequest(BaseModel):
    """Request body describing a speech-generation job.

    Only ``input`` (the text to synthesize) is required; every other field
    has a default. Besides the general model/voice/format/speed settings,
    the model carries CSM-specific generation controls.
    """

    model: Optional[str] = Field(
        default="csm-1b",
        description="The TTS model to use",
    )
    input: str = Field(
        ...,
        description="The text to generate audio for",
    )
    voice: Optional[str] = Field(
        default="alloy",
        description="The voice to use for generation",
    )
    response_format: Optional[ResponseFormat] = Field(
        default=ResponseFormat.mp3,
        description="The format of the audio response",
    )
    # Validation bounds: 0.25 <= speed <= 4.0.
    speed: Optional[float] = Field(
        default=1.0,
        description="The speed of the audio",
        ge=0.25,
        le=4.0,
    )

    # CSM-specific parameters
    max_audio_length_ms: Optional[float] = Field(
        default=90000,
        description="Maximum audio length in milliseconds",
    )
    # Validation bounds: 0.0 <= temperature <= 2.0.
    temperature: Optional[float] = Field(
        default=0.9,
        description="Sampling temperature",
        ge=0.0,
        le=2.0,
    )
    # Validation bounds: 1 <= topk <= 100.
    topk: Optional[int] = Field(
        default=50,
        description="Top-k for sampling",
        ge=1,
        le=100,
    )

    class Config:
        # Allow extra fields without error: unknown keys are ignored.
        extra = "ignore"
        # NOTE(review): no field in this class declares an alias, so this
        # flag has no visible effect here — confirm before removing.
        populate_by_name = True
# Maintain TTSRequest for backward compatibility
class TTSRequest(SpeechRequest):
    """Legacy name for ``SpeechRequest``, kept for backward compatibility.

    Inherits everything from ``SpeechRequest`` and adds no fields or
    behavior of its own.
    """
class TTSResponse(BaseModel):
    """Empty placeholder model that is only used for API documentation.

    It declares no fields.
    """