jamiya / app /utils /audio_utils.py
jameszokah's picture
init commit
383520d
"""Audio utilities for CSM-1B API."""
import io
import tempfile
from typing import Optional
import os
import torch
import torchaudio
import ffmpeg
def convert_audio_format(
audio_tensor: torch.Tensor,
sample_rate: int,
format: str = "mp3",
bit_rate: Optional[str] = "128k",
) -> bytes:
"""Convert audio tensor to specified format.
Args:
audio_tensor: Audio tensor (channels, samples)
sample_rate: Sample rate
format: Output format (mp3, opus, aac, flac, wav)
bit_rate: Bit rate for lossy formats
Returns:
Audio bytes in specified format
"""
# Create temporary files
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
wav_path = temp_wav.name
temp_out = tempfile.NamedTemporaryFile(suffix=f".{format}", delete=False)
out_path = temp_out.name
temp_out.close()
try:
# Save as WAV first (native format for torchaudio)
torchaudio.save(wav_path, audio_tensor.unsqueeze(0) if audio_tensor.dim() == 1 else audio_tensor,
sample_rate)
# Convert to desired format using ffmpeg
if format == "mp3":
ffmpeg.input(wav_path).output(out_path, format=format, audio_bitrate=bit_rate).run(quiet=True)
elif format in ["opus", "aac"]:
ffmpeg.input(wav_path).output(out_path, format=format).run(quiet=True)
elif format == "flac":
ffmpeg.input(wav_path).output(out_path, format=format).run(quiet=True)
elif format == "wav":
# Already saved as WAV
pass
# Read the output file
with open(out_path if format != "wav" else wav_path, "rb") as f:
audio_bytes = f.read()
return audio_bytes
finally:
# Clean up temporary files
for path in [wav_path, out_path]:
if os.path.exists(path):
os.unlink(path)