Spaces:
Running
Running
File size: 1,978 Bytes
383520d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
"""Audio utilities for CSM-1B API."""
import io
import tempfile
from typing import Optional
import os
import torch
import torchaudio
import ffmpeg
def convert_audio_format(
    audio_tensor: torch.Tensor,
    sample_rate: int,
    format: str = "mp3",
    bit_rate: Optional[str] = "128k",
) -> bytes:
    """Convert an audio tensor to encoded audio bytes.

    The tensor is first written to a temporary WAV file with torchaudio.
    For every format other than WAV, that file is then transcoded with
    ffmpeg. All temporary files are removed before returning, whether or
    not the conversion succeeded.

    Args:
        audio_tensor: Audio tensor, either 1-D (samples) or 2-D
            (channels, samples). A 1-D tensor gets a leading channel
            dimension added before saving.
        sample_rate: Sample rate passed to torchaudio when writing the WAV.
        format: Output format; one of mp3, opus, aac, flac, wav.
        bit_rate: Audio bit rate (e.g. "128k") applied to the lossy formats
            (mp3, opus, aac). ``None`` omits the option so ffmpeg picks its
            own default. Ignored for flac and wav.

    Returns:
        Audio bytes in the specified format.

    Raises:
        ValueError: If ``format`` is not one of the supported formats.

    Errors raised by torchaudio or ffmpeg are not caught here and propagate
    to the caller unchanged.
    """
    # ffmpeg muxer name for each transcoded format.  Raw AAC streams are
    # written by the "adts" muxer; "aac" is not a valid ffmpeg output format.
    muxers = {"mp3": "mp3", "opus": "opus", "aac": "adts", "flac": "flac"}
    lossy_formats = ("mp3", "opus", "aac")

    # Validate up front so an unknown format fails loudly instead of
    # returning the contents of an empty placeholder file.
    if format != "wav" and format not in muxers:
        supported = ", ".join(sorted([*muxers, "wav"]))
        raise ValueError(
            f"Unsupported audio format: {format!r} (expected one of: {supported})"
        )

    temp_paths = []
    try:
        # Reserve a path for the intermediate WAV; the handle is closed
        # immediately so torchaudio can reopen the file by name.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
            wav_path = temp_wav.name
        temp_paths.append(wav_path)

        # Save as WAV first; a 1-D tensor is promoted to (1, samples).
        waveform = audio_tensor.unsqueeze(0) if audio_tensor.dim() == 1 else audio_tensor
        torchaudio.save(wav_path, waveform, sample_rate)

        result_path = wav_path
        if format != "wav":
            with tempfile.NamedTemporaryFile(suffix=f".{format}", delete=False) as temp_out:
                out_path = temp_out.name
            temp_paths.append(out_path)

            output_kwargs = {"format": muxers[format]}
            # Only forward a bit rate when one was given and it is meaningful
            # for the codec; passing None would reach ffmpeg as "-b:a None".
            if bit_rate is not None and format in lossy_formats:
                output_kwargs["audio_bitrate"] = bit_rate

            # The placeholder output file already exists, so ffmpeg must be
            # told to overwrite it or it refuses to write.
            ffmpeg.input(wav_path).output(out_path, **output_kwargs).run(
                quiet=True, overwrite_output=True
            )
            result_path = out_path

        with open(result_path, "rb") as f:
            return f.read()
    finally:
        # Clean up temporary files; a missing file is not an error here.
        for path in temp_paths:
            try:
                os.unlink(path)
            except FileNotFoundError:
                pass
|