File size: 1,978 Bytes
383520d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""Audio utilities for CSM-1B API."""

import io
import tempfile
from typing import Optional
import os

import torch
import torchaudio
import ffmpeg


def convert_audio_format(
    audio_tensor: torch.Tensor,
    sample_rate: int,
    format: str = "mp3",
    bit_rate: Optional[str] = "128k",
) -> bytes:
    """Convert an audio tensor to encoded bytes in the requested format.

    The tensor is first written to a temporary WAV file with torchaudio and,
    for every format other than ``wav``, transcoded with ffmpeg. All
    intermediate files live in a temporary directory that is removed before
    the function returns, whether it succeeds or raises.

    Args:
        audio_tensor: Audio tensor (channels, samples); a 1-D tensor is
            treated as a single channel.
        sample_rate: Sample rate
        format: Output format (mp3, opus, aac, flac, wav)
        bit_rate: Bit rate for lossy formats (mp3, opus, aac), e.g. "128k".
            ``None`` leaves the choice to the encoder. Ignored for flac/wav.

    Returns:
        Audio bytes in specified format

    Raises:
        ValueError: If ``format`` is not one of the supported formats.
        ffmpeg.Error: If the ffmpeg process exits with an error.
    """
    # Requested format -> ffmpeg muxer name. ffmpeg has no "aac" muxer
    # ("aac" is demux-only); raw AAC streams are written with "adts".
    muxers = {"mp3": "mp3", "opus": "opus", "aac": "adts", "flac": "flac"}
    lossy_formats = ("mp3", "opus", "aac")

    # Validate up front so an unknown format fails loudly instead of
    # silently producing empty output.
    if format != "wav" and format not in muxers:
        supported = ", ".join(sorted([*muxers, "wav"]))
        raise ValueError(
            f"Unsupported audio format: {format!r} (expected one of: {supported})"
        )

    waveform = audio_tensor.unsqueeze(0) if audio_tensor.dim() == 1 else audio_tensor
    # Saving to a file needs a plain CPU tensor without autograd history.
    waveform = waveform.detach().cpu()

    with tempfile.TemporaryDirectory() as work_dir:
        # Save as WAV first (native format for torchaudio)
        wav_path = os.path.join(work_dir, "audio.wav")
        torchaudio.save(wav_path, waveform, sample_rate)

        result_path = wav_path
        if format != "wav":
            result_path = os.path.join(work_dir, f"audio.{format}")
            output_kwargs = {"format": muxers[format]}
            # Only forward a bit rate when one was given; passing None
            # would emit a malformed "-b:a" argument.
            if bit_rate is not None and format in lossy_formats:
                output_kwargs["audio_bitrate"] = bit_rate
            (
                ffmpeg.input(wav_path)
                .output(result_path, **output_kwargs)
                .run(quiet=True, overwrite_output=True)
            )

        with open(result_path, "rb") as f:
            return f.read()