Spaces:
Sleeping
Sleeping
from pydub import AudioSegment | |
import random | |
def create_music_speech_mix(
    speech_path: str,
    music_path: str = "data/instrumental.wav",
    output_path: str = "data/output.wav",
    *,
    music_gain_db: float = 0.0,
) -> str:
    """Overlay a speech clip on a randomly chosen excerpt of background music.

    An excerpt of the music with the same duration as the speech is cut at a
    random offset, the speech is overlaid on it, and the mix is written to
    ``output_path`` as a WAV file.

    Args:
        speech_path: Path to the speech WAV file.
        music_path: Path to the music WAV file
            (default: ``data/instrumental.wav``).
        output_path: Path for the output WAV file
            (default: ``data/output.wav``).
        music_gain_db: Gain in dB applied to the music excerpt before mixing.
            Negative values make the music quieter (e.g. ``-10``). The
            default ``0.0`` leaves the music level untouched.

    Returns:
        The ``output_path`` the mixed audio was written to.

    Raises:
        ValueError: If the speech audio is longer than the music.
    """
    speech = AudioSegment.from_wav(speech_path)
    music = AudioSegment.from_wav(music_path)

    # len() of an AudioSegment is its duration in milliseconds.
    speech_ms = len(speech)
    music_ms = len(music)
    if speech_ms > music_ms:
        raise ValueError("Speech audio is longer than background music!")

    # randint is inclusive on both ends, so the excerpt may end exactly at
    # the end of the music.
    start_ms = random.randint(0, music_ms - speech_ms)
    music_segment = music[start_ms : start_ms + speech_ms]

    # Skip the gain step entirely at 0 dB so the default path leaves the
    # music samples exactly as they were read.
    if music_gain_db:
        music_segment = music_segment.apply_gain(music_gain_db)

    # Speech is laid on top of the music excerpt starting at its beginning.
    combined = music_segment.overlay(speech)
    combined.export(output_path, format="wav")
    return output_path
if __name__ == "__main__":
    # Script entry point: mix the clip at tests/infer_cli_basic.wav using the
    # function's default music and output paths, then report the output file.
    output_path = create_music_speech_mix(
        speech_path="tests/infer_cli_basic.wav",
    )
    print(f"Created {output_path} using music")