0xrushi
Initial commit
06bc80f
raw
history blame contribute delete
1.4 kB
from pydub import AudioSegment
import random
def create_music_speech_mix(speech_path, music_path="data/instrumental.wav", output_path="data/output.wav"):
    """
    Overlay a speech recording on a randomly chosen excerpt of background music.

    An excerpt of the music with the same duration as the speech is taken at a
    random offset, the speech is overlaid on it, and the result is written to
    ``output_path`` as a WAV file.

    Args:
        speech_path (str): Path to speech WAV file
        music_path (str): Path to music WAV file (default: data/instrumental.wav)
        output_path (str): Path for output WAV file (default: data/output.wav)

    Returns:
        str: ``output_path``, i.e. the location the mix was written to.

    Raises:
        ValueError: If the speech audio is longer than the background music.
    """
    speech = AudioSegment.from_wav(speech_path)
    music = AudioSegment.from_wav(music_path)

    # Durations (in milliseconds)
    speech_len = len(speech)
    music_len = len(music)
    if speech_len > music_len:
        raise ValueError("Speech audio is longer than background music!")

    # Choose a random start point; randint is inclusive at both ends, so the
    # excerpt may end exactly at the end of the music.
    max_start = music_len - speech_len
    start_ms = random.randint(0, max_start)

    # Extract the music excerpt that the speech will be laid over.
    music_segment = music[start_ms : start_ms + speech_len]

    # The music is mixed at its original level (no attenuation is applied).
    combined = music_segment.overlay(speech)

    # export() hands back the file object it opened for writing; close it
    # explicitly so the handle is released deterministically rather than
    # whenever the garbage collector gets to it.
    combined.export(output_path, format="wav").close()
    return output_path
if __name__ == "__main__":
    # Script entry point: mix the sample speech clip over the default
    # instrumental track and report where the result was written.
    sample_speech = "tests/infer_cli_basic.wav"
    output_path = create_music_speech_mix(sample_speech)
    print(f"Created {output_path} using music")