# Spaces: Running  (appears to be page-status text captured with this file; not part of the module)
import functools | |
import numpy as np | |
from faster_whisper.vad import VadOptions | |
from fastapi import ( | |
File, | |
UploadFile, | |
) | |
from fastapi import APIRouter, BackgroundTasks, Depends, Response, status | |
from typing import List, Dict | |
from datetime import datetime | |
from modules.vad.silero_vad import SileroVAD | |
from modules.whisper.data_classes import VadParams | |
from backend.common.audio import read_audio | |
from backend.common.models import QueueResponse | |
from backend.db.task.dao import add_task_to_db, update_task_status_in_db | |
from backend.db.task.models import TaskStatus, TaskType | |
# Router for the voice-activity-detection endpoints; every route attached to
# it is served under the "/vad" prefix and tagged "Voice Activity Detection".
vad_router = APIRouter(
    prefix="/vad",
    tags=["Voice Activity Detection"],
)
@functools.lru_cache(maxsize=1)
def get_vad_model() -> SileroVAD:
    """Return the process-wide ``SileroVAD`` inferencer, building it on first use.

    The first call constructs a ``SileroVAD`` and calls ``update_model()`` on
    it; the result is memoised so later calls return that same instance
    instead of repeating the setup for every queued task.

    Returns:
        The shared ``SileroVAD`` instance.
    """
    # NOTE(review): the cached instance is shared by every caller, including
    # background tasks that may overlap — confirm ``SileroVAD.run`` is safe to
    # call concurrently on one instance.
    inferencer = SileroVAD()
    inferencer.update_model()
    return inferencer
def run_vad(
    audio: np.ndarray,
    params: VadOptions,
    identifier: str,
) -> List[Dict]:
    """Run voice activity detection for a queued task and record the outcome.

    Marks the task ``IN_PROGRESS``, runs the VAD model on ``audio``, then marks
    the task ``COMPLETED`` with the detected speech chunks and the processing
    time in seconds.

    Args:
        audio: Audio samples handed to the VAD model.
        params: VAD options forwarded as ``vad_parameters``.
        identifier: Task identifier used for both status updates.

    Returns:
        The speech chunks returned by the VAD model.
    """
    # Imported locally so this function does not depend on a module-level
    # ``import time`` being present.
    import time

    update_task_status_in_db(
        identifier=identifier,
        update_data={
            "uuid": identifier,
            "status": TaskStatus.IN_PROGRESS,
            "updated_at": datetime.utcnow(),
        },
    )

    # NOTE(review): if the model call below raises, nothing here moves the
    # task out of IN_PROGRESS — confirm whether a failure status should be
    # written by this function or by the caller.

    # Measure the duration with a monotonic high-resolution clock; subtracting
    # wall-clock timestamps can give a wrong (even negative) duration if the
    # system clock is adjusted while the model runs.
    started = time.perf_counter()
    # The model also returns an audio array, which this function does not use.
    _processed_audio, speech_chunks = get_vad_model().run(
        audio=audio,
        vad_parameters=params,
    )
    elapsed_time = time.perf_counter() - started

    update_task_status_in_db(
        identifier=identifier,
        update_data={
            "uuid": identifier,
            "status": TaskStatus.COMPLETED,
            "updated_at": datetime.utcnow(),
            "result": speech_chunks,
            "duration": elapsed_time,
        },
    )

    return speech_chunks
async def vad(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(..., description="Audio or video file to detect voices."),
    params: VadParams = Depends(),
) -> QueueResponse:
    """Queue a voice activity detection task for the given audio.

    Reads the input, stores a ``QUEUED`` task record, schedules ``run_vad`` as
    a background task, and returns the task identifier immediately.

    Args:
        background_tasks: Task scheduler used to run the detection.
        file: Uploaded audio/video file, or an already-decoded ``np.ndarray``
            when the function is called directly.
        params: VAD settings; copied into ``VadOptions`` and stored with the
            task.

    Returns:
        A ``QueueResponse`` carrying the new task identifier and ``QUEUED``
        status.
    """
    # NOTE(review): no route decorator is visible on this handler in the
    # reviewed text — confirm it is registered on ``vad_router``.

    if isinstance(file, np.ndarray):
        # A raw array has no upload metadata. The original read
        # ``file.filename`` unconditionally, which raised AttributeError on
        # this path.
        # NOTE(review): assumes ``add_task_to_db`` accepts ``file_name=None``,
        # as it does for ``audio_duration`` — confirm.
        audio, info = file, None
        file_name = None
    else:
        audio, info = await read_audio(file=file)
        file_name = file.filename

    vad_options = VadOptions(
        threshold=params.threshold,
        min_speech_duration_ms=params.min_speech_duration_ms,
        max_speech_duration_s=params.max_speech_duration_s,
        min_silence_duration_ms=params.min_silence_duration_ms,
        speech_pad_ms=params.speech_pad_ms,
    )

    identifier = add_task_to_db(
        status=TaskStatus.QUEUED,
        file_name=file_name,
        audio_duration=info.duration if info else None,
        task_type=TaskType.VAD,
        task_params=params.model_dump(),
    )

    background_tasks.add_task(
        run_vad,
        audio=audio,
        params=vad_options,
        identifier=identifier,
    )

    return QueueResponse(
        identifier=identifier,
        status=TaskStatus.QUEUED,
        message="VAD task has queued",
    )