lucas-ventura committed on
Commit
4771930
·
verified ·
1 Parent(s): 35adc06

Create asr_faster_whisper.py

Browse files
Files changed (1) hide show
  1. tools/extract/asr_faster_whisper.py +38 -0
tools/extract/asr_faster_whisper.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ import torch
4
+ from faster_whisper import WhisperModel
5
+
6
+ from src.data.chapters import sec_to_hms
7
+
8
+ # Run on the GPU when CUDA is available, otherwise fall back to the CPU
9
+ device = "cuda" if torch.cuda.is_available() else "cpu"
10
+
11
+
12
class ASRProcessor:
    """
    Automatic Speech Recognition processor using faster-whisper.

    Transcribes an audio file and returns the transcript as text, one line
    per segment, each line prefixed with the segment's start time.
    """

    def __init__(self, model_name: str = "large-v2", compute_type: str = "float16"):
        """
        Load the Whisper model on the module-level ``device``.

        Args:
            model_name: Model identifier forwarded to ``WhisperModel``.
            compute_type: Compute type forwarded to ``WhisperModel``.
        """
        self.model_name = model_name
        self.model = WhisperModel(model_name, device=device, compute_type=compute_type)

    def get_asr(self, audio_file, return_duration: bool = True):
        """
        Transcribe ``audio_file`` into timestamped text.

        Args:
            audio_file: Path to the audio file to transcribe.
            return_duration: When true, also return ``info.duration`` as
                reported by the model's ``transcribe`` call.

        Returns:
            The transcript string (``"<start>: <text>"`` per segment, joined
            by newlines and terminated by a newline), or a
            ``(transcript, duration)`` tuple when ``return_duration`` is true.

        Raises:
            FileNotFoundError: If ``audio_file`` does not exist.
        """
        # Explicit raise instead of `assert`: asserts are removed under
        # `python -O`, which would let a bad path reach the model.
        if not Path(audio_file).exists():
            raise FileNotFoundError(f"File {audio_file} does not exist")

        segments, info = self.model.transcribe(
            audio_file, length_penalty=0.5, condition_on_previous_text=False
        )

        # One "<start>: <text>" line per segment; start times are formatted
        # by the project helper `sec_to_hms`.
        lines = [
            f"{sec_to_hms(segment.start)}: {segment.text.strip()}"
            for segment in segments
        ]
        # Always newline-terminated, even when no segments were produced.
        transcript = "\n".join(lines) + "\n"

        if return_duration:
            return transcript, info.duration
        return transcript