Spaces:
Paused
Paused
feat: try to add language detector 3
Browse files
- language_detector.py +7 -7
- requirements.txt +1 -1
language_detector.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import whisper
|
2 |
import numpy as np
|
3 |
import logging
|
4 |
import io
|
@@ -14,7 +14,7 @@ class LanguageDetector:
|
|
14 |
Args:
|
15 |
model_name (str): Name of the Whisper model to use. Default is "tiny" which is sufficient for language detection.
|
16 |
"""
|
17 |
-
self.model = whisper.load_model(model_name)
|
18 |
logger.info(f"Loaded Whisper model {model_name} for language detection")
|
19 |
|
20 |
def detect_language_from_file(self, audio_file_path):
|
@@ -30,11 +30,11 @@ class LanguageDetector:
|
|
30 |
"""
|
31 |
try:
|
32 |
# Load and preprocess audio
|
33 |
-
audio = whisper.load_audio(audio_file_path)
|
34 |
-
audio = whisper.pad_or_trim(audio)
|
35 |
|
36 |
# Make log-Mel spectrogram
|
37 |
-
mel = whisper.log_mel_spectrogram(audio).to(self.model.device)
|
38 |
|
39 |
# Detect language
|
40 |
_, probs = self.model.detect_language(mel)
|
@@ -67,10 +67,10 @@ class LanguageDetector:
|
|
67 |
audio = (audio * 32768).astype(np.int16)
|
68 |
|
69 |
# Load and preprocess audio
|
70 |
-
audio = whisper.pad_or_trim(audio)
|
71 |
|
72 |
# Make log-Mel spectrogram
|
73 |
-
mel = whisper.log_mel_spectrogram(audio).to(self.model.device)
|
74 |
|
75 |
# Detect language
|
76 |
_, probs = self.model.detect_language(mel)
|
|
|
1 |
+
import whisper as whp
|
2 |
import numpy as np
|
3 |
import logging
|
4 |
import io
|
|
|
14 |
Args:
|
15 |
model_name (str): Name of the Whisper model to use. Default is "tiny" which is sufficient for language detection.
|
16 |
"""
|
17 |
+
self.model = whp.load_model(model_name)
|
18 |
logger.info(f"Loaded Whisper model {model_name} for language detection")
|
19 |
|
20 |
def detect_language_from_file(self, audio_file_path):
|
|
|
30 |
"""
|
31 |
try:
|
32 |
# Load and preprocess audio
|
33 |
+
audio = whp.load_audio(audio_file_path)
|
34 |
+
audio = whp.pad_or_trim(audio)
|
35 |
|
36 |
# Make log-Mel spectrogram
|
37 |
+
mel = whp.log_mel_spectrogram(audio).to(self.model.device)
|
38 |
|
39 |
# Detect language
|
40 |
_, probs = self.model.detect_language(mel)
|
|
|
67 |
audio = (audio * 32768).astype(np.int16)
|
68 |
|
69 |
# Load and preprocess audio
|
70 |
+
audio = whp.pad_or_trim(audio)
|
71 |
|
72 |
# Make log-Mel spectrogram
|
73 |
+
mel = whp.log_mel_spectrogram(audio).to(self.model.device)
|
74 |
|
75 |
# Detect language
|
76 |
_, probs = self.model.detect_language(mel)
|
requirements.txt
CHANGED
@@ -13,5 +13,5 @@ setuptools>=65.5.1
|
|
13 |
librosa>=0.10.0
|
14 |
mosestokenizer
|
15 |
hf_xet
|
16 |
-
whisper
|
17 |
librosa
|
|
|
13 |
librosa>=0.10.0
|
14 |
mosestokenizer
|
15 |
hf_xet
|
16 |
+
openai-whisper
|
17 |
librosa
|