DontFreakOut
committed on
Commit
·
f847932
1
Parent(s):
4855128
Updated logic
Browse files
app.py
CHANGED
@@ -28,23 +28,23 @@ esl_phoneme_pipe = pipeline("automatic-speech-recognition", model="mrrubino/wav2
|
|
28 |
# Set up pipe for 2 accent classification models
|
29 |
classifier = EncoderClassifier.from_hparams(source="Jzuluaga/accent-id-commonaccent_ecapa", savedir="pretrained_models/accent-id-commonaccent_ecapa")
|
30 |
|
31 |
-
def native_accent_classifier(
|
32 |
-
out_prob, score, index, text_lab = classifier.classify_file(
|
33 |
-
|
|
|
34 |
|
35 |
-
def esl_accent_classifier(
|
36 |
esl_accent_pipe = pipeline(
|
37 |
"audio-classification",
|
38 |
model="kaysrubio/accent-id-distilhubert-finetuned-l2-arctic2"
|
39 |
)
|
|
|
|
|
40 |
audio = audio.squeeze().numpy()
|
41 |
result = esl_accent_pipe(audio, top_k=6)
|
42 |
return [{'accent': result[0]['label'], 'score': round(result[0]['score'],2)}]
|
43 |
|
44 |
def transcribe_and_classify_speech(file):
|
45 |
-
audio, sr = torchaudio.load(file) # Load audio
|
46 |
-
audio = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)(audio)
|
47 |
-
|
48 |
try:
|
49 |
asr_output = asr_pipe(
|
50 |
file,
|
@@ -69,13 +69,13 @@ def transcribe_and_classify_speech(file):
|
|
69 |
esl_phoneme_output = "Error"
|
70 |
|
71 |
try:
|
72 |
-
native_accent_output = native_accent_classifier(
|
73 |
except Exception as e:
|
74 |
print(f"An error occurred with Jzuluaga/accent-id-commonaccent_ecapa: {e}")
|
75 |
-
native_accent_output = [{'accent':
|
76 |
|
77 |
try:
|
78 |
-
esl_accent_output = esl_accent_classifier(
|
79 |
except Exception as e:
|
80 |
print(f"An error occurred with kaysrubio/accent-id-distilhubert-finetuned-l2-arctic2: {e}")
|
81 |
esl_accent_output = [{'accent': 'Unknown-please upload single channel audio'}, {'score': .0}]
|
@@ -119,7 +119,8 @@ file_transcribe = create_transcription_interface("upload")
|
|
119 |
demo = gr.TabbedInterface(
|
120 |
[mic_transcribe, file_transcribe],
|
121 |
["Microphone Input", "Upload .wav file"],
|
122 |
-
title="Speech
|
123 |
)
|
124 |
|
125 |
-
demo.launch(
|
|
|
|
28 |
# Set up pipe for 2 accent classification models
|
29 |
classifier = EncoderClassifier.from_hparams(source="Jzuluaga/accent-id-commonaccent_ecapa", savedir="pretrained_models/accent-id-commonaccent_ecapa")
|
30 |
|
31 |
+
def native_accent_classifier(file):
    """Classify the accent of the audio in *file* with the module-level ``classifier``.

    Args:
        file: Audio file reference passed straight to ``classifier.classify_file``.

    Returns:
        A one-element list holding a dict with the predicted label under
        ``'accent'`` and its score, rounded to two decimals, under ``'score'``.
    """
    # classify_file yields a 4-tuple; only the score and the text label are used.
    _, score, _, text_lab = classifier.classify_file(file)
    prediction = {
        'accent': text_lab[0],
        'score': round(score.item(), 2),
    }
    return [prediction]
|
35 |
|
36 |
+
def esl_accent_classifier(file):
    """Classify the (ESL) accent of the audio in *file*.

    The audio is loaded with torchaudio, resampled to 16 kHz, reduced to a
    single channel and fed to the
    ``kaysrubio/accent-id-distilhubert-finetuned-l2-arctic2``
    audio-classification pipeline.

    Args:
        file: Audio file reference accepted by ``torchaudio.load``.

    Returns:
        A one-element list holding a dict with the top label under
        ``'accent'`` and its score, rounded to two decimals, under ``'score'``.
    """
    # Build the pipeline once and reuse it: constructing it on every call
    # reloads the model for each request.
    esl_accent_pipe = getattr(esl_accent_classifier, "_pipe", None)
    if esl_accent_pipe is None:
        esl_accent_pipe = pipeline(
            "audio-classification",
            model="kaysrubio/accent-id-distilhubert-finetuned-l2-arctic2"
        )
        esl_accent_classifier._pipe = esl_accent_pipe

    audio, sr = torchaudio.load(file)  # Load audio
    audio = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)(audio)
    # Average over the leading (channel) axis so multi-channel uploads become a
    # 1-D waveform; for single-channel audio this equals the former squeeze().
    # Assumes torchaudio's default channels-first layout.
    if audio.dim() > 1:
        audio = audio.mean(dim=0)
    audio = audio.numpy()

    result = esl_accent_pipe(audio, top_k=6)
    best = result[0]
    return [{'accent': best['label'], 'score': round(best['score'], 2)}]
|
46 |
|
47 |
def transcribe_and_classify_speech(file):
|
|
|
|
|
|
|
48 |
try:
|
49 |
asr_output = asr_pipe(
|
50 |
file,
|
|
|
69 |
esl_phoneme_output = "Error"
|
70 |
|
71 |
try:
|
72 |
+
native_accent_output = native_accent_classifier(file)
|
73 |
except Exception as e:
|
74 |
print(f"An error occurred with Jzuluaga/accent-id-commonaccent_ecapa: {e}")
|
75 |
+
native_accent_output = [{'accent': "Error"}, {'score': .0}]
|
76 |
|
77 |
try:
|
78 |
+
esl_accent_output = esl_accent_classifier(file)
|
79 |
except Exception as e:
|
80 |
print(f"An error occurred with kaysrubio/accent-id-distilhubert-finetuned-l2-arctic2: {e}")
|
81 |
esl_accent_output = [{'accent': 'Unknown-please upload single channel audio'}, {'score': .0}]
|
|
|
119 |
demo = gr.TabbedInterface(
|
120 |
[mic_transcribe, file_transcribe],
|
121 |
["Microphone Input", "Upload .wav file"],
|
122 |
+
title="Speech Recognition and Accent Classification",
|
123 |
)
|
124 |
|
125 |
+
demo.launch()
|
126 |
+
# demo.launch(debug=True)
|