Spaces:

kaysrubio
/

speech_transcribe_phonemes_and_accent

Running

App Files Community

DontFreakOut committed on Mar 6

Commit

f847932

1 Parent(s): 4855128

Updated logic

Browse files

Files changed (1) hide show

app.py +13 -12

app.py CHANGED Viewed

@@ -28,23 +28,23 @@ esl_phoneme_pipe = pipeline("automatic-speech-recognition", model="mrrubino/wav2
 # Set up pipe for 2 accent classification models
 classifier = EncoderClassifier.from_hparams(source="Jzuluaga/accent-id-commonaccent_ecapa", savedir="pretrained_models/accent-id-commonaccent_ecapa")
-def native_accent_classifier(audio):
-  out_prob, score, index, text_lab = classifier.classify_file(audio)
-  return [{'accent': text_lab[0], 'score': round(score[0],2)}]
-def esl_accent_classifier(audio):
   esl_accent_pipe = pipeline(
     "audio-classification",
     model="kaysrubio/accent-id-distilhubert-finetuned-l2-arctic2"
   )
   audio = audio.squeeze().numpy()
   result = esl_accent_pipe(audio, top_k=6)
   return [{'accent': result[0]['label'], 'score': round(result[0]['score'],2)}]
 def transcribe_and_classify_speech(file):
-  audio, sr = torchaudio.load(file)  # Load audio
-  audio = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)(audio)
   try:
       asr_output = asr_pipe(
         file,
@@ -69,13 +69,13 @@ def transcribe_and_classify_speech(file):
     esl_phoneme_output = "Error"
   try:
-    native_accent_output = native_accent_classifier(audio)
   except Exception as e:
     print(f"An error occurred with Jzuluaga/accent-id-commonaccent_ecapa: {e}")
-    native_accent_output = [{'accent': e}, {'score': .0}]
   try:
-    esl_accent_output = esl_accent_classifier(audio)
   except Exception as e:
     print(f"An error occurred with kaysrubio/accent-id-distilhubert-finetuned-l2-arctic2: {e}")
     esl_accent_output = [{'accent': 'Unknown-please upload single channel audio'}, {'score': .0}]
@@ -119,7 +119,8 @@ file_transcribe = create_transcription_interface("upload")
 demo = gr.TabbedInterface(
     [mic_transcribe, file_transcribe],
     ["Microphone Input", "Upload .wav file"],
-    title="Speech Transcription and Accent Classification",
 )
-demo.launch(debug=True)

 # Set up pipe for 2 accent classification models
 classifier = EncoderClassifier.from_hparams(source="Jzuluaga/accent-id-commonaccent_ecapa", savedir="pretrained_models/accent-id-commonaccent_ecapa")
+def native_accent_classifier(file):
+  out_prob, score, index, text_lab = classifier.classify_file(file)
+  rounded_score = round(score.item(), 2)
+  return [{'accent': text_lab[0], 'score': rounded_score}]
+def esl_accent_classifier(file):
   esl_accent_pipe = pipeline(
     "audio-classification",
     model="kaysrubio/accent-id-distilhubert-finetuned-l2-arctic2"
   )
+  audio, sr = torchaudio.load(file)  # Load audio
+  audio = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)(audio)
   audio = audio.squeeze().numpy()
   result = esl_accent_pipe(audio, top_k=6)
   return [{'accent': result[0]['label'], 'score': round(result[0]['score'],2)}]
 def transcribe_and_classify_speech(file):
   try:
       asr_output = asr_pipe(
         file,
     esl_phoneme_output = "Error"
   try:
+    native_accent_output = native_accent_classifier(file)
   except Exception as e:
     print(f"An error occurred with Jzuluaga/accent-id-commonaccent_ecapa: {e}")
+    native_accent_output = [{'accent': "Error"}, {'score': .0}]
   try:
+    esl_accent_output = esl_accent_classifier(file)
   except Exception as e:
     print(f"An error occurred with kaysrubio/accent-id-distilhubert-finetuned-l2-arctic2: {e}")
     esl_accent_output = [{'accent': 'Unknown-please upload single channel audio'}, {'score': .0}]
 demo = gr.TabbedInterface(
     [mic_transcribe, file_transcribe],
     ["Microphone Input", "Upload .wav file"],
+    title="Speech Recognition and Accent Classification",
 )
+demo.launch()
+# demo.launch(debug=True)