DontFreakOut committed · Commit d40ccca · 1 Parent(s): 7df1980

updating gradio interface
app.py CHANGED
@@ -71,7 +71,7 @@ def transcribe_and_classify_speech(audio):
         native_accent_output = native_accent_classifier(audio)
     except Exception as e:
         print(f"An error occurred with Jzuluaga/accent-id-commonaccent_ecapa: {e}")
-        native_accent_output = [{'accent':
+        native_accent_output = [{'accent': e}, {'score': .0}]
 
     try:
         esl_accent_output = esl_accent_classifier(audio)
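The fixed line gives `native_accent_output` a fallback with the same two-element shape the rest of the function appears to read (`[0]['accent']`, then `[1]['score']`). A minimal sketch of that pattern, with a hypothetical `classify` callable standing in for the real pipeline; `str(e)` is used here because the raw exception object in the committed fallback may not be JSON-serializable when displayed in the `gr.JSON` output:

```python
# Sketch of the fallback pattern from the hunk above; `classify` and the
# two-dict output shape are assumptions, not the app's confirmed API.
def classify_with_fallback(classify, audio):
    try:
        return classify(audio)  # assumed shape: [{'accent': label}, {'score': value}]
    except Exception as e:
        print(f"Classifier failed: {e}")
        # str(e) keeps the fallback JSON-serializable for display
        return [{'accent': str(e)}, {'score': 0.0}]
```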
@@ -88,36 +88,38 @@ def transcribe_and_classify_speech(audio):
     ]
     return output
 
+## Set up gradio app
 demo = gr.Blocks()
 
-examples = [['chinese-american.wav'], ['mexican.wav'], ['vietnamese.wav'], ['indian.wav'], ['nigerian.wav']]
-
-
-
-
-
-
-
-
-
-
-
-
-)
-
-
-
-
+examples = [['chinese-american.wav'], ['mexican.wav'], ['vietnamese.wav'], ['indian.wav'], ['nigerian.wav'], ['irish.wav']]
+
+# Create a function to generate a vertically stacked interface
+def create_transcription_interface(source):
+    with gr.Blocks() as interface:
+        gr.Markdown("""
+        Input: Use microphone, upload .wav file, or choose an example below
+        Output will include results from the following models:
+        - Transcription from OpenAI's Whisper [openai/whisper-base.en](https://huggingface.co/openai/whisper-base.en)
+        - Phonemic transcription trained on native English speakers [vitouphy/wav2vec2-xls-r-300m-timit-phoneme](https://huggingface.co/vitouphy/wav2vec2-xls-r-300m-timit-phoneme)
+        - Phonemic transcription trained on speakers of English as a second language [mrrubino/wav2vec2-large-xlsr-53-l2-arctic-phoneme](https://huggingface.co/mrrubino/wav2vec2-large-xlsr-53-l2-arctic-phoneme)
+        - Accent classification trained on native English speakers [Jzuluaga/accent-id-commonaccent_ecapa](https://huggingface.co/Jzuluaga/accent-id-commonaccent_ecapa)
+        - Accent classification trained on speakers of English as a second language [kaysrubio/accent-id-distilhubert-finetuned-l2-arctic2](https://huggingface.co/kaysrubio/accent-id-distilhubert-finetuned-l2-arctic2)
+        """)
+        with gr.Column():
+            audio_input = gr.Audio(sources=source, type="filepath", label="Upload Audio")
+            output = gr.JSON(label="Results")
+            audio_input.change(fn=transcribe_and_classify_speech, inputs=audio_input, outputs=output)
+        gr.Examples(examples=examples, inputs=[audio_input])
+    return interface
+
+# Create two interfaces (one for mic, one for file upload)
+mic_transcribe = create_transcription_interface("microphone")
+file_transcribe = create_transcription_interface("upload")
+
+demo = gr.TabbedInterface(
     [mic_transcribe, file_transcribe],
-["
-
-
-demo.launch(debug=True)
-
-
-#def greet(name):
-#    return "Hello " + name + "!!"
+    ["Microphone Input", "Upload .wav file"],
+    title="Speech Transcription, Phonemic Transcription, and Accent Classification",
+)
 
-
-#demo.launch()
+demo.launch(debug=True)
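The new interface code follows a common Gradio pattern: build one `gr.Blocks` layout per input source, then group them with `gr.TabbedInterface`. A stripped-down, self-contained sketch of that structure, with a placeholder handler in place of `transcribe_and_classify_speech`:

```python
import gradio as gr

# Placeholder handler; the real app calls transcribe_and_classify_speech here.
def handle_audio(audio_path):
    return {"received": audio_path}

def make_interface(source):
    # One vertically stacked Blocks layout per input source
    with gr.Blocks() as interface:
        with gr.Column():
            audio_input = gr.Audio(sources=[source], type="filepath", label="Upload Audio")
            output = gr.JSON(label="Results")
            # Fire the handler whenever the audio value changes
            audio_input.change(fn=handle_audio, inputs=audio_input, outputs=output)
    return interface

demo = gr.TabbedInterface(
    [make_interface("microphone"), make_interface("upload")],
    ["Microphone Input", "Upload .wav file"],
)

if __name__ == "__main__":
    demo.launch()
```

One design note on the committed code: `demo = gr.Blocks()` is assigned early and then overwritten by `demo = gr.TabbedInterface(...)`, leaving an unused Blocks object; the TabbedInterface alone is sufficient.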