Spaces:
unijoh
/
Runtime error

unijoh committed on
Commit
ce8e849
·
verified ·
1 Parent(s): 237c3b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -97
app.py CHANGED
@@ -1,87 +1,18 @@
1
  import gradio as gr
2
- import librosa
3
  from asr import transcribe, ASR_EXAMPLES, ASR_NOTE
4
  from tts import synthesize, TTS_EXAMPLES
5
  from lid import identify, LID_EXAMPLES
6
 
7
- demo = gr.Blocks()
8
-
9
- mms_select_source_trans = gr.Radio(
10
- ["Record from Mic", "Upload audio"],
11
- label="Audio input",
12
- value="Record from Mic",
13
- )
14
- mms_mic_source_trans = gr.Audio(source="microphone", type="filepath", label="Use mic")
15
- mms_upload_source_trans = gr.Audio(
16
- source="upload", type="filepath", label="Upload file", visible=False
17
- )
18
- mms_transcribe = gr.Interface(
19
- fn=lambda audio_input, mic_input, upload_input: transcribe(audio_input, mic_input, upload_input, "fao (Faroese)"),
20
- inputs=[
21
- mms_select_source_trans,
22
- mms_mic_source_trans,
23
- mms_upload_source_trans,
24
- # Hidden language input
25
- gr.Textbox(value="fao (Faroese)", visible=False),
26
- # gr.Checkbox(label="Use Language Model (if available)", default=True),
27
- ],
28
- outputs="text",
29
- examples=ASR_EXAMPLES,
30
- title="Speech-to-text",
31
- description=(
32
- "Transcribe audio from a microphone or input file in Faroese."
33
- ),
34
- article=ASR_NOTE,
35
- allow_flagging="never",
36
- )
37
-
38
- mms_synthesize = gr.Interface(
39
- fn=lambda text, speed: synthesize(text, "fao (Faroese)", speed),
40
- inputs=[
41
- gr.Text(label="Input text"),
42
- # Hidden language input
43
- gr.Textbox(value="fao (Faroese)", visible=False),
44
- gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Speed"),
45
- ],
46
- outputs=[
47
- gr.Audio(label="Generated Audio", type="numpy"),
48
- gr.Text(label="Filtered text after removing OOVs"),
49
- ],
50
- examples=TTS_EXAMPLES,
51
- title="Text-to-speech",
52
- description=("Generate audio in Faroese from input text."),
53
- allow_flagging="never",
54
- )
55
 
56
- mms_select_source_iden = gr.Radio(
57
- ["Record from Mic", "Upload audio"],
58
- label="Audio input",
59
- value="Record from Mic",
60
- )
61
- mms_mic_source_iden = gr.Audio(source="microphone", type="filepath", label="Use mic")
62
- mms_upload_source_iden = gr.Audio(
63
- source="upload", type="filepath", label="Upload file", visible=False
64
- )
65
- mms_identify = gr.Interface(
66
- fn=identify,
67
- inputs=[
68
- mms_select_source_iden,
69
- mms_mic_source_iden,
70
- mms_upload_source_iden,
71
- ],
72
- outputs=gr.Label(num_top_classes=10),
73
- examples=LID_EXAMPLES,
74
- title="Language Identification",
75
- description=("Identity the language of input audio."),
76
- allow_flagging="never",
77
- )
78
 
79
- tabbed_interface = gr.TabbedInterface(
80
- [mms_transcribe, mms_synthesize, mms_identify],
81
- ["Speech-to-text", "Text-to-speech", "Language Identification"],
82
- )
83
 
84
- with gr.Blocks() as demo:
85
  gr.Markdown(
86
  "<p align='center' style='font-size: 20px;'>MMS: Scaling Speech Technology to 1000+ languages demo. See our <a href='https://ai.facebook.com/blog/multilingual-model-speech-recognition/'>blog post</a> and <a href='https://arxiv.org/abs/2305.13516'>paper</a>.</p>"
87
  )
@@ -89,31 +20,102 @@ with gr.Blocks() as demo:
89
  """<center>Click on the appropriate tab to explore Speech-to-text (ASR), Text-to-speech (TTS) and Language identification (LID) demos. </center>"""
90
  )
91
  gr.HTML(
92
- """<center>You can also finetune MMS models on your data using the recipes provides here - <a href='https://huggingface.co/blog/mms_adapters'>ASR</a> <a href='https://github.com/ylacombe/finetune-hf-vits'>TTS</a> </center>"""
93
  )
94
  gr.HTML(
95
  """<center><a href="https://huggingface.co/spaces/facebook/MMS?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank"><img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> for more control and no queue.</center>"""
96
  )
97
 
98
- tabbed_interface.render()
99
- mms_select_source_trans.change(
100
- lambda x: [
101
- gr.update(visible=True if x == "Record from Mic" else False),
102
- gr.update(visible=True if x == "Upload audio" else False),
103
- ],
104
- inputs=[mms_select_source_trans],
105
- outputs=[mms_mic_source_trans, mms_upload_source_trans],
106
- queue=False,
107
- )
108
- mms_select_source_iden.change(
109
- lambda x: [
110
- gr.update(visible=True if x == "Record from Mic" else False),
111
- gr.update(visible=True if x == "Upload audio" else False),
112
- ],
113
- inputs=[mms_select_source_iden],
114
- outputs=[mms_mic_source_iden, mms_upload_source_iden],
115
- queue=False,
116
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  gr.HTML(
118
  """
119
  <div class="footer" style="text-align:center">
@@ -122,7 +124,7 @@ with gr.Blocks() as demo:
122
  </p>
123
  </div>
124
  """
125
- )
126
 
127
  demo.queue(concurrency_count=3)
128
  demo.launch()
 
1
  import gradio as gr
 
2
  from asr import transcribe, ASR_EXAMPLES, ASR_NOTE
3
  from tts import synthesize, TTS_EXAMPLES
4
  from lid import identify, LID_EXAMPLES
5
 
6
def wrapped_transcribe(select_source, mic_audio, upload_audio):
    """Transcribe Faroese speech from whichever audio source is selected.

    Args:
        select_source: Value of the audio-source selector. The exact string
            "Record from Mic" picks the microphone input; any other value
            picks the uploaded input.
        mic_audio: Value of the microphone audio input.
        upload_audio: Value of the uploaded audio input.

    Returns:
        Whatever ``transcribe`` returns for the chosen audio.
    """
    # NOTE(review): an earlier revision of this file called transcribe() with
    # four arguments (selector, mic, upload, language). Confirm that the
    # current asr.transcribe accepts (audio, language).
    if select_source == "Record from Mic":
        chosen_audio = mic_audio
    else:
        chosen_audio = upload_audio
    # The language is fixed to Faroese for this demo.
    return transcribe(chosen_audio, "fao (Faroese)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
def wrapped_synthesize(text, speed):
    """Synthesize Faroese speech for ``text`` at the requested speed.

    Args:
        text: Input text to synthesize.
        speed: Speed value forwarded unchanged to ``synthesize``.

    Returns:
        Whatever ``synthesize`` returns for the given text and speed.
    """
    # The language is fixed to Faroese for this demo.
    synthesis_result = synthesize(text, "fao (Faroese)", speed)
    return synthesis_result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
+ demo = gr.Blocks()
 
 
 
14
 
15
+ with demo:
16
  gr.Markdown(
17
  "<p align='center' style='font-size: 20px;'>MMS: Scaling Speech Technology to 1000+ languages demo. See our <a href='https://ai.facebook.com/blog/multilingual-model-speech-recognition/'>blog post</a> and <a href='https://arxiv.org/abs/2305.13516'>paper</a>.</p>"
18
  )
 
20
  """<center>Click on the appropriate tab to explore Speech-to-text (ASR), Text-to-speech (TTS) and Language identification (LID) demos. </center>"""
21
  )
22
  gr.HTML(
23
+ """<center>You can also finetune MMS models on your data using the recipes provided here - <a href='https://huggingface.co/blog/mms_adapters'>ASR</a> <a href='https://github.com/ylacombe/finetune-hf-vits'>TTS</a> </center>"""
24
  )
25
  gr.HTML(
26
  """<center><a href="https://huggingface.co/spaces/facebook/MMS?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank"><img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> for more control and no queue.</center>"""
27
  )
28
 
29
+ with gr.TabbedInterface(["Speech-to-text", "Text-to-speech", "Language Identification"]) as tabs:
30
+
31
+ with tabs[0]:
32
+ mms_select_source_trans = gr.Radio(
33
+ ["Record from Mic", "Upload audio"],
34
+ label="Audio input",
35
+ value="Record from Mic",
36
+ )
37
+ mms_mic_source_trans = gr.Audio(source="microphone", type="filepath", label="Use mic")
38
+ mms_upload_source_trans = gr.Audio(
39
+ source="upload", type="filepath", label="Upload file", visible=False
40
+ )
41
+ gr.Interface(
42
+ fn=wrapped_transcribe,
43
+ inputs=[
44
+ mms_select_source_trans,
45
+ mms_mic_source_trans,
46
+ mms_upload_source_trans,
47
+ ],
48
+ outputs="text",
49
+ examples=ASR_EXAMPLES,
50
+ title="Speech-to-text",
51
+ description=(
52
+ "Transcribe audio from a microphone or input file in Faroese."
53
+ ),
54
+ article=ASR_NOTE,
55
+ allow_flagging="never",
56
+ ).render()
57
+
58
+ mms_select_source_trans.change(
59
+ lambda x: [
60
+ gr.update(visible=True if x == "Record from Mic" else False),
61
+ gr.update(visible=True if x == "Upload audio" else False),
62
+ ],
63
+ inputs=[mms_select_source_trans],
64
+ outputs=[mms_mic_source_trans, mms_upload_source_trans],
65
+ queue=False,
66
+ )
67
+
68
+ with tabs[1]:
69
+ gr.Interface(
70
+ fn=wrapped_synthesize,
71
+ inputs=[
72
+ gr.Text(label="Input text"),
73
+ gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Speed"),
74
+ ],
75
+ outputs=[
76
+ gr.Audio(label="Generated Audio", type="numpy"),
77
+ gr.Text(label="Filtered text after removing OOVs"),
78
+ ],
79
+ examples=TTS_EXAMPLES,
80
+ title="Text-to-speech",
81
+ description=("Generate audio in Faroese from input text."),
82
+ allow_flagging="never",
83
+ ).render()
84
+
85
+ with tabs[2]:
86
+ mms_select_source_iden = gr.Radio(
87
+ ["Record from Mic", "Upload audio"],
88
+ label="Audio input",
89
+ value="Record from Mic",
90
+ )
91
+ mms_mic_source_iden = gr.Audio(source="microphone", type="filepath", label="Use mic")
92
+ mms_upload_source_iden = gr.Audio(
93
+ source="upload", type="filepath", label="Upload file", visible=False
94
+ )
95
+ gr.Interface(
96
+ fn=identify,
97
+ inputs=[
98
+ mms_select_source_iden,
99
+ mms_mic_source_iden,
100
+ mms_upload_source_iden,
101
+ ],
102
+ outputs=gr.Label(num_top_classes=10),
103
+ examples=LID_EXAMPLES,
104
+ title="Language Identification",
105
+ description=("Identify the language of input audio."),
106
+ allow_flagging="never",
107
+ ).render()
108
+
109
+ mms_select_source_iden.change(
110
+ lambda x: [
111
+ gr.update(visible=True if x == "Record from Mic" else False),
112
+ gr.update(visible=True if x == "Upload audio" else False),
113
+ ],
114
+ inputs=[mms_select_source_iden],
115
+ outputs=[mms_mic_source_iden, mms_upload_source_iden],
116
+ queue=False,
117
+ )
118
+
119
  gr.HTML(
120
  """
121
  <div class="footer" style="text-align:center">
 
124
  </p>
125
  </div>
126
  """
127
+ )
128
 
129
  demo.queue(concurrency_count=3)
130
  demo.launch()