jiaofengxu committed on
Commit
f946d47
·
1 Parent(s): 9bbe4f5
Files changed (3) hide show
  1. app.py +55 -3
  2. packages.txt +2 -0
  3. requirements.txt +9 -2
app.py CHANGED
@@ -1,5 +1,5 @@
1
  #from transformers import pipeline
2
- import gradio as gr
3
  #import nemo.collections.asr as nemo_asr
4
  #import gradio
5
 
@@ -33,5 +33,57 @@ gr.Interface(
33
  ).launch(share=True)
34
  '''
35
  #gr.load("models/nvidia/parakeet-ctc-1.1b").launch()
36
- gr.load("models/openai/whisper-medium.en").launch()
37
- #gr.load("models/nvidia/stt_en_conformer_ctc_small").launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  #from transformers import pipeline
2
+ #import gradio as gr
3
  #import nemo.collections.asr as nemo_asr
4
  #import gradio
5
 
 
33
  ).launch(share=True)
34
  '''
35
  #gr.load("models/nvidia/parakeet-ctc-1.1b").launch()
36
+ #gr.load("models/openai/whisper-medium.en").launch()
37
+ #gr.load("models/nvidia/stt_en_conformer_ctc_small").launch(share=True)
38
+
39
+
40
+ import os
41
+ os.system("""pip install nemo_toolkit['all']""")
42
+
43
+ import nemo.collections.asr as nemo_asr
44
+ from transformers import pipeline
45
+ import numpy as np
46
+ import gradio as gr
47
+ import librosa
48
+ from scipy.io.wavfile import write
49
+
50
+ def respond(message, chat_history):
51
+ bot_message = message
52
+ chat_history.append((message, bot_message))
53
+ return "", chat_history
54
+
55
+ def transcribe(audio):
56
+ sr, y = audio
57
+ audio_name = "resampled_audio.wav"
58
+ resampled_audio = librosa.resample(y=y.astype("float"), orig_sr=sr, target_sr=16000)
59
+ write(audio_name, 16000, resampled_audio)
60
+ result = asr_model.transcribe([f"./{audio_name}"])
61
+ return result[0]
62
+
63
+ asr_model = nemo_asr.models.EncDecCTCModel.from_pretrained(model_name="nvidia/parakeet-ctc-0.6b")
64
+
65
+ with gr.Blocks() as demo:
66
+ with gr.Column():
67
+ gr.Markdown(
68
+ """
69
+ # HKU Canteen VA
70
+ """)
71
+ va = gr.Chatbot(container=False)
72
+
73
+ with gr.Row(): # text input
74
+ text_input = gr.Textbox(placeholder="Ask me anything...", container=False, scale=1)
75
+ submit_btn = gr.Button("Submit", scale=0)
76
+
77
+ with gr.Row(): # audio input
78
+ recording = gr.Microphone(show_download_button=False, container=False)
79
+
80
+ with gr.Row(): # button toolbar
81
+ clear = gr.ClearButton([text_input, va])
82
+
83
+ text_input.submit(respond, [text_input, va], [text_input, va], queue=False)
84
+ submit_btn.click(respond, [text_input, va], [text_input, va], queue=False)
85
+ # recording.stop_recording(transcribe, [recording], [text_input]).then(respond, [text_input, va], [text_input, va], queue=False)
86
+ recording.stop_recording(transcribe, [recording], [text_input])
87
+
88
+ if __name__ == "__main__":
89
+ demo.launch(share=True)
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ libsndfile1
2
+ ffmpeg
requirements.txt CHANGED
@@ -1,10 +1,17 @@
1
  #transformers
2
  #torch
3
- gradio
4
  #nemo_toolkit
5
  #hydra-core
6
  #pytorch_lightning
7
  #Cython
8
  #librosa
9
  #sentencepiece
10
- #youtokentome
 
 
 
 
 
 
 
 
1
  #transformers
2
  #torch
3
+ #gradio
4
  #nemo_toolkit
5
  #hydra-core
6
  #pytorch_lightning
7
  #Cython
8
  #librosa
9
  #sentencepiece
10
+ #youtokentome
11
+
12
+ torch
13
+ transformers
14
+ numpy
15
+ librosa
16
+ scipy
17
+ Cython