Spaces:

deepakkumar07
/

whisper-small-demo

Sleeping

App Files Files Community

deepakkumar07 committed on Mar 12

Commit

50fb5fd

verified ·

1 Parent(s): 23794e8

added whisper models in dropdown to choose and transcribe.

Browse files

Files changed (1) hide show

app.py +29 -3

app.py CHANGED Viewed

@@ -2,20 +2,46 @@ import torch
 import gradio as gr
 from transformers import pipeline
-pipe = pipeline(task="automatic-speech-recognition",
                 model="openai/whisper-small",
                 device="cuda" if torch.cuda.is_available() else "cpu")
 def transcribe(audio):
     """Run the module-level ASR pipeline on ``audio`` and return its text."""
     # The pipeline result is indexed by "text"; only that field is returned.
     result = pipe(audio)
     return result["text"]
 interface = gr.Interface(
-    fn=transcribe,
-    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
     outputs="text",
     title="Whisper Small",
     description="Realtime demo for  Speech recognition using a Whisper small model.",
 )
 if __name__ == "__main__":
     interface.launch()

 import gradio as gr
 from transformers import pipeline
+models = ["openai/whisper-small", "openai/whisper-base", "openai/whisper-medium", "openai/whisper-large"]
+pipe = pipeline(task="automatic-speech-recognition",
                 model="openai/whisper-small",
                 device="cuda" if torch.cuda.is_available() else "cpu")
+# Initialize the pipeline with the selected model
+def initialize_pipeline(model_name):
+    # Placeholder for the actual pipeline initialization
+    return model_name
 # Extra pipelines loaded on demand, keyed by Hugging Face model id.
 _loaded_pipelines = {}
 def transcribe(audio, model_name=None):
     """Transcribe an audio file with the selected Whisper model.

     The Blocks UI registers this callback with two inputs (the audio
     component and the model dropdown), so it has to accept the model name
     too; a one-argument signature does not match that wiring.

     Args:
         audio: Value of the audio component (a file path, since the
             component uses ``type="filepath"``), or ``None`` when no audio
             was supplied.
         model_name: Model id chosen in the dropdown. ``None`` keeps the
             previous behaviour and uses the module-level ``pipe``.

     Returns:
         The transcribed text, or an empty string when ``audio`` is ``None``.
     """
     if audio is None:
         return ""
     default_name = getattr(getattr(pipe, "model", None), "name_or_path", None)
     if model_name is None or model_name == default_name:
         # Reuse the pipeline that was already built at import time.
         asr = pipe
     else:
         asr = _loaded_pipelines.get(model_name)
         if asr is None:
             # Build each extra pipeline once; later requests reuse it.
             asr = pipeline(task="automatic-speech-recognition",
                            model=model_name,
                            device="cuda" if torch.cuda.is_available() else "cpu")
             _loaded_pipelines[model_name] = asr
     return asr(audio)["text"]
 interface = gr.Interface(
+    fn=transcribe,
+    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
     outputs="text",
     title="Whisper Small",
     description="Realtime demo for  Speech recognition using a Whisper small model.",
 )
+with gr.Blocks() as interface:
+    # Dropdown to select the model
+    model_dropdown = gr.Dropdown(choices=models, value=models[0], label="Select Model")
+    # Audio input component
+    audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Upload or Record Audio")
+    # Text output component
+    text_output = gr.Textbox(label="Transcribed Text")
+    # Button to trigger transcription
+    transcribe_button = gr.Button("Transcribe")
+    # Event listener to initialize the pipeline when the model is selected
+    model_dropdown.change(fn=initialize_pipeline, inputs=model_dropdown, outputs=None)
+    # Event listener to transcribe the audio when the button is clicked
+    transcribe_button.click(fn=transcribe, inputs=[audio_input, model_dropdown], outputs=text_output)
+    # Event listener to show the download button when audio is uploaded or recorded
 if __name__ == "__main__":
     interface.launch()