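# Gradio demo: speech recognition with OpenAI Whisper checkpoints served
# through the Hugging Face `transformers` ASR pipeline. A dropdown lets the
# user switch Whisper checkpoints at runtime.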
import torch
import gradio as gr
from transformers import pipeline

# Whisper checkpoints offered in the model dropdown.
models = ["openai/whisper-small", "openai/whisper-base", "openai/whisper-medium", "openai/whisper-large"]

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the default checkpoint once at startup so the app works immediately.
pipe = pipeline(task="automatic-speech-recognition",
                model=models[0],
                device=device)


def initialize_pipeline(model_name):
    """Reload the ASR pipeline with the selected Whisper checkpoint."""
    global pipe
    pipe = pipeline(task="automatic-speech-recognition",
                    model=model_name,
                    device=device)
    return model_name
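# Note: `pipe` is module-level state shared by every connected session, so
# switching models in one browser tab switches it for all users of the app.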


def transcribe(audio):
    """Transcribe an audio file path with the currently loaded pipeline."""
    if audio is None:
        return "No audio input received. Please try again."
    return pipe(audio)["text"]
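# Whisper models work on ~30-second windows; depending on the transformers
# version, longer recordings may be truncated to the first window unless the
# pipeline is built with chunk_length_s=30 to transcribe in chunks.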


with gr.Blocks(title="Whisper Speech Recognition") as interface:
    gr.Markdown("Realtime speech-recognition demo using OpenAI Whisper models.")
    # Dropdown to select the model
    model_dropdown = gr.Dropdown(choices=models, value=models[0], label="Select Model")
    # Audio input component
    audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Upload or Record Audio")
    # Text output component
    text_output = gr.Textbox(label="Transcribed Text")
    # Button to trigger transcription
    transcribe_button = gr.Button("Transcribe")
    # Event listener to initialize the pipeline when the model is selected
    model_dropdown.change(fn=initialize_pipeline, inputs=model_dropdown, outputs=None)
    # Event listener to transcribe the audio when the button is clicked
    transcribe_button.click(fn=transcribe, inputs=[audio_input], outputs=text_output)

if __name__ == "__main__":
    interface.launch()
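# launch() serves on localhost by default; interface.launch(share=True)
# creates a temporary public link if the demo needs to be shared.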