import torch
import gradio as gr
from transformers import pipeline

# Whisper checkpoints the user can pick from in the UI.
models = ["openai/whisper-small", "openai/whisper-base", "openai/whisper-medium", "openai/whisper-large"]

# Default pipeline; run on the GPU when one is available.
pipe = pipeline(task="automatic-speech-recognition",
                model="openai/whisper-small",
                device="cuda" if torch.cuda.is_available() else "cpu")

def initialize_pipeline(model_name):
    """Rebuild the global ASR pipeline with the selected Whisper checkpoint."""
    global pipe
    pipe = pipeline(task="automatic-speech-recognition",
                    model=model_name,
                    device="cuda" if torch.cuda.is_available() else "cpu")
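
# A possible refinement (sketch only; assumes enough memory to hold several
# checkpoints at once): cache one pipeline per model so switching back to a
# previously used model does not reload its weights.
#
#   _pipelines = {}
#   def initialize_pipeline(model_name):
#       global pipe
#       if model_name not in _pipelines:
#           _pipelines[model_name] = pipeline(
#               task="automatic-speech-recognition",
#               model=model_name,
#               device="cuda" if torch.cuda.is_available() else "cpu")
#       pipe = _pipelines[model_name]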

def transcribe(audio):
    """Transcribe the uploaded or recorded audio file and return its text."""
    if audio is None:
        return "No audio input received. Please try again."
    return pipe(audio)["text"]
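
# Note (an assumption about usage, not exercised by this demo): for clips
# longer than ~30 s, the pipeline can transcribe in windows via its
# chunk_length_s argument:
#   text = pipe(audio, chunk_length_s=30)["text"]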

# Build the UI with Blocks so a model selector can sit alongside the audio
# input. (The original file also created a standalone gr.Interface, but it was
# immediately shadowed by this Blocks app; its title and description are
# folded in here instead.)
with gr.Blocks() as interface:
    gr.Markdown("# Whisper Speech Recognition")
    gr.Markdown("Realtime demo for speech recognition using Whisper models.")
    # Dropdown to select the model
    model_dropdown = gr.Dropdown(choices=models, value=models[0], label="Select Model")
    # Audio input component
    audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Upload or Record Audio")
    # Text output component
    text_output = gr.Textbox(label="Transcribed Text")
    # Button to trigger transcription
    transcribe_button = gr.Button("Transcribe")
    # Event listener to rebuild the pipeline when a model is selected
    model_dropdown.change(fn=initialize_pipeline, inputs=model_dropdown, outputs=None)
    # Event listener to transcribe the audio when the button is clicked
    transcribe_button.click(fn=transcribe, inputs=[audio_input], outputs=text_output)

if __name__ == "__main__":
    interface.launch()
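
# launch() also takes standard Gradio options; for instance, share=True
# creates a temporary public link when running locally:
#   interface.launch(share=True)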