import torch
import gradio as gr
from transformers import pipeline
models = ["openai/whisper-small", "openai/whisper-base", "openai/whisper-medium", "openai/whisper-large"]
pipe = pipeline(task="automatic-speech-recognition",
model="openai/whisper-small",
device="cuda" if torch.cuda.is_available() else "cpu")

def initialize_pipeline(model_name):
    """Reload the ASR pipeline with the selected Whisper checkpoint."""
    global pipe
    pipe = pipeline(task="automatic-speech-recognition",
                    model=model_name,
                    device="cuda" if torch.cuda.is_available() else "cpu")
    return model_name

def transcribe(audio):
    """Transcribe the recorded or uploaded audio file with the current pipeline."""
    if audio is None:
        return "No audio input received. Please try again."
    text = pipe(audio)["text"]
    return text

# Simple single-model interface; superseded by the Blocks UI defined below
interface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs="text",
    title="Whisper Small",
    description="Real-time demo for speech recognition using the Whisper Small model.",
)

with gr.Blocks() as interface:
    # Dropdown to select the model
    model_dropdown = gr.Dropdown(choices=models, value=models[0], label="Select Model")
    # Audio input component
    audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Upload or Record Audio")
    # Text output component
    text_output = gr.Textbox(label="Transcribed Text")
    # Button to trigger transcription
    transcribe_button = gr.Button("Transcribe")
    # Event listener to initialize the pipeline when the model is selected
    model_dropdown.change(fn=initialize_pipeline, inputs=model_dropdown, outputs=None)
    # Event listener to transcribe the audio when the button is clicked
    transcribe_button.click(fn=transcribe, inputs=[audio_input], outputs=text_output)

if __name__ == "__main__":
    interface.launch()