"""Live speech-to-text demo: stream microphone audio through IBM Granite ASR in Gradio."""
from transformers import pipeline
import gradio as gr
import torch

# Model loads once at import time (8B parameters; bfloat16 halves the memory footprint).
p = pipeline(
    "automatic-speech-recognition",
    model="ibm-granite/granite-speech-3.2-8b",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)


def transcribe(audio, state=""):
    """Transcribe one audio chunk and append it to the running transcript.

    Args:
        audio: Filesystem path to the recorded audio clip (the Audio
            component is configured with ``type="filepath"``).
        state: Transcript accumulated over previous calls; Gradio feeds the
            second return value back in here on the next invocation.

    Returns:
        Tuple ``(display_text, new_state)`` — the first fills the textbox,
        the second becomes the next call's ``state``.
    """
    # NOTE(review): the original had a time.sleep(3) throttle here (a hack from
    # an old streaming tutorial); removed — it only added latency per chunk.
    text = p(audio)["text"]
    state += text + " "
    return state, state


if __name__ == "__main__":
    gr.Interface(
        fn=transcribe,
        inputs=[
            # gr.inputs.Audio(source=...) was removed in Gradio 4; the current
            # API is the top-level component with a plural ``sources`` list.
            gr.Audio(sources=["microphone"], type="filepath"),
            "state",
        ],
        outputs=["textbox", "state"],
        live=True,
    ).launch()