import gradio as gr
import librosa
from transformers import pipeline

# Load the ASR pipeline once at startup so each request reuses the model.
asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-small")


def transcribe(audio_path):
    """Transcribe an uploaded or recorded audio file to text.

    Args:
        audio_path: Filesystem path to the audio clip, as delivered by
            ``gr.Audio(type="filepath")``. May be ``None`` if the user
            submits without providing audio.

    Returns:
        The transcribed text, or an empty string when no audio was given.
    """
    if audio_path is None:
        return ""
    # Whisper models are trained on 16 kHz audio, so resample at load time
    # (librosa also downmixes to mono by default).
    data, samplerate = librosa.load(audio_path, sr=16000)
    # The ASR pipeline takes a raw array plus its sampling rate as a dict;
    # passing sampling_rate as a bare keyword argument is not supported.
    transcription = asr_model({"raw": data, "sampling_rate": samplerate})
    return transcription["text"]


# Build the Gradio interface. type="filepath" hands the callback a plain
# path string ("file" is not a valid Audio type in current Gradio).
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Record or Upload Audio"),
    outputs="text",
)

if __name__ == "__main__":
    iface.launch()