import gradio as gr
import librosa
from transformers import pipeline

# Load the ASR pipeline once at startup so each request reuses the model.
asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-small")


def transcribe(audio_path):
    """Transcribe an uploaded or recorded audio file to text.

    Args:
        audio_path: Filesystem path to the audio clip, as delivered by
            ``gr.Audio(type="filepath")``. May be ``None`` if the user
            submits without providing audio.

    Returns:
        The transcribed text, or an empty string when no audio was given.
    """
    if audio_path is None:
        return ""
    # Whisper models are trained on 16 kHz audio, so resample at load time
    # (librosa also downmixes to mono by default).
    data, samplerate = librosa.load(audio_path, sr=16000)
    # The ASR pipeline takes a raw array plus its sampling rate as a dict;
    # passing sampling_rate as a bare keyword argument is not supported.
    transcription = asr_model({"raw": data, "sampling_rate": samplerate})
    return transcription["text"]


# Build the Gradio interface. type="filepath" hands the callback a plain
# path string ("file" is not a valid Audio type in current Gradio).
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Record or Upload Audio"),
    outputs="text",
)

if __name__ == "__main__":
    iface.launch()