"""Gradio app that classifies the emotion of an uploaded speech recording.

The uploaded audio is converted to WAV with pydub and then passed to a
Hugging Face ``audio-classification`` pipeline.
"""

import os
import tempfile

import gradio as gr
from pydub import AudioSegment
from transformers import pipeline

# Load the audio-classification pipeline (Whisper-based emotion recognition
# model) once at import time so every request reuses it.
asr = pipeline(
    "audio-classification",
    model="firdhokk/speech-emotion-recognition-with-openai-whisper-large-v3",
)


def convert_audio_to_wav(audio_path):
    """Convert the audio file at *audio_path* to a temporary WAV file.

    Args:
        audio_path: Path to the input audio file, in any format pydub can
            decode.

    Returns:
        Path of the newly written WAV file. The caller is responsible for
        deleting it.
    """
    # Decode first so that a bad input does not leave a stray temp file behind.
    audio = AudioSegment.from_file(audio_path)

    # Use a unique temp file rather than writing next to the input, which may
    # live in a read-only directory or clash with an existing "<name>.wav".
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        # export() hands back the open output file; close it explicitly so the
        # path can be read and removed safely afterwards.
        audio.export(wav_path, format="wav").close()
    except Exception:
        # Do not leak the temp file when the export itself fails.
        if os.path.exists(wav_path):
            os.remove(wav_path)
        raise
    return wav_path


def transcribe(audio_path):
    """Run the emotion classifier on an audio file.

    Despite its name (kept so existing references keep working), this
    function does not produce a transcript: it feeds the audio to the
    ``audio-classification`` pipeline.

    Args:
        audio_path: Path to the audio file supplied by the Gradio input, or
            ``None`` when the user submitted without providing audio.

    Returns:
        The first entry of the pipeline output (presumably the top-scoring
        label/score mapping -- confirm against the pipeline documentation).

    Raises:
        gr.Error: If no audio file was provided.
    """
    if not audio_path:
        raise gr.Error("Please upload or record an audio file first.")

    wav_path = convert_audio_to_wav(audio_path)
    try:
        result = asr(wav_path)
    finally:
        # Always clean up the temporary WAV, even if inference fails.
        if os.path.exists(wav_path):
            os.remove(wav_path)
    return result[0]


# Gradio interface (DO NOT use share=True)
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Upload Audio (.m4a, .mp3, .wav...)"),
    outputs=gr.Textbox(label="Predicted emotion"),
    title="Whisper Speech emotion Recognition",
    description=(
        "Recognizes the emotion in speech from most audio formats "
        "using a fine-tuned Whisper model."
    ),
)

# Just launch it — no share=True!
demo.launch()