# SER / app.py
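"""Gradio app for speech emotion recognition.

Accepts an uploaded audio file, converts it to WAV, and classifies the
speaker's emotion with a Whisper-based audio-classification model.
"""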
import gradio as gr
from transformers import pipeline
from pydub import AudioSegment
import os
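# Expected dependencies (assumed, not pinned here): gradio, transformers plus a
# backend such as torch, and pydub, which needs ffmpeg on the PATH to decode
# formats like .m4a and .mp3.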
# Load the speech emotion recognition pipeline (a Whisper-based audio classifier)
classifier = pipeline("audio-classification", model="firdhokk/speech-emotion-recognition-with-openai-whisper-large-v3")
def convert_audio_to_wav(audio_path):
    """Convert audio to WAV format"""
    audio = AudioSegment.from_file(audio_path)
    wav_path = audio_path + ".wav"
    audio.export(wav_path, format="wav")
    return wav_path
def classify_emotion(audio_path):
    """Classify the emotion expressed in the uploaded audio."""
    wav_path = convert_audio_to_wav(audio_path)
    result = classifier(wav_path)  # list of {"label", "score"} dicts, best score first
    os.remove(wav_path)  # clean up the temporary WAV file
    top = result[0]
    return f"{top['label']} (score: {top['score']:.2f})"
# Gradio interface (share=True is not needed on Hugging Face Spaces)
demo = gr.Interface(
    fn=classify_emotion,
    inputs=gr.Audio(type="filepath", label="Upload Audio (.m4a, .mp3, .wav, ...)"),
    outputs=gr.Textbox(label="Predicted Emotion"),
    title="Whisper Speech Emotion Recognition",
    description="Recognizes the emotion in most audio formats using a Whisper-based classifier.",
)
demo.launch()