File size: 981 Bytes
46aa3a0 b248823 46aa3a0 7766367 46aa3a0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import gradio as gr
from transformers import pipeline
from pydub import AudioSegment
import os
# Build the Hugging Face audio-classification pipeline once, at import time,
# so that every call to the handler below reuses the same loaded model.
# Judging by its name, the checkpoint is a Whisper large-v3 model tuned for
# speech emotion recognition (it classifies audio; it does not transcribe).
asr = pipeline(
    "audio-classification",
    model="firdhokk/speech-emotion-recognition-with-openai-whisper-large-v3",
)
def convert_audio_to_wav(audio_path):
    """Write a WAV copy of the audio file at ``audio_path``.

    The source is decoded with ``AudioSegment.from_file`` and exported to a
    new file whose name is the input path with ``.wav`` appended. The input
    file is left in place, and the exported file is not deleted here.

    Args:
        audio_path: Path of the audio file to convert, as a string.

    Returns:
        The path of the exported WAV file.
    """
    segment = AudioSegment.from_file(audio_path)
    # Append rather than replace the extension, so the output name never
    # equals the input name, even when the input already ends in ".wav".
    target_path = audio_path + ".wav"
    segment.export(target_path, format="wav")
    return target_path
def transcribe(audio_path):
    """Classify the audio file at ``audio_path`` with the ``asr`` pipeline.

    The input is first converted to a temporary WAV file, which is passed to
    the module-level ``asr`` pipeline and then deleted, whether or not the
    pipeline call succeeds.

    Args:
        audio_path: Path of the uploaded audio file. Gradio passes ``None``
            when the user submits without providing audio.

    Returns:
        The first element of the pipeline output (presumably the
        highest-scoring ``{"label": ..., "score": ...}`` entry; confirm
        against the pipeline documentation).

    Raises:
        gr.Error: If no audio path was provided.
    """
    if not audio_path:
        # Surface a readable message in the UI instead of an opaque failure
        # from inside the audio decoder.
        raise gr.Error("No audio provided. Please upload an audio file first.")

    wav_path = convert_audio_to_wav(audio_path)
    try:
        result = asr(wav_path)
    finally:
        # Always remove the temporary WAV, even if inference raises, so that
        # failed requests do not leave files behind.
        os.remove(wav_path)
    return result[0]
# Gradio interface (DO NOT use share=True)
# The handler returns the emotion-classification result, not a transcript,
# so the output label and description describe emotion recognition.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Upload Audio (.m4a, .mp3, .wav...)"),
    outputs=gr.Textbox(label="Predicted emotion"),
    title="Whisper Speech emotion Recognition",
    description=(
        "Recognizes the emotion in speech from most audio formats "
        "using a Whisper-based classifier."
    ),
)
# Just launch it — no share=True!
demo.launch()
|