File size: 981 Bytes
46aa3a0
 
 
 
 
 
b248823
46aa3a0
 
 
 
 
 
 
 
 
 
 
 
7766367
46aa3a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import gradio as gr
from transformers import pipeline
from pydub import AudioSegment
import os

# Build the audio-classification pipeline once at import time so every request
# reuses the same loaded model (a speech-emotion recognizer, judging by the
# model id).
asr = pipeline(
    task="audio-classification",
    model="firdhokk/speech-emotion-recognition-with-openai-whisper-large-v3",
)

def convert_audio_to_wav(audio_path):
    """Convert an audio file to WAV and return the path of the new file.

    The input is decoded with ``AudioSegment.from_file`` and written next to
    the original as ``<audio_path>.wav``. This function does not delete the
    file it creates; that is left to the caller.

    Args:
        audio_path: Path of the source audio file, as a string.

    Returns:
        The path of the WAV file that was written.
    """
    audio = AudioSegment.from_file(audio_path)
    wav_path = audio_path + ".wav"
    # export() hands back the open output file handle. Close it explicitly
    # rather than relying on garbage collection, so the data is flushed and
    # the file can be removed afterwards regardless of platform.
    audio.export(wav_path, format="wav").close()
    return wav_path

def transcribe(audio_path):
    """Run the ``asr`` pipeline on an audio file and return its first result.

    Despite the name, no transcript is produced: ``asr`` is built as an
    ``audio-classification`` pipeline, so the value returned is the first
    entry of its prediction list (presumably the top-scoring label; confirm
    against the pipeline documentation).

    The input is first converted to a temporary WAV file, which is always
    deleted before this function returns or raises.

    Args:
        audio_path: Path of the uploaded audio file.

    Returns:
        The first element of the pipeline's output for the converted file.
    """
    wav_path = convert_audio_to_wav(audio_path)
    try:
        result = asr(wav_path)
    finally:
        # Clean up even when inference fails, so failed requests do not
        # leave converted files behind on disk.
        os.remove(wav_path)
    return result[0]

# Gradio interface (DO NOT use share=True)
# The app classifies the emotion in speech (the pipeline task is
# "audio-classification"), so the output label and description say so rather
# than advertising a transcription.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Upload Audio (.m4a, .mp3, .wav...)"),
    outputs=gr.Textbox(label="Predicted Emotion"),
    title="Whisper Speech emotion Recognition",
    description="Recognizes the emotion in speech from most audio formats using Whisper.",
)

# Just launch it — no share=True!
demo.launch()