"""Gradio app: classify the emotion in a speech clip with a fine-tuned Wav2Vec2 model."""
import gradio as gr
import torch
import librosa
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification
# Hugging Face Hub model id of the fine-tuned emotion classifier.
MODEL_NAME = "ameliabb0913/emotion-classifier1"
# Downloaded/cached at import time; first run requires network access.
processor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_NAME)
# trust_remote_code=True allows the repo to ship custom model code — only safe
# because the model source is known/trusted.
model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_NAME, trust_remote_code=True)
model.eval()  # inference mode: disables dropout etc.
# Class-index -> emotion-name mapping for the model's 7 output logits
# (order matches the dataset the classifier was trained on).
id2label = dict(enumerate([
    "Neutral",
    "Happy",
    "Sad",
    "Angry",
    "Fearful",
    "Disgusted",
    "Surprised",
]))
# Emotion-name -> emoji used to decorate the prediction text.
# The original file contained encoding-corrupted (mojibake) emoji literals
# ("π", "π’", ...); restored here to the intended characters.
emotion_emojis = {
    "Neutral": "😐",
    "Happy": "😄",
    "Sad": "😢",
    "Angry": "😠",
    "Fearful": "😨",
    "Disgusted": "🤢",
    "Surprised": "😲",
}
# Function to classify emotions from audio
def classify_emotion(audio_file):
    """Predict the emotion expressed in an audio clip.

    Args:
        audio_file: Filesystem path to the audio clip (Gradio passes a
            filepath because the Audio input uses type="filepath"; it is
            None when the user submits without recording/uploading).

    Returns:
        A human-readable string with the predicted emotion and an emoji.
    """
    # Guard: Gradio delivers None when no audio was provided; librosa.load
    # would otherwise raise deep inside the call.
    if audio_file is None:
        return "Please upload or record an audio clip."
    # Load and resample to 16 kHz — the rate Wav2Vec2 models expect.
    speech, sr = librosa.load(audio_file, sr=16000)
    inputs = processor(
        speech,
        sampling_rate=16000,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=48000  # 3 s at 16 kHz; adjust for expected clip length
    )
    # Get predictions (no gradients needed for inference)
    with torch.no_grad():
        logits = model(**inputs).logits
        predicted_class_id = torch.argmax(logits, dim=-1).item()
    # Convert class ID to emotion label; fall back gracefully if the model
    # ever emits an id outside the known label set.
    predicted_emotion = id2label.get(predicted_class_id, "Unknown")
    # "❓" replaces the mojibake fallback character ("β") in the original.
    emoji = emotion_emojis.get(predicted_emotion, "❓")
    return f"Predicted Emotion: {predicted_emotion} {emoji}"
# Gradio Interface
# NOTE: the original source was missing the closing parenthesis of the
# gr.Interface(...) call (SyntaxError), claimed "8" emotions while listing 7,
# and contained mojibake emoji in the title/description — all fixed here.
interface = gr.Interface(
    fn=classify_emotion,
    inputs=gr.Audio(type="filepath"),  # hand the callback a temp-file path
    outputs="text",
    title="🎧 Speak Your Emotion | AI Emotion Detector",
    description=(
        "🎤 Upload a voice clip or speak into the mic — this AI will identify the **emotion** in your voice!\n\n"
        "**Supported 7 Emotions**: Neutral, Happy, Sad, Angry, Fearful, Disgusted, Surprised."
    ),
)

# Launch the app only when run as a script (not on import).
if __name__ == "__main__":
    interface.launch()