import gradio as gr
import torch
import librosa
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification

MODEL_NAME = "ameliabb0913/emotion-classifier1"

processor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_NAME)
model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_NAME, trust_remote_code=True)
model.eval()

# Emotion labels (based on the dataset used to train the model)
id2label = {
    0: "Neutral",
    1: "Happy",
    2: "Sad",
    3: "Angry",
    4: "Fearful",
    5: "Disgusted",
    6: "Surprised"
}

emotion_emojis = {
    "Neutral": "😐",
    "Happy": "😊",
    "Sad": "😢",
    "Angry": "😠",
    "Fearful": "😨",
    "Disgusted": "🤢",
    "Surprised": "😲"
}

# Function to classify emotions from audio
def classify_emotion(audio_file):
    # Load the audio and resample it to the 16 kHz rate the model expects
    speech, sr = librosa.load(audio_file, sr=16000)

    inputs = processor(
        speech,
        sampling_rate=16000,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=48000  # Adjust depending on the expected audio length
    )

    # Get predictions
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_class_id = torch.argmax(logits, dim=-1).item()

    # Convert class ID to emotion label
    predicted_emotion = id2label.get(predicted_class_id, "Unknown")
    emoji = emotion_emojis.get(predicted_emotion, "❓")

    return f"Predicted Emotion: {predicted_emotion} {emoji}"

# Gradio Interface
interface = gr.Interface(
    fn=classify_emotion,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="🎧 Speak Your Emotion | AI Emotion Detector",
    description=(
        "🎤 Upload a voice clip or speak into the mic, and this AI will identify the **emotion** in your voice!\n\n"
        "**Supported Emotions**: Neutral, Happy, Sad, Angry, Fearful, Disgusted, Surprised."
    )
)

# Launch the app
if __name__ == "__main__":
    interface.launch()
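
# ---------------------------------------------------------------------------
# Optional sketch (not wired into the app above): how the same model could
# report per-class confidence scores instead of a single label. The helper
# name `classify_emotion_with_scores` and the softmax over the logits are
# illustrative assumptions, not something defined by the model repository.
#
# def classify_emotion_with_scores(audio_file):
#     speech, sr = librosa.load(audio_file, sr=16000)
#     inputs = processor(
#         speech,
#         sampling_rate=16000,
#         return_tensors="pt",
#         padding="max_length",
#         truncation=True,
#         max_length=48000,
#     )
#     with torch.no_grad():
#         logits = model(**inputs).logits
#     probs = torch.softmax(logits, dim=-1).squeeze(0)
#     # Map each class probability to its label, e.g. {"Happy": 0.72, ...}
#     return {id2label[i]: float(p) for i, p in enumerate(probs)}
#
# Returning such a dict from the Gradio callback (with outputs=gr.Label())
# would display a ranked list of emotions rather than one prediction.
# ---------------------------------------------------------------------------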