|
import os
|
|
from fastai.vision.all import *
|
|
import gradio as gr
|
|
import pickle
|
|
import tempfile
|
|
from transformers import AutoTokenizer, AutoModelWithLMHead
|
|
from speechbrain.inference.interfaces import foreign_class
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Load the exported fastai learner once, at import time, so every
# prediction call reuses the same in-memory model.
learn_emotion = load_learner('emotions_vgg19.pkl')

# Vocabulary of the learner's DataLoaders; predict() indexes into it to
# turn a predicted class index into its label.
learn_emotion_labels = learn_emotion.dls.vocab
|
|
|
|
|
|
|
|
def predict(img):
    """Return the emotion label the image learner predicts for ``img``.

    ``img`` is whatever the Gradio image input hands over; it is converted
    to a fastai ``PILImage`` before being passed to ``learn_emotion``.
    The result is the entry of ``learn_emotion_labels`` selected by the
    predicted class index.
    """
    face = PILImage.create(img)
    # predict() yields a 3-tuple; only the middle element (the class index)
    # is needed to look the label up in the vocabulary.
    _, label_idx, _ = learn_emotion.predict(face)
    return learn_emotion_labels[label_idx]
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Image tab: static texts and the Gradio interface wrapping predict().
# ---------------------------------------------------------------------------

title = "Facial Emotion Detector"

# Both long texts are routed through gr.Markdown and the component's stored
# ``.value`` is what gets handed to the Interface below.
description = gr.Markdown(
    """Ever wondered what a person might be feeling looking at their picture?
Well, now you can! Try this fun app. Just upload a facial image in JPG or
PNG format. You can now see what they might have felt when the picture
was taken.

**Tip**: Be sure to only include face to get best results. Check some sample images
below for inspiration!""").value

article = gr.Markdown(
    """**DISCLAIMER:** This model does not reveal the actual emotional state of a person. Use and
interpret results at your own risk!.

**PREMISE:** The idea is to determine an overall emotion of a person
based on the pictures. We are restricting pictures to only include close-up facial
images.

**DATA:** FER2013 dataset consists of 48x48 pixel grayscale images of faces.Images
are assigned one of the 7 emotions: Angry, Disgust, Fear, Happy, Sad, Surprise, and Neutral.

""").value

# NOTE(review): this flag is not passed to anything in the visible code.
enable_queue = True

# Sample images offered below the input widget.
examples = [
    'happy1.jpg',
    'happy2.jpg',
    'angry1.png',
    'angry2.jpg',
    'neutral1.jpg',
    'neutral2.jpg',
]

# The input widget is configured with image_mode='L' (single-channel),
# in line with the grayscale training data described in ``article``.
image_mode = gr.Interface(
    fn=predict,
    inputs=gr.Image(image_mode='L'),
    outputs=[gr.Label(label='Emotion')],
    title=title,
    description=description,
    article=article,
    examples=examples,
    allow_flagging='never',
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _load_pickle(path):
    """Unpickle and return the object stored in the file at ``path``."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so these two artifacts must only ever come from a trusted source.
tokenizer = _load_pickle("emotion_tokenizer.pkl")
model = _load_pickle("emotion_model.pkl")
|
|
|
|
|
|
|
|
def classify_emotion(text):
    """Classify ``text`` as "Positive", "Neutral" or "Negative".

    The text is prefixed with ``"emotion: "``, encoded with the module-level
    ``tokenizer``, passed to ``model.generate`` and the first generated
    sequence is decoded back to a label. The label is then bucketed:

    * ``"joy"`` / ``"love"``  -> ``"Positive"``
    * ``"surprise"``          -> ``"Neutral"``
    * anything else           -> ``"Negative"``

    Empty, whitespace-only or ``None`` input is not sent to the model; a
    prompt message is returned instead, mirroring how ``emotion`` treats an
    empty file path.
    """
    # Guard first: concatenating None would raise, and classifying an empty
    # prompt would report a sentiment for text the user never entered.
    if not text or not text.strip():
        return "Please provide some text to classify."

    input_ids = tokenizer.encode("emotion: " + text, return_tensors="pt")
    output = model.generate(input_ids)
    # strip() so stray surrounding whitespace in the decoded text cannot
    # push a known label into the catch-all "Negative" bucket.
    label = tokenizer.decode(output[0], skip_special_tokens=True).strip()

    if label in ("joy", "love"):
        return "Positive"
    if label == "surprise":
        return "Neutral"
    return "Negative"
|
|
|
|
|
|
# Text tab: a plain textbox in, the sentiment bucket from classify_emotion out.
text_model = gr.Interface(
    fn=classify_emotion,
    inputs="textbox",
    outputs="textbox",
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Audio emotion classifier, built once at import time via SpeechBrain's
# foreign_class from the given source, interface module and class name.
classifier = foreign_class(
    source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
    pymodule_file="custom_interface.py",
    classname="CustomEncoderWav2vec2Classifier",
)
|
|
|
|
def save_uploaded_file(uploaded_file):
    """Persist an uploaded file to a fresh temporary directory.

    Args:
        uploaded_file: Object exposing a ``name`` attribute and a
            ``getbuffer()`` method returning the file's bytes.

    Returns:
        Path of the written file. The enclosing directory is created with
        ``tempfile.mkdtemp`` and is therefore NOT removed automatically;
        the caller is responsible for deleting it when done.
    """
    # mkdtemp instead of TemporaryDirectory: the latter removes the directory
    # as soon as its object is garbage-collected, i.e. when this function
    # returns, which left callers with a path to an already-deleted file.
    temp_dir = tempfile.mkdtemp()

    # Keep only the final path component so a client-supplied name cannot
    # place the file outside temp_dir; fall back to a fixed name if empty.
    file_name = os.path.basename(uploaded_file.name) or "upload"
    file_path = os.path.join(temp_dir, file_name)

    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
|
|
|
|
|
|
# Short labels produced by the audio classifier -> display names.
_EMOTION_LABELS = {
    'neu': 'Neutral',
    'ang': 'Angry',
    'hap': 'Happy',
    'sad': 'Sadness',
}


def emotion(file_path):
    """Classify the emotion of the audio file at ``file_path``.

    Args:
        file_path: Path to an audio file, as typed into the textbox.

    Returns:
        The display name for the classifier's label ('Neutral', 'Angry',
        'Happy', 'Sadness'), 'Unknown' for any other label, or a prompt
        message when no path was provided.
    """
    # Treat whitespace-only input like an empty one instead of handing a
    # blank "path" to the classifier.
    if isinstance(file_path, str):
        file_path = file_path.strip()
    if not file_path:
        return "Please provide the path to an audio file."

    # classify_file returns four values; only the text label is used here.
    _, _, _, text_lab = classifier.classify_file(file_path)
    if isinstance(text_lab, list):
        # Labels come back as a list; take the first entry.
        text_lab = text_lab[0]

    return _EMOTION_LABELS.get(text_lab, 'Unknown')
|
|
|
|
|
|
|
|
|
|
|
|
# Audio tab: the user types a file path, emotion() returns the label text.
audio_model = gr.Interface(
    fn=emotion,
    inputs="textbox",
    outputs="textbox",
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# (tab title, interface) pairs in display order; keeping each title next to
# its interface prevents the two lists from drifting out of step.
_tabs = [
    ("Text Emotion Recognition", text_model),
    ("Image Emotion Recognition", image_mode),
    ("Audio Emotion Recognition", audio_model),
]
main_model = gr.TabbedInterface(
    [interface for _, interface in _tabs],
    [tab_title for tab_title, _ in _tabs],
)
|
|
|
|
# Start the Gradio server only when this file is executed as a script, not
# when it is imported. The dunder names are required: the single-underscore
# spelling refers to an undefined variable and raises NameError.
if __name__ == "__main__":
    main_model.launch()