import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForImageClassification
from torch.nn.functional import sigmoid
import torch
from PIL import Image
from torchvision import transforms
import requests
# Load text emotion model
model_name = "SamLowe/roberta-base-go_emotions"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
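# Note: SamLowe/roberta-base-go_emotions is a multi-label classifier over the 28
# GoEmotions categories, which is why get_emotions() below applies a sigmoid and a
# user-adjustable threshold rather than a single softmax argmax.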
# Load image emotion model (ResNet-50 fine-tuned on FER2013/CK+ facial expressions)
image_model_name = "Celal11/resnet-50-finetuned-FER2013CKPlus-0.003"
image_emotion_model = AutoModelForImageClassification.from_pretrained(image_model_name)
# No tokenizer is needed for the vision model; preprocessing is handled by the
# torchvision transform below.
# Transform for image preprocessing
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
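# The 224x224 resize and the mean/std values above are the standard ImageNet
# preprocessing statistics commonly used with ResNet-50 backbones.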
# FER labels
image_labels = [
    "Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral", "Contempt"
]
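# Assumption: this label order matches the classifier head of the fine-tuned checkpoint.
# If image_emotion_model.config.id2label is populated, it is the safer source of truth.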
# Analyze image emotion
def analyze_image_emotion(image_path):
    if image_path is None:
        return "No image provided."
    image = Image.open(image_path).convert("RGB")
    img_tensor = image_transform(image).unsqueeze(0)
    with torch.no_grad():
        output = image_emotion_model(img_tensor)
    # Facial expression recognition is single-label, so softmax gives a proper distribution
    probs = torch.softmax(output.logits, dim=-1)[0]
    top_idx = torch.argmax(probs).item()
    return f"{image_labels[top_idx]} ({probs[top_idx]:.2f})"
# Emotion label to icon mapping (subset)
emotion_icons = {
    "admiration": "😍",
    "amusement": "😂",
    "anger": "😡",
    "annoyance": "😒",
    "approval": "👍",
    "caring": "💗",
    "confusion": "🤔",
    "curiosity": "😮",
    "desire": "🤤",
    "disappointment": "😞",
    "disapproval": "👎",
    "disgust": "🤮",
    "embarrassment": "😳",
    "excitement": "😆",
    "fear": "😱",
    "gratitude": "🙏",
    "grief": "😭",
    "joy": "😄",
    "love": "❤️",
    "nervousness": "🤧",
    "optimism": "😊",
    "pride": "🏆",
    "realization": "🤯",
    "relief": "😌",
    "remorse": "😔",
    "sadness": "😢",
    "surprise": "😲",
    "neutral": "😐"
}
# Analyze text emotion
def get_emotions(text, threshold):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = sigmoid(logits)[0]
    # Keep (label, probability) pairs so each reported score matches its own label
    detected = [(model.config.id2label[i], p.item()) for i, p in enumerate(probs) if p > threshold]
    icons = [f"{emotion_icons.get(label, '')} {label.capitalize()} ({p:.2f})" for label, p in detected]
    return ", ".join(icons) if icons else "No strong emotion detected."
# Combined analysis
def analyze_combined(text, threshold, image):
    text_result = get_emotions(text, threshold)
    image_result = analyze_image_emotion(image)
    return text_result, image_result
# Gradio UI
custom_css = """
body {
    background: linear-gradient(to right, #f9f9f9, #d4ecff);
    font-family: 'Segoe UI', sans-serif;
}
.gr-button {
    background-color: #007BFF !important;
    color: white !important;
    border-radius: 8px !important;
    font-weight: bold;
}
.gr-button:hover {
    background-color: #0056b3 !important;
}
.gr-textbox {
    border-radius: 8px !important;
    border: 1px solid #ccc !important;
    padding: 10px !important;
}
.output-textbox {
    font-size: 1.5rem;
    font-weight: bold;
    color: #333;
    background-color: #f1f9ff;
    border-radius: 8px;
    padding: 10px;
    border: 1px solid #007BFF;
}
"""
demo = gr.Interface(
    fn=analyze_combined,
    inputs=[
        gr.Textbox(lines=5, placeholder="Write a sentence or a full paragraph...", label="Your Text"),
        gr.Slider(minimum=0.1, maximum=0.9, value=0.3, step=0.05, label="Threshold"),
        gr.Image(type="filepath", label="Upload Face Photo")
    ],
    outputs=[
        gr.Textbox(label="Detected Text Emotions", elem_classes=["output-textbox"]),
        gr.Textbox(label="Detected Photo Emotion", elem_classes=["output-textbox"])
    ],
    title="🥰 Multi-Modal Emotion Detector",
    description="Analyze emotion from both text and a facial photo. Adjust the threshold for text emotion sensitivity.",
    theme="default",
    css=custom_css
)
demo.launch()
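# demo.launch() serves the app locally; passing share=True would additionally create a
# temporary public Gradio link if needed.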