from typing import Optional

import gradio as gr
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from huggingface_hub import hf_hub_download
from PIL import Image

########################################
# 1. Define Model Architecture
########################################


class MultiTaskModel(nn.Module):
    """Shared backbone feeding two independent linear heads.

    Args:
        backbone: Module that maps an input batch to feature vectors whose
            last dimension is ``feature_dim``.
        feature_dim: Size of the feature vector produced by ``backbone``.
        num_obj_classes: Number of output logits of the object head.
    """

    def __init__(self, backbone: nn.Module, feature_dim: int, num_obj_classes: int) -> None:
        super().__init__()
        self.backbone = backbone
        # Object recognition head
        self.obj_head = nn.Linear(feature_dim, num_obj_classes)
        # Binary classification head (0: AI-generated, 1: Real)
        self.bin_head = nn.Linear(feature_dim, 2)

    def forward(self, x: torch.Tensor) -> tuple:
        """Run the backbone once and return ``(obj_logits, bin_logits)``."""
        feats = self.backbone(x)
        obj_logits = self.obj_head(feats)
        bin_logits = self.bin_head(feats)
        return obj_logits, bin_logits


########################################
# 2. Reconstruct the Model and Load Weights
########################################

# Set the number of object classes (update this to match your training)
num_obj_classes = 139  # Example; change as needed

device = torch.device("cpu")

# Instantiate the backbone (ResNet-50 without its final layer).
# Calling resnet50() with no arguments builds a randomly initialised network
# on both old and new torchvision releases and avoids the deprecated
# ``pretrained=`` keyword; the real weights come from the state dict below.
resnet = models.resnet50()
# Read the feature width from the layer being removed instead of hard-coding
# it (2048 for ResNet-50).
feature_dim = resnet.fc.in_features
resnet.fc = nn.Identity()

# Build the model architecture
model = MultiTaskModel(resnet, feature_dim, num_obj_classes)
model.to(device)

# Download the state dict from HF Hub
repo_id = "Abdu07/multitask-model"
filename = "best_model_new.pt"  # Make sure this is the state dict file
model_path = hf_hub_download(repo_id=repo_id, filename=filename)

# NOTE(review): torch.load unpickles the downloaded file; on torch versions
# where ``weights_only`` does not default to True this can execute arbitrary
# code from the checkpoint. Only load from a repository you trust, or pass
# ``weights_only=True`` once the minimum supported torch version allows it.
state_dict = torch.load(model_path, map_location="cpu")
model.load_state_dict(state_dict)
model.eval()

########################################
# 3. Define Label Mappings and Transforms
########################################

# Update these mappings with your actual training labels.
# Indices missing from this dict are reported as "Unknown" by predict_image.
idx_to_obj_label = {
    0: "cat",
    1: "dog",
    2: "car",
    # ... add your object classes here ...
}

# Index order matches the binary head: 0 -> AI-generated, 1 -> Real.
bin_label_names = ["AI-Generated", "Real"]

# Define the validation transforms (same as used during training/validation)
val_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

########################################
# 4. Define the Inference Function
########################################


def predict_image(img: Optional[Image.Image]) -> str:
    """Classify an uploaded image with both model heads.

    Args:
        img: PIL image supplied by the Gradio ``Image`` input, or ``None``
            when the form is submitted without an image.

    Returns:
        ``"Prediction: <object> (<AI-Generated|Real>)"``, or a short prompt
        asking for an upload when ``img`` is ``None``. Object indices that
        are not present in ``idx_to_obj_label`` are shown as ``"Unknown"``.
    """
    # Gradio passes None when nothing was uploaded; answer with a message
    # instead of failing on ``None.convert``.
    if img is None:
        return "No image provided. Please upload an image."

    # Ensure image is in RGB
    img = img.convert("RGB")

    # Apply validation transforms and add the batch dimension:
    # shape [1, 3, 224, 224] after CenterCrop(224).
    img_tensor = val_transforms(img).unsqueeze(0).to(device)

    with torch.no_grad():
        obj_logits, bin_logits = model(img_tensor)

    obj_pred = torch.argmax(obj_logits, dim=1).item()
    bin_pred = torch.argmax(bin_logits, dim=1).item()

    obj_name = idx_to_obj_label.get(obj_pred, "Unknown")
    bin_name = bin_label_names[bin_pred]

    return f"Prediction: {obj_name} ({bin_name})"


########################################
# 5. Create Gradio UI
########################################

demo = gr.Interface(
    fn=predict_image,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="Multi-Task Image Classifier",
    description=(
        "Upload an image to receive two predictions:\n"
        "1) The primary object in the image,\n"
        "2) Whether the image is AI-generated or Real."
    ),
)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", share=True)