"""Gradio app that classifies an uploaded image as 'safe' or 'unsafe'.

The script loads a fine-tuned CLIP model and its processor from the Hugging
Face Model Hub, runs one start-up inference on a sample image to verify that
the model and processor work together, and then serves a Gradio interface
backed by ``classify_image``.
"""

import io

import gradio as gr
import requests
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

# Step 1: Load Fine-Tuned Model from Hugging Face Model Hub
model_name = "quadranttechnologies/retail-content-safety-clip-finetuned"

print("Initializing the application...")

try:
    print("Loading the model from Hugging Face Model Hub...")
    # NOTE: the original call passed trust_remote_code=True. That flag is only
    # meaningful for the Auto* classes; CLIPModel is a concrete built-in
    # architecture, so the flag was dropped rather than advertising that
    # remote code execution is acceptable here.
    model = CLIPModel.from_pretrained(model_name)
    processor = CLIPProcessor.from_pretrained(model_name)
    print("Model and processor loaded successfully.")
except Exception as e:
    print(f"Error loading the model or processor: {e}")
    # Chain the cause so the original traceback is preserved.
    raise RuntimeError(f"Failed to load model: {e}") from e

# Step 2: Minimal Test Case to Verify Model and Processor
try:
    print("Running a minimal test case with the model...")

    # Test Image URL
    url = "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png"
    # A timeout keeps start-up from hanging forever on a stalled connection,
    # and raise_for_status() turns an HTTP error into a clear failure instead
    # of handing an error page to PIL.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    image = Image.open(io.BytesIO(response.content))

    # Define test categories
    test_categories = ["safe", "unsafe"]

    # Process the image
    test_inputs = processor(
        text=test_categories, images=image, return_tensors="pt", padding=True
    )
    print(f"Test inputs processed: {test_inputs}")

    # Perform inference (no gradients are needed for prediction)
    with torch.no_grad():
        test_outputs = model(**test_inputs)
    print(f"Test outputs: {test_outputs}")

    # Check probabilities
    test_logits = test_outputs.logits_per_image
    test_probs = test_logits.softmax(dim=1)
    print(f"Test probabilities: {test_probs}")
except Exception as e:
    print(f"Error during the minimal test case: {e}")
    raise RuntimeError(f"Test case failed: {e}") from e


# Step 3: Define the Inference Function
def classify_image(image):
    """Classify an image as 'safe' or 'unsafe' and return probabilities.

    The image is scored against the text prompts "safe" and "unsafe" with the
    module-level CLIP ``model`` and ``processor``; the per-image logits are
    turned into probabilities with a softmax.

    Args:
        image (PIL.Image.Image): Uploaded image. ``None`` or an object without
            a ``convert`` attribute is rejected.

    Returns:
        tuple: ``(predicted_category, probabilities)`` where
        ``predicted_category`` is "safe" or "unsafe" and ``probabilities``
        maps "safe" and "unsafe" to float probabilities in [0, 1] (the numeric
        form ``gr.Label`` expects for its confidence bars). If anything goes
        wrong, returns ``("Error: <message>", {})`` instead of raising.
    """
    try:
        print("Starting image classification...")

        # Check if the image is valid
        if image is None:
            raise ValueError("No image provided. Please upload a valid image.")
        if not hasattr(image, "convert"):
            raise ValueError("Uploaded file is not a valid image format.")

        # Define main categories
        categories = ["safe", "unsafe"]
        print(f"Categories: {categories}")

        # Process the image
        print("Processing the image with the processor...")
        inputs = processor(
            text=categories, images=image, return_tensors="pt", padding=True
        )
        print(f"Processed inputs: {inputs}")

        # Perform inference; no_grad avoids building an autograd graph that
        # is never used for prediction.
        print("Running model inference...")
        with torch.no_grad():
            outputs = model(**inputs)
        print(f"Model outputs: {outputs}")

        # Calculate probabilities
        logits_per_image = outputs.logits_per_image
        probs = logits_per_image.softmax(dim=1)
        print(f"Probabilities: {probs}")

        # Extract probabilities for each category; the column order follows
        # the order of `categories`.
        safe_prob = probs[0][0].item()
        unsafe_prob = probs[0][1].item()

        # Determine the predicted category
        predicted_category = "safe" if safe_prob > unsafe_prob else "unsafe"
        print(f"Predicted category: {predicted_category}")

        # Return raw floats rather than formatted "NN.NN%" strings: gr.Label
        # needs numeric confidences to render its bars.
        return predicted_category, {"safe": safe_prob, "unsafe": unsafe_prob}
    except Exception as e:
        # Deliberate best-effort: surface the error text in the UI instead of
        # crashing the request, with an empty dict so no probabilities are
        # reported.
        print(f"Error during classification: {e}")
        return f"Error: {str(e)}", {}


# Step 4: Set Up Gradio Interface
iface = gr.Interface(
    fn=classify_image,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Textbox(label="Predicted Category"),  # Display the predicted category prominently
        gr.Label(label="Probabilities"),  # Display probabilities with a progress bar
    ],
    title="Content Safety Classification",
    description="Upload an image to classify it as 'safe' or 'unsafe' with corresponding probabilities.",
)

# Step 5: Launch Gradio Interface
if __name__ == "__main__":
    print("Launching Gradio interface...")
    iface.launch()