|
import gradio as gr |
|
import torch |
|
import cv2 |
|
import numpy as np |
|
from PIL import Image |
|
from ultralytics import YOLO |
|
import json |
|
|
|
|
|
# Path to the trained YOLO weights file.
model_path = "best.pt"

# Load the detector once at import time so every request reuses the same model.
model = YOLO(model_path)
|
|
|
def preprocess_image(image, target_width=800):
    """Apply enhancement filters and resize an image before detection.

    The image is converted to a NumPy array, its intensities are scaled by
    0.8, a 3x3 Gaussian blur is applied, and the result is sharpened with a
    3x3 kernel. Finally the image is resized to ``target_width`` pixels wide
    while preserving the aspect ratio.

    Args:
        image: Array-like image (e.g. a NumPy array or PIL image) with at
            least two dimensions (height, width).
        target_width: Width in pixels of the returned image. Defaults to 800.

    Returns:
        The filtered and resized image as a NumPy array.

    Raises:
        ValueError: If ``target_width`` is not positive, or if ``image`` is
            ``None``, has fewer than two dimensions, or has no pixels.
    """
    if target_width <= 0:
        raise ValueError("target_width must be a positive integer")
    if image is None:
        # Gradio passes None when the user submits without an image.
        raise ValueError("No image provided")

    image = np.asarray(image)
    if image.ndim < 2 or image.size == 0:
        raise ValueError("Image must be a non-empty array with height and width")

    # Scale pixel intensities by 0.8 (no offset), then smooth lightly.
    image = cv2.convertScaleAbs(image, alpha=0.8, beta=0)
    image = cv2.GaussianBlur(image, (3, 3), 0)

    # Sharpening kernel: weights sum to 1, so overall brightness is preserved.
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    image = cv2.filter2D(image, -1, kernel)

    height, width = image.shape[:2]
    # Keep the aspect ratio; never let the height collapse to 0 for very wide
    # inputs, because cv2.resize rejects a zero-sized target.
    new_height = max(1, int((target_width / width) * height))
    return cv2.resize(image, (target_width, new_height))
|
|
|
def imageRotation(image):
    """Placeholder: currently returns ``image`` unchanged.

    Args:
        image: Any image object.

    Returns:
        The same object that was passed in.
    """
    return image
|
|
|
def vision_ai_api(image, label):
    """Simulate a Vision AI API call and return a fixed dummy response.

    Args:
        image: The cropped image that would be sent to the API. It is not
            inspected by this stub.
        label: Name of the detected class; echoed back in the response.

    Returns:
        A dict with the given ``label`` and a hard-coded ``extracted_data``
        payload (``name``, ``dob``, ``id_number``).
    """
    return {
        "label": label,
        "extracted_data": {
            "name": "John Doe",
            "dob": "01-01-1990",
            "id_number": "1234567890",
        },
    }
|
|
|
def predict(image):
    """Detect the front and back of a license card and extract data from each.

    The image is preprocessed, run through the YOLO model with a confidence
    threshold of 0.85, and the best detection of each class is cropped and
    passed to ``vision_ai_api``.

    Args:
        image: Input image as supplied by the Gradio ``"image"`` input.

    Returns:
        A 5-tuple ``(front_image, front_response, back_image, back_response,
        labels)`` where the images are PIL crops (or ``None`` when the class
        was not detected), the responses are JSON strings (``"{}"`` when the
        class was not detected), and ``labels`` is a list with one
        ``"<class> <confidence>"`` entry per detection plus an optional
        ``"Missing: ..."`` entry.
    """
    image = preprocess_image(image)
    img_height, img_width = image.shape[:2]

    # NOTE(review): Ultralytics documents NumPy inputs as BGR, whereas Gradio
    # image inputs are normally RGB -- confirm the channel order matches what
    # the model was trained on.
    results = model(image, conf=0.85)

    detected_classes = set()
    labels = []
    # class name -> (confidence, (x1, y1, x2, y2)) of the best box seen so far
    best_boxes = {}

    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = float(box.conf[0])
            class_name = model.names[int(box.cls[0])]

            detected_classes.add(class_name)
            labels.append(f"{class_name} {conf:.2f}")

            # Clamp to the image so negative or oversized coordinates cannot
            # wrap around or produce an empty slice.
            x1, x2 = max(0, x1), min(img_width, x2)
            y1, y2 = max(0, y1), min(img_height, y2)
            if x2 <= x1 or y2 <= y1:
                continue  # degenerate box: nothing to crop

            # Keep only the most confident detection per class instead of
            # letting a later, weaker detection overwrite an earlier one.
            if class_name not in best_boxes or conf > best_boxes[class_name][0]:
                best_boxes[class_name] = (conf, (x1, y1, x2, y2))

    cropped_images = {}
    for class_name, (_, (x1, y1, x2, y2)) in best_boxes.items():
        cropped_pil = Image.fromarray(image[y1:y2, x1:x2])
        api_response = vision_ai_api(cropped_pil, class_name)
        cropped_images[class_name] = {
            "image": cropped_pil,
            "api_response": json.dumps(api_response, indent=4),
        }

    possible_classes = {"front", "back"}
    missing_classes = possible_classes - detected_classes
    if missing_classes:
        # Sort so the message is stable across runs (set order is arbitrary).
        labels.append(f"Missing: {', '.join(sorted(missing_classes))}")

    front = cropped_images.get("front", {})
    back = cropped_images.get("back", {})

    return (
        front.get("image"),
        front.get("api_response", "{}"),
        back.get("image"),
        back.get("api_response", "{}"),
        labels,
    )
|
|
|
|
|
# Gradio UI: one image in; front crop + JSON text, back crop + JSON text, and
# the detection labels out (matching the 5-tuple returned by predict).
iface = gr.Interface(
    fn=predict,
    inputs="image",
    outputs=["image", "text", "image", "text", "text"],
    title="License Field Detection (Front & Back Card)",
    description="Detect front & back of a license card, crop the images, and call Vision AI API separately for each.",
)

# Launch the app. This call is unguarded, so it also runs on import.
iface.launch()
|
|