Spaces:

syedfaisalabrar
/

License_Classification

Sleeping

App Files Files Community

License_Classification / app.py

syedfaisalabrar

Update app.py

f9de43f verified 2 months ago

raw

history blame

4.08 kB

	import gradio as gr
	import torch
	import cv2
	import numpy as np
	from PIL import Image, ImageEnhance
	from ultralytics import YOLO

	# Load the YOLO detection weights once, when the module is executed, so that
	# every request reuses the same model object. The original author labels this
	# a YOLOv11 model; "best.pt" is a relative path, so it is resolved against the
	# process's working directory.
	model_path = "best.pt"
	model = YOLO(model_path)

	# ---------------- Preprocessing Function ---------------- #
	def preprocessing(image, width=800):
	    """Sharpen, boost contrast, slightly darken, and resize an image.

	    Args:
	        image: A PIL image, or any array-like that ``np.array`` can convert
	            into something ``Image.fromarray`` accepts.
	        width: Target width in pixels. The height is scaled by the same
	            factor so the aspect ratio is kept. Defaults to 800.

	    Returns:
	        The enhanced and resized PIL image.
	    """
	    image = Image.fromarray(np.array(image))

	    # Enhancement factors: 1.0 leaves the image unchanged, values above 1.0
	    # strengthen the property and values below 1.0 weaken it.
	    image = ImageEnhance.Sharpness(image).enhance(2.0)
	    image = ImageEnhance.Contrast(image).enhance(1.5)
	    image = ImageEnhance.Brightness(image).enhance(0.8)

	    aspect_ratio = image.height / image.width
	    # int() truncates, so an extremely wide image would yield a height of 0,
	    # which is not a valid resize target. Clamp to a single pixel instead.
	    height = max(1, int(width * aspect_ratio))
	    return image.resize((width, height))

	# ---------------- Dummy Image Rotation Function ---------------- #
	def imageRotation(image):
	    """Placeholder for a rotation step; hands the input back untouched."""
	    unrotated = image
	    return unrotated

	# ---------------- Document Detection Function ---------------- #
	def detect_document(image):
	    """Detect the front and back of a document with YOLO and annotate them.

	    Args:
	        image: A PIL image, or an array that ``Image.fromarray`` accepts.

	    Returns:
	        A tuple ``(annotated, labels, bounding_boxes)``:
	        ``annotated`` is a PIL image with each detection's box and caption
	        drawn on it; ``labels`` is a list of "<class> <confidence>" strings,
	        followed by one "Missing: ..." entry when "front" or "back" was not
	        detected; ``bounding_boxes`` is a list of
	        ``(x1, y1, x2, y2, class_name, confidence)`` tuples with integer
	        pixel coordinates and a float confidence.
	    """
	    if not isinstance(image, Image.Image):
	        image = Image.fromarray(np.asarray(image))

	    # Run inference on the PIL image rather than on its ndarray: Ultralytics
	    # interprets raw ndarrays as BGR (OpenCV order) but PIL images as RGB,
	    # so handing it the RGB array would feed the model swapped channels.
	    results = model(image, conf=0.85)

	    # np.array() makes a writable copy for OpenCV to draw on.
	    canvas = np.array(image)

	    detected_classes = set()
	    labels = []
	    bounding_boxes = []

	    for result in results:
	        for box in result.boxes:
	            x1, y1, x2, y2 = map(int, box.xyxy[0])
	            # Store a plain float so callers are not handed a tensor.
	            conf = float(box.conf[0])
	            class_name = model.names[int(box.cls[0])]

	            detected_classes.add(class_name)
	            label = f"{class_name} {conf:.2f}"
	            labels.append(label)
	            bounding_boxes.append((x1, y1, x2, y2, class_name, conf))

	            cv2.rectangle(canvas, (x1, y1), (x2, y2), (0, 255, 0), 2)
	            # putText's origin is the text baseline; keep it far enough from
	            # the top edge that the caption of a box touching the top of
	            # the image is still drawn inside the canvas.
	            text_y = max(y1 - 10, 15)
	            cv2.putText(canvas, label, (x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

	    possible_classes = {"front", "back"}
	    # Sort so the message does not depend on set iteration order.
	    missing_classes = sorted(possible_classes - detected_classes)
	    if missing_classes:
	        labels.append(f"Missing: {', '.join(missing_classes)}")

	    return Image.fromarray(canvas), labels, bounding_boxes

	# ---------------- Cropping Function ---------------- #
	def crop_image(image, bounding_boxes):
	    """Crop one region per detected class out of the image.

	    Args:
	        image: A PIL image or array-like holding the pixels to crop from.
	        bounding_boxes: Iterable of
	            ``(x1, y1, x2, y2, class_name, confidence)`` tuples in pixel
	            coordinates.

	    Returns:
	        A dict mapping each class name to the PIL image of its crop. When a
	        class appears more than once, the highest-confidence box is used.
	        Boxes that select no pixels are skipped.
	    """
	    pixels = np.array(image)
	    best_by_class = {}

	    for x1, y1, x2, y2, class_name, conf in bounding_boxes:
	        # Clamp negatives to 0 so a slightly out-of-frame box cannot wrap
	        # around through negative slice indices.
	        crop = pixels[max(y1, 0):max(y2, 0), max(x1, 0):max(x2, 0)]
	        if crop.size == 0:
	            # A degenerate box has nothing to turn into an image.
	            continue

	        kept = best_by_class.get(class_name)
	        # Keep only the most confident detection per class instead of
	        # letting whichever box comes last silently overwrite the others.
	        if kept is None or conf > kept[0]:
	            best_by_class[class_name] = (conf, crop)

	    return {
	        class_name: Image.fromarray(crop)
	        for class_name, (_, crop) in best_by_class.items()
	    }

	# ---------------- Vision AI API Call (Dummy) ---------------- #
	def vision_ai_api(image, doc_type):
	    """Stand-in for the Vision AI call; builds a canned response.

	    The ``image`` argument is accepted but never read. The response echoes
	    ``doc_type`` and carries a fixed confidence of 0.99.
	    """
	    fake_text = "Dummy OCR result for " + doc_type
	    response = {}
	    response["document_type"] = doc_type
	    response["extracted_text"] = fake_text
	    response["confidence"] = 0.99
	    return response

	# ---------------- Prediction Function ---------------- #
	def predict(image):
	    """Run the pipeline: preprocess -> rotate -> detect -> crop -> Vision AI.

	    Args:
	        image: The uploaded image as delivered by Gradio, or ``None`` when
	            the form was submitted without one.

	    Returns:
	        A tuple ``(detected_image, labels, api_results)``: the annotated PIL
	        image, the list of detection labels, and a dict with the Vision AI
	        response for "front" and "back" (``None`` for a side that was not
	        detected).

	    Raises:
	        gr.Error: If no image was provided.
	    """
	    if image is None:
	        # Without this guard the failure surfaces deep inside preprocessing
	        # as an opaque conversion error; gr.Error is shown to the user.
	        raise gr.Error("Please upload an image before submitting.")

	    processed_image = preprocessing(image)
	    rotated_image = imageRotation(processed_image)  # placeholder step
	    detected_image, labels, bounding_boxes = detect_document(rotated_image)

	    # Crop from the clean image, not from the annotated one, so the drawn
	    # boxes and captions are not part of what is sent on.
	    cropped_images = crop_image(rotated_image, bounding_boxes)

	    # Query Vision AI once per side that was actually detected.
	    api_results = {
	        side: (
	            vision_ai_api(cropped_images[side], side)
	            if side in cropped_images
	            else None
	        )
	        for side in ("front", "back")
	    }

	    return detected_image, labels, api_results

	# ---------------- Gradio Interface ---------------- #
	# Build the web UI: a single "image" input is passed to predict(), and its
	# three return values are rendered, in order, by the "image", "text" and
	# "json" output components.
	iface = gr.Interface(
	fn=predict,
	inputs="image",
	outputs=["image", "text", "json"],
	title="License Field Detection (Front & Back Card)"
	)

	# Start the Gradio server as soon as this module is executed.
	iface.launch()