import gradio as gr
import torch
import cv2
import numpy as np
from PIL import Image, ImageEnhance
from ultralytics import YOLO


# Weights file handed to the YOLO constructor below. The path is relative, so
# it is resolved against the process's current working directory.
model_path = "best.pt" 
# Built once when the module is loaded; detect_document() calls it for
# inference and reads its ``names`` mapping to turn class ids into labels.
model = YOLO(model_path)

def preprocessing(image, target_width=800):
    """Enhance an image and resize it to a fixed width.

    The image is sharpened (factor 2.0), given more contrast (factor 1.5)
    and slightly darkened (brightness factor 0.8), then scaled to
    ``target_width`` pixels wide while keeping its aspect ratio.

    Args:
        image: Input picture, either a PIL image or an array that
            ``Image.fromarray`` can interpret.
        target_width: Width in pixels of the returned image. Defaults to
            800, the value that used to be hard-coded.

    Returns:
        The enhanced and resized ``PIL.Image.Image``.
    """
    # Round-trip through NumPy so arrays and PIL images are both accepted.
    image = Image.fromarray(np.array(image))

    image = ImageEnhance.Sharpness(image).enhance(2.0)
    image = ImageEnhance.Contrast(image).enhance(1.5)
    image = ImageEnhance.Brightness(image).enhance(0.8)

    aspect_ratio = image.height / image.width
    # Clamp to one pixel: for an extremely wide input the truncated height
    # would be 0, which PIL's resize rejects.
    height = max(1, int(target_width * aspect_ratio))
    return image.resize((target_width, height))


def imageRotation(image):
    """Placeholder for the rotation step; currently a no-op.

    Args:
        image: Image that a real implementation would rotate.

    Returns:
        The same object that was passed in, unchanged.
    """
    return image


def detect_document(image):
    """Detect the front and back of a document with the YOLO model.

    Runs the module-level ``model`` with a 0.85 confidence threshold, draws
    every detection (green box plus a "<class> <confidence>" caption) onto a
    copy of the image and reports which of the expected classes ("front",
    "back") were not found.

    Args:
        image: Picture as a PIL image or NumPy array. Three-channel input is
            assumed to be in RGB order (the PIL convention), which is also
            how the annotated result is interpreted on return.

    Returns:
        A tuple ``(annotated, labels, bounding_boxes)`` where ``annotated``
        is a PIL image with the detections drawn on it, ``labels`` is a list
        of caption strings (plus a trailing "Missing: ..." entry when a
        class was not detected) and ``bounding_boxes`` is a list of
        ``(x1, y1, x2, y2, class_name, conf)`` tuples with integer pixel
        coordinates and ``conf`` as a plain float.
    """
    frame = np.array(image)  # copy, so the caller's image is never drawn on

    # Ultralytics interprets NumPy input as BGR, while the pipeline works in
    # RGB; hand the model a channel-reversed view so colours are not swapped.
    if frame.ndim == 3 and frame.shape[2] == 3:
        model_input = np.ascontiguousarray(frame[:, :, ::-1])
    else:
        model_input = frame
    results = model(model_input, conf=0.85)

    detected_classes = set()
    labels = []
    bounding_boxes = []

    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            # Plain float instead of a tensor: cheaper to keep around and
            # safe to serialise or compare later.
            conf = float(box.conf[0])
            class_name = model.names[int(box.cls[0])]

            detected_classes.add(class_name)
            label = f"{class_name} {conf:.2f}"
            labels.append(label)
            bounding_boxes.append((x1, y1, x2, y2, class_name, conf))

            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            # Put the caption above the box, or just inside it when the box
            # touches the top edge and the text would fall off the image.
            caption_y = y1 - 10 if y1 - 10 > 10 else y1 + 20
            cv2.putText(frame, label, (x1, caption_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    missing_classes = {"front", "back"} - detected_classes
    if missing_classes:
        # Sorted so the message is stable; set iteration order is not.
        labels.append(f"Missing: {', '.join(sorted(missing_classes))}")

    return Image.fromarray(frame), labels, bounding_boxes


def crop_image(image, bounding_boxes):
    """Crop the best detection of each class out of the image.

    Args:
        image: Picture the boxes refer to (PIL image or array-like).
        bounding_boxes: Iterable of ``(x1, y1, x2, y2, class_name, conf)``
            tuples in pixel coordinates.

    Returns:
        Dict mapping each class name to its cropped ``PIL.Image.Image``.
        When a class appears several times, the box with the highest
        confidence is used. Boxes with no area inside the image are skipped.
    """
    pixels = np.array(image)
    img_h, img_w = pixels.shape[:2]

    cropped_images = {}
    best_conf = {}
    for x1, y1, x2, y2, class_name, conf in bounding_boxes:
        conf = float(conf)
        # Keep only the most confident box per class instead of whichever
        # one happens to come last.
        if class_name in best_conf and conf <= best_conf[class_name]:
            continue

        # Clamp to the image: a negative index would make NumPy slice from
        # the opposite edge and return the wrong region.
        left, right = max(0, x1), min(img_w, x2)
        top, bottom = max(0, y1), min(img_h, y2)
        if right <= left or bottom <= top:
            continue  # nothing left to crop

        best_conf[class_name] = conf
        cropped_images[class_name] = Image.fromarray(pixels[top:bottom, left:right])

    return cropped_images


def vision_ai_api(image, doc_type):
    """Stand-in for the Vision AI call that fabricates a fixed reply.

    The image is ignored. The reply echoes ``doc_type``, carries a
    placeholder OCR string built from it, and a constant confidence of 0.99.
    """
    ocr_text = "Dummy OCR result for " + doc_type
    response = dict(document_type=doc_type, extracted_text=ocr_text)
    response["confidence"] = 0.99
    return response

# ---------------- Prediction Function ---------------- #
def predict(image):
    """Run the full pipeline: preprocess -> detect -> crop -> Vision AI API.

    Args:
        image: Picture to analyse, as delivered by the Gradio image input.
            ``None`` means nothing was uploaded.

    Returns:
        A tuple ``(detected_image, labels, api_results)``: the annotated
        image, the list of detection captions, and a dict with the keys
        "front" and "back" holding the Vision AI response for that side, or
        ``None`` when the side was not detected.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    # Submitting the form without an image hands the function None; fail
    # with a readable message instead of an opaque error from PIL.
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    processed_image = preprocessing(image)
    rotated_image = imageRotation(processed_image)
    detected_image, labels, bounding_boxes = detect_document(rotated_image)

    # Crop from the clean image, not the annotated one, so the drawn boxes
    # and captions do not end up in what is sent to Vision AI.
    cropped_images = crop_image(rotated_image, bounding_boxes)

    # Call Vision AI separately for each side that was detected.
    api_results = {
        side: vision_ai_api(cropped_images[side], side) if side in cropped_images else None
        for side in ("front", "back")
    }

    return detected_image, labels, api_results


# Gradio UI: a single image input wired to predict(), whose three return
# values feed the image, text and JSON outputs in that order.
iface = gr.Interface(
    fn=predict, 
    inputs="image", 
    outputs=["image", "text", "json"],  
    title="License Field Detection (Front & Back Card)"
)

# Starts the app as soon as this module is executed; there is no
# ``__main__`` guard, so importing the module launches it as well.
iface.launch()