import torch
import numpy as np
import gradio as gr
import cv2
import time
import os
import onnxruntime
from pathlib import Path
from ultralytics import YOLO

# Model setup: make sure an ONNX copy of YOLOv5n exists under models/, then
# open an onnxruntime session on it. The export is performed only when the
# ONNX file is missing, so restarting the app does not repeat it.
os.makedirs("models", exist_ok=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model_path = Path("models/yolov5n.onnx")

if not model_path.exists():
    print("Downloading YOLOv5n model and converting to ONNX...")
    model = YOLO("yolov5n.pt")  # Use "yolov5x.pt" if you want the larger model

    # export() returns the path of the file it wrote. Use that path instead of
    # guessing the name: the exporter names the file after the weights it
    # actually loaded, which may differ from the name requested above.
    exported_path = Path(str(model.export(format="onnx", dynamic=True)))
    os.replace(exported_path, model_path)
    del model  # Free memory

# Prefer CUDA, but only request providers this onnxruntime build offers so the
# app still starts on machines without a GPU.
preferred_providers = ("CUDAExecutionProvider", "CPUExecutionProvider")
available_providers = set(onnxruntime.get_available_providers())
providers = [p for p in preferred_providers if p in available_providers]

# Loading ONNX model for ultra-fast inference
session = onnxruntime.InferenceSession(
    str(model_path),
    providers=providers or ["CPUExecutionProvider"],
)

# Per-class drawing palette: one row of three values in [0, 255) for each of
# 80 class ids. Seeding NumPy's global RNG first keeps the palette identical
# from one run to the next.
np.random.seed(42)
colors = np.random.uniform(low=0, high=255, size=(80, 3))

# Running totals used to report the average FPS across calls.
total_inference_time = inference_count = 0

def _decode_detections(raw, conf_threshold=0.3, iou_threshold=0.45):
    """Turn one image's raw model output into final detection rows.

    Three layouts are recognised. They are assumptions about the exporter
    that produced the model and should be confirmed against the real model:

    * ``(n, 6)``: rows are taken to be finished detections
      ``[x1, y1, x2, y2, conf, class_id]`` and are only thresholded.
    * ``(4 + classes, n)`` (fewer rows than columns): treated as
      ``[cx, cy, w, h, class scores...]`` stored channels-first.
    * ``(n, 5 + classes)``: treated as
      ``[cx, cy, w, h, objectness, class scores...]``.

    Args:
        raw: 2-D array-like holding the model output for a single image.
        conf_threshold: Minimum confidence a detection needs to be kept.
        iou_threshold: Overlap above which the weaker of two boxes of the
            same class is suppressed.

    Returns:
        Float32 array of shape ``(k, 6)`` with rows
        ``[x1, y1, x2, y2, conf, class_id]``; ``k`` may be 0.
    """
    empty = np.empty((0, 6), dtype=np.float32)
    preds = np.asarray(raw, dtype=np.float32)
    if preds.ndim != 2 or preds.size == 0:
        return empty

    if preds.shape[1] == 6:
        return preds[preds[:, 4] >= conf_threshold]

    if preds.shape[0] < preds.shape[1]:
        # Channels-first head output: make every row one candidate box.
        preds = preds.T
        class_scores = preds[:, 4:]
    else:
        # Objectness column present: final score = objectness * class score.
        class_scores = preds[:, 5:] * preds[:, 4:5]
    if class_scores.shape[1] == 0:
        return empty

    class_ids = class_scores.argmax(axis=1)
    confs = class_scores.max(axis=1)
    keep = confs >= conf_threshold
    if not keep.any():
        return empty

    boxes = preds[keep, :4].astype(np.float64)
    confs = confs[keep]
    class_ids = class_ids[keep]

    # Centre/size -> top-left/size, the box form cv2.dnn.NMSBoxes expects.
    boxes[:, :2] -= boxes[:, 2:] / 2.0

    # Shift every class into its own coordinate region so that boxes of
    # different classes can never suppress each other during NMS.
    class_offset = 7680.0
    shifted = boxes.copy()
    shifted[:, :2] += class_ids[:, None] * class_offset
    kept = cv2.dnn.NMSBoxes(
        shifted.tolist(), confs.tolist(), conf_threshold, iou_threshold
    )
    # Depending on the OpenCV version the result is a tuple, an (n, 1) array
    # or a flat array; flatten all of them to a 1-D index array.
    kept = np.asarray(kept, dtype=np.int64).reshape(-1)

    top_left = boxes[kept, :2]
    bottom_right = top_left + boxes[kept, 2:]
    rows = np.column_stack(
        (top_left, bottom_right, confs[kept], class_ids[kept])
    )
    return rows.astype(np.float32)


def detect_objects(image):
    """Run the ONNX detector on one image and return an annotated copy.

    The image is resized to 416x416, scaled to [0, 1], and fed to the
    module-level ``session``. Detections with confidence of at least 0.3 are
    drawn on the resized frame together with the FPS of this call and the
    average FPS over all calls so far (tracked in the module-level
    ``total_inference_time`` / ``inference_count``).

    Args:
        image: Image as a NumPy array (height, width[, channels]), or None.

    Returns:
        A 416x416 uint8 image with boxes, labels and FPS text drawn on it,
        or None when ``image`` is None.
    """
    global total_inference_time, inference_count

    if image is None:
        return None

    start_time = time.perf_counter()

    # The network input needs exactly three channels.
    if image.ndim == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    elif image.shape[2] == 4:
        image = np.ascontiguousarray(image[:, :, :3])

    # Keep the resized frame for drawing; build the network input separately.
    resized = cv2.resize(image, (416, 416))
    tensor = resized.astype(np.float32) / 255.0
    tensor = np.transpose(tensor, (2, 0, 1))
    tensor = np.ascontiguousarray(np.expand_dims(tensor, axis=0))

    # Run inference
    inputs = {session.get_inputs()[0].name: tensor}
    output = session.run(None, inputs)
    detections = _decode_detections(output[0][0], conf_threshold=0.3)

    inference_time = time.perf_counter() - start_time
    total_inference_time += inference_time
    inference_count += 1
    # Guard the divisions: a zero interval must not crash the request.
    fps = 1.0 / inference_time if inference_time > 0 else float("inf")
    avg_fps = (
        inference_count / total_inference_time
        if total_inference_time > 0
        else float("inf")
    )

    # Draw on the resized uint8 frame. OpenCV drawing functions need a
    # C-contiguous array, which a transposed view of the input tensor is not.
    output_image = np.ascontiguousarray(resized, dtype=np.uint8)

    for x1, y1, x2, y2, conf, class_id in detections:
        # Only coordinates and the class id become ints; the confidence stays
        # a float so the label shows its real value.
        x1, y1, x2, y2 = (int(round(float(v))) for v in (x1, y1, x2, y2))
        class_id = int(class_id)
        conf = float(conf)

        # Wrap the index so a class id outside the palette cannot raise.
        color = colors[class_id % len(colors)].tolist()
        cv2.rectangle(output_image, (x1, y1), (x2, y2), color, 3)
        label = f"Class {class_id} {conf:.2f}"
        cv2.putText(output_image, label, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

    # Display FPS
    cv2.putText(output_image, f"FPS: {fps:.2f}", (20, 40),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.putText(output_image, f"Avg FPS: {avg_fps:.2f}", (20, 70),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    return output_image

# Gradio Interface
example_images = ["spring_street_after.jpg", "pexels-hikaique-109919.jpg"]
os.makedirs("examples", exist_ok=True)

# Examples are cached when the UI is built, which runs the detector on every
# example file; a missing file would therefore abort startup. Keep only the
# files that exist, looking in the working directory first and then in
# examples/ (presumably where they are meant to live, since that directory
# is created above -- confirm).
available_examples = []
for example_name in example_images:
    for candidate in (Path(example_name), Path("examples") / example_name):
        if candidate.is_file():
            available_examples.append(str(candidate))
            break

with gr.Blocks(title="Optimized YOLOv5 Object Detection") as demo:
    gr.Markdown("# **Optimized YOLOv5 Object Detection** 🚀")

    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(label="Input Image", type="numpy")
            submit_button = gr.Button("Detect Objects", variant="primary")
            clear_button = gr.Button("Clear")

        with gr.Column(scale=1):
            output_image = gr.Image(label="Detected Objects", type="numpy")

    # Only show the example gallery when there is something to put in it.
    if available_examples:
        gr.Examples(
            examples=available_examples,
            inputs=input_image,
            outputs=output_image,
            fn=detect_objects,
            cache_examples=True
        )

    submit_button.click(fn=detect_objects, inputs=input_image, outputs=output_image)
    # Reset both image panes.
    clear_button.click(lambda: (None, None), None, [input_image, output_image])

demo.launch()