import cv2
import numpy as np
from ultralytics import YOLO
import torch
import os

# Report whether the PyTorch MPS backend is available; query it once and reuse
# the answer when deciding where to place the model.
mps_available = torch.backends.mps.is_available()
print(f"MPS available: {mps_available}")

# Load the trained model and move it to the MPS device when available;
# otherwise the model is left on whatever device YOLO chose by default.
model = YOLO('best.pt')  # Path to your trained model
if mps_available:
    model.to('mps')

# Class names, indexed by the class id the model predicts
# (must match your custom_dataset.yaml).
class_names = ['safe driving', 'drinking', 'eating', 'hair and makeup', 'operating radio', 'talking on phone', 'talking to passenger']

# Open the webcam (0 is usually the default camera)
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Could not open webcam.")
    # Exit with a non-zero status so the failure is visible to the caller.
    # The bare exit() helper is provided by the site module for interactive
    # sessions and reports success (status 0) when called without arguments.
    raise SystemExit(1)

# Request a 320x320 capture size
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 320)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 320)

def trigger_alarm(action):
    """Print an alarm line for any action other than 'safe driving'.

    Args:
        action: Name of the detected action.

    Returns:
        None. The only effect is the message written to standard output.
    """
    if action == 'safe driving':
        return

    # Text-based alarm for Mac. An audible alarm is optional: it could be
    # added here by playing an 'alarm.wav' from the working directory with
    # the playsound package, guarded so a playback failure is only reported.
    print(f"ALARM: Unsafe behavior detected - {action}!")

# Main capture/inference loop. It is wrapped in try/finally so the camera and
# the display window are always released, even when inference or drawing
# raises or the user interrupts the script with Ctrl+C.
try:
    while cap.isOpened():
        # Read frame from webcam
        ret, frame = cap.read()
        if not ret:
            print("Error: Could not read frame.")
            break

        # Perform inference with a low confidence threshold for more detections
        results = model(frame, conf=0.1)

        # Label shown when nothing is detected in this frame
        display_text = "Safe driving"
        for result in results:
            # Only scores and class ids are needed; box coordinates are never
            # drawn, so they are not copied off the device.
            scores = result.boxes.conf.cpu().numpy()  # Confidence scores
            classes = result.boxes.cls.cpu().numpy()  # Class indices

            # No detections in this result: keep the current label
            if scores.size == 0:
                continue

            # Report the most confident detection
            max_score_idx = scores.argmax()
            detected_action = class_names[int(classes[max_score_idx])]
            confidence = scores[max_score_idx]
            display_text = f"{detected_action}: {confidence:.2f}"
            # Trigger alarm if not safe driving
            trigger_alarm(detected_action)

        # Display the action in top-left corner (no bounding boxes)
        cv2.putText(frame, display_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display the result
        cv2.imshow('YOLO Webcam Detection', frame)

        # Break loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Cleanup
    cap.release()
    cv2.destroyAllWindows()