"""Real-time driver-behaviour monitor.

Reads frames from a webcam, runs a trained YOLO model on each frame, overlays
the most confident detected action in the top-left corner of the preview
window and prints an alarm whenever that action is not 'safe driving'.
Press 'q' in the preview window to quit.
"""
import os
import sys

import cv2
import numpy as np
import torch
from ultralytics import YOLO

MODEL_PATH = 'best.pt'  # Path to your trained model
CAMERA_INDEX = 0  # 0 is usually the default camera
FRAME_WIDTH = 320
FRAME_HEIGHT = 320
CONF_THRESHOLD = 0.1  # Lower confidence threshold for more detections
SAFE_ACTION = 'safe driving'
WINDOW_TITLE = 'YOLO Webcam Detection'

# Define class names (match your custom_dataset.yaml)
class_names = [
    'safe driving',
    'drinking',
    'eating',
    'hair and makeup',
    'operating radio',
    'talking on phone',
    'talking to passenger',
]


def trigger_alarm(action):
    """Print an alarm message if *action* is not 'safe driving'.

    Args:
        action: Name of the detected action.
    """
    if action != SAFE_ACTION:
        # Text-based alarm for Mac
        print(f"ALARM: Unsafe behavior detected - {action}!")
        # Optional sound alarm (uncomment and add alarm.wav if using playsound)
        # try:
        #     from playsound import playsound
        #     playsound(os.path.join(os.getcwd(), 'alarm.wav'))
        # except Exception as e:
        #     print(f"Sound alarm failed: {e}")


def _label_for(class_id):
    """Return the name for *class_id*, or a placeholder if it is out of range."""
    if 0 <= class_id < len(class_names):
        return class_names[class_id]
    # The model produced an index that class_names does not cover; report it
    # instead of crashing the capture loop with an IndexError.
    return f"unknown class {class_id}"


def _top_detection(result):
    """Return ``(label, confidence)`` for the most confident box in *result*.

    Returns ``None`` when the result carries no boxes.
    """
    boxes = result.boxes
    if boxes is None:
        return None
    scores = boxes.conf.cpu().numpy()  # Confidence scores
    if scores.size == 0:
        return None
    best = int(scores.argmax())
    # Only the winning class index is needed, so read that single element.
    class_id = int(boxes.cls[best].item())
    return _label_for(class_id), scores[best]


def main():
    """Run the webcam detection loop.

    Returns:
        Process exit status: 0 after a normal run, 1 if the webcam could not
        be opened.
    """
    # Verify MPS availability
    mps_available = torch.backends.mps.is_available()
    print(f"MPS available: {mps_available}")

    # Load the trained model
    model = YOLO(MODEL_PATH)
    if mps_available:
        model.to('mps')

    # Open the webcam
    cap = cv2.VideoCapture(CAMERA_INDEX)
    try:
        if not cap.isOpened():
            print("Error: Could not open webcam.")
            return 1

        # Set frame width and height
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, FRAME_WIDTH)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, FRAME_HEIGHT)

        while cap.isOpened():
            # Read frame from webcam
            ret, frame = cap.read()
            if not ret:
                print("Error: Could not read frame.")
                break

            # Perform inference with confidence threshold
            results = model(frame, conf=CONF_THRESHOLD)

            # Process results; the text stays at its default when nothing
            # is detected in this frame.
            display_text = "Safe driving"
            for result in results:
                top = _top_detection(result)
                if top is None:
                    continue
                detected_action, confidence = top
                display_text = f"{detected_action}: {confidence:.2f}"
                # Trigger alarm if not safe driving
                trigger_alarm(detected_action)

            # Display the action in top-left corner (no bounding boxes)
            cv2.putText(frame, display_text, (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            # Display the result
            cv2.imshow(WINDOW_TITLE, frame)

            # Break loop if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Cleanup runs even if inference or drawing raises, so the camera
        # device is always released.
        cap.release()
        cv2.destroyAllWindows()
    return 0


if __name__ == "__main__":
    sys.exit(main())