"""FastAPI service that detects faces in one webcam frame with a YOLO model.

For each detected face the service reports a coarse horizontal position
("Left", "Center" or "Right") and a distance estimated from the width of the
face's bounding box.
"""

import cv2
import torch
from ultralytics import YOLO
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List

app = FastAPI()

# Load YOLO model
model = YOLO("yolov11s-face.pt")

# Constants
KNOWN_DISTANCE = 50  # cm
KNOWN_FACE_WIDTH = 14  # cm
REF_IMAGE_FACE_WIDTH = 120  # pixels
FOCAL_LENGTH = (REF_IMAGE_FACE_WIDTH * KNOWN_DISTANCE) / KNOWN_FACE_WIDTH
SCALING_FACTOR = 2.0  # Experimental scaling fix
CONFIDENCE_THRESHOLD = 0.5  # Detections at or below this confidence are ignored
MAX_HISTORY_LENGTH = 100  # Cap on stored detections so the history cannot grow forever

# Store detection history (most recent entry last)
detected_people_history = []


class FaceDetectionResponse(BaseModel):
    """Position and estimated distance of a single detected face."""

    position: str  # "Left", "Center" or "Right"
    distance: float  # Estimated distance, rounded to one decimal place


class DetectionResult(BaseModel):
    """Response body of the ``/detect`` endpoint."""

    faces: List[FaceDetectionResponse]


def _capture_frame():
    """Grab one frame from camera 0, or return ``None`` if the read fails."""
    cap = cv2.VideoCapture(0)
    try:
        ret, frame = cap.read()
    finally:
        # Always free the device, even if the read itself raises.
        cap.release()
    return frame if ret else None


def _classify_position(center_x: int, frame_width: int) -> str:
    """Map a horizontal pixel coordinate to the frame third that contains it."""
    if center_x < frame_width // 3:
        return "Left"
    if center_x > 2 * frame_width // 3:
        return "Right"
    return "Center"


def _estimate_distance(face_width_pixels: int) -> float:
    """Estimate distance from a face's pixel width; the width must be > 0."""
    estimated_distance = (FOCAL_LENGTH * KNOWN_FACE_WIDTH) / face_width_pixels
    return estimated_distance * SCALING_FACTOR


@app.get("/detect", response_model=DetectionResult)
def detect_faces():
    """Detect faces in a single camera frame.

    Returns:
        A dict ``{"faces": [...]}``. The list holds one
        ``FaceDetectionResponse`` per accepted detection when the number of
        faces differs from the most recently stored detection (or when nothing
        has been stored yet). It is empty when the frame could not be read or
        when the face count is unchanged.
    """
    frame = _capture_frame()
    if frame is None:
        return {"faces": []}

    frame_width = frame.shape[1]
    results = model(frame)

    new_people_data = []
    for result in results:
        for box in result.boxes:
            x1, _y1, x2, _y2 = map(int, box.xyxy[0])
            conf = box.conf[0].item()
            face_width_pixels = x2 - x1

            # A box whose width truncates to zero (or is inverted) cannot be
            # used for the distance estimate: it would divide by zero.
            if conf > CONFIDENCE_THRESHOLD and face_width_pixels > 0:
                center_x = (x1 + x2) // 2
                new_people_data.append(FaceDetectionResponse(
                    position=_classify_position(center_x, frame_width),
                    distance=round(_estimate_distance(face_width_pixels), 1),
                ))

    # A change is reported only when the number of faces differs from the
    # last stored detection; positions and distances are not compared.
    change_detected = (
        not detected_people_history
        or len(new_people_data) != len(detected_people_history[-1])
    )

    if not change_detected:
        return {"faces": []}  # No significant change detected

    detected_people_history.append(new_people_data)  # Save the latest detection
    # Trim in place so the module-level list keeps its identity but stays bounded.
    del detected_people_history[:-MAX_HISTORY_LENGTH]
    return {"faces": new_people_data}