import cv2
import torch
from ultralytics import YOLO
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List

app = FastAPI()

# Load the YOLO face-detection model
model = YOLO("yolov11s-face.pt")

# Calibration constants
KNOWN_DISTANCE = 50          # calibration distance to the face, in cm
KNOWN_FACE_WIDTH = 14        # average real face width, in cm
REF_IMAGE_FACE_WIDTH = 120   # face width in pixels at the calibration distance
FOCAL_LENGTH = (REF_IMAGE_FACE_WIDTH * KNOWN_DISTANCE) / KNOWN_FACE_WIDTH
SCALING_FACTOR = 2.0         # empirical correction applied to the distance estimate
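
# Distance estimation here follows the standard pinhole-camera / similar-triangles
# model: if a face of real width W (cm) appears at distance D (cm) with a width of
# P pixels, the focal length in pixels is F = (P * D) / W, and a face observed at
# p pixels wide is then at roughly distance = (F * W) / p.
# SCALING_FACTOR is an empirical correction applied on top of that estimate.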

# Store detection history
detected_people_history = []

class FaceDetectionResponse(BaseModel):
    position: str
    distance: float


class DetectionResult(BaseModel):
    faces: List[FaceDetectionResponse]
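
# NOTE: the original snippet defines detect_faces() without a route decorator,
# so the GET path below is an assumed, illustrative choice to make the function
# reachable through FastAPI; adjust the path to match your app.
@app.get("/detect", response_model=DetectionResult)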
def detect_faces():
    # Grab a single frame from the default camera
    cap = cv2.VideoCapture(0)
    ret, frame = cap.read()
    cap.release()

    if not ret:
        return {"faces": []}
    frame_width = frame.shape[1]
    results = model(frame)
    new_people_data = []

    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = box.conf[0].item()
            if conf > 0.5:
                center_x = (x1 + x2) // 2
                face_width_pixels = x2 - x1
                # Determine position
                if center_x < frame_width // 3:
                    position = "Left"
                elif center_x > 2 * frame_width // 3:
                    position = "Right"
                else:
                    position = "Center"
                # Estimate distance from the apparent face width (similar triangles)
                estimated_distance = (FOCAL_LENGTH * KNOWN_FACE_WIDTH) / face_width_pixels
                estimated_distance *= SCALING_FACTOR

                new_people_data.append(FaceDetectionResponse(
                    position=position,
                    distance=round(estimated_distance, 1)
                ))
    # Report only when the number of detected people changes
    change_detected = False
    if not detected_people_history or len(new_people_data) != len(detected_people_history[-1]):
        change_detected = True

    if change_detected:
        detected_people_history.append(new_people_data)  # Save the latest detection
        return {"faces": new_people_data}
    else:
        return {"faces": []}  # No significant change detected