import cv2
from ultralytics import YOLO
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List

app = FastAPI()

# Load YOLO model
model = YOLO("yolov11s-face.pt")

# Constants
KNOWN_DISTANCE = 50  # cm
KNOWN_FACE_WIDTH = 14  # cm
REF_IMAGE_FACE_WIDTH = 120  # pixels
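# Focal length in pixels via the pinhole-camera / similar-triangles relation:
# a face of KNOWN_FACE_WIDTH cm photographed at KNOWN_DISTANCE cm spans
# REF_IMAGE_FACE_WIDTH pixels in the reference image.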
FOCAL_LENGTH = (REF_IMAGE_FACE_WIDTH * KNOWN_DISTANCE) / KNOWN_FACE_WIDTH
SCALING_FACTOR = 2.0  # Empirical correction factor applied to the raw distance estimate

# Store detection history
detected_people_history = []

class FaceDetectionResponse(BaseModel):
    position: str
    distance: float

class DetectionResult(BaseModel):
    faces: List[FaceDetectionResponse]

@app.get("/detect", response_model=DetectionResult)
def detect_faces():
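    """Grab a single webcam frame, detect faces, and estimate each face's position and distance."""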
    cap = cv2.VideoCapture(0)
    ret, frame = cap.read()
    cap.release()
    
    if not ret:
        return {"faces": []}
    
    frame_width = frame.shape[1]
    results = model(frame)
    new_people_data = []
    
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = box.conf[0].item()

            if conf > 0.5:
                center_x = (x1 + x2) // 2
                face_width_pixels = x2 - x1
                
                # Determine position
                if center_x < frame_width // 3:
                    position = "Left"
                elif center_x > 2 * frame_width // 3:
                    position = "Right"
                else:
                    position = "Center"
                
                # Distance via similar triangles: (real face width * focal length) / face width in pixels
                estimated_distance = (FOCAL_LENGTH * KNOWN_FACE_WIDTH) / face_width_pixels
                estimated_distance *= SCALING_FACTOR
                
                new_people_data.append(FaceDetectionResponse(
                    position=position,
                    distance=round(estimated_distance, 1)
                ))
    
    # Report faces only when the number of detections differs from the last stored snapshot
    change_detected = False
    if not detected_people_history or len(new_people_data) != len(detected_people_history[-1]):
        change_detected = True

    if change_detected:
        detected_people_history.append(new_people_data)  # Save the latest detection
        return {"faces": new_people_data}
    else:
        return {"faces": []}  # No significant change detected
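
# Usage sketch (assumptions: this file is saved as main.py, a webcam is available at
# index 0, and the yolov11s-face.pt weights are present in the working directory):
#   uvicorn main:app --host 0.0.0.0 --port 8000
#   curl http://127.0.0.1:8000/detect
# Example response (values are illustrative):
#   {"faces": [{"position": "Center", "distance": 87.5}]}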