Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,99 +1,78 @@
|
|
1 |
import cv2
|
2 |
-
import numpy as np
|
3 |
import torch
|
4 |
-
from collections import deque
|
5 |
from ultralytics import YOLO
|
6 |
-
from fastapi import FastAPI
|
7 |
-
import
|
|
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
12 |
|
13 |
-
# Constants
|
14 |
KNOWN_DISTANCE = 50 # cm
|
15 |
KNOWN_FACE_WIDTH = 14 # cm
|
16 |
-
REF_IMAGE_FACE_WIDTH = 120 #
|
17 |
FOCAL_LENGTH = (REF_IMAGE_FACE_WIDTH * KNOWN_DISTANCE) / KNOWN_FACE_WIDTH
|
18 |
-
SCALING_FACTOR = 2.0 #
|
19 |
-
|
20 |
-
#
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
new_people_data = {}
|
44 |
-
change_detected = False
|
45 |
-
person_id = 1
|
46 |
-
|
47 |
-
frame_width = frame.shape[1] # Use the original frame width
|
48 |
-
|
49 |
for result in results:
|
50 |
-
for box in result.boxes
|
51 |
-
x1, y1, x2, y2
|
52 |
-
|
53 |
|
54 |
if conf > 0.5:
|
55 |
center_x = (x1 + x2) // 2
|
56 |
face_width_pixels = x2 - x1
|
57 |
-
|
58 |
-
# Determine position
|
59 |
if center_x < frame_width // 3:
|
60 |
position = "Left"
|
61 |
elif center_x > 2 * frame_width // 3:
|
62 |
position = "Right"
|
63 |
else:
|
64 |
position = "Center"
|
65 |
-
|
66 |
# Calculate distance
|
67 |
-
estimated_distance = (
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
# Check for significant change
|
77 |
-
if detected_people_history:
|
78 |
-
prev_data = detected_people_history[-1].get(f"person{person_id}")
|
79 |
-
if (
|
80 |
-
not prev_data
|
81 |
-
or prev_data["position"] != position
|
82 |
-
or abs(prev_data["distance_cm"] - estimated_distance) > DISTANCE_THRESHOLD
|
83 |
-
):
|
84 |
-
change_detected = True
|
85 |
-
|
86 |
-
person_id += 1
|
87 |
-
|
88 |
# Check if new people are detected
|
|
|
89 |
if not detected_people_history or len(new_people_data) != len(detected_people_history[-1]):
|
90 |
change_detected = True
|
91 |
|
92 |
if change_detected:
|
93 |
detected_people_history.append(new_people_data) # Save the latest detection
|
94 |
-
return {"
|
95 |
else:
|
96 |
-
return {"
|
97 |
-
|
98 |
-
if __name__ == "__main__":
|
99 |
-
uvicorn.run(app, host="0.0.0.0", port=8000)
|
|
|
1 |
import cv2
import torch
from ultralytics import YOLO
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List

app = FastAPI()

# Load the YOLO face-detection weights once at startup.
model = YOLO("yolov11s-face.pt")

# Calibration constants for the pinhole-camera distance estimate.
KNOWN_DISTANCE = 50  # cm — distance at which the reference image was taken
KNOWN_FACE_WIDTH = 14  # cm — assumed real-world face width
REF_IMAGE_FACE_WIDTH = 120  # pixels — face width measured at KNOWN_DISTANCE
# Derived focal length: F = (P * D) / W  (similar-triangles calibration).
FOCAL_LENGTH = (REF_IMAGE_FACE_WIDTH * KNOWN_DISTANCE) / KNOWN_FACE_WIDTH
SCALING_FACTOR = 2.0  # Experimental scaling fix

# Store detection history; /detect appends one entry per significant change.
detected_people_history = []
23 |
+
class FaceDetectionResponse(BaseModel):
    """One detected face: its horizontal screen position and estimated distance."""

    # One of "Left", "Center", "Right" — which third of the frame holds the face.
    position: str
    # Estimated camera-to-face distance (calibration constants are in cm),
    # rounded to one decimal place by the caller.
    distance: float
27 |
+
class DetectionResult(BaseModel):
    """Response envelope for /detect: all faces found in the grabbed frame."""

    # Empty when the frame could not be read or nothing changed significantly.
    faces: List[FaceDetectionResponse]
30 |
+
@app.get("/detect", response_model=DetectionResult)
def detect_faces():
    """Grab one webcam frame, run YOLO face detection, and report each face.

    Returns the detected faces only when the scene changed significantly
    since the last saved detection — a different face count, or a face that
    moved into another third of the frame. Otherwise returns an empty list.
    """
    cap = cv2.VideoCapture(0)
    ret, frame = cap.read()
    cap.release()

    # Camera missing or busy: report nothing rather than erroring out.
    if not ret:
        return {"faces": []}

    frame_width = frame.shape[1]
    results = model(frame)
    new_people_data = []

    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = box.conf[0].item()

            if conf > 0.5:  # drop low-confidence detections
                center_x = (x1 + x2) // 2
                face_width_pixels = x2 - x1

                # Guard: a degenerate zero-width box would divide by zero below.
                if face_width_pixels <= 0:
                    continue

                # Position label: split the frame into horizontal thirds.
                if center_x < frame_width // 3:
                    position = "Left"
                elif center_x > 2 * frame_width // 3:
                    position = "Right"
                else:
                    position = "Center"

                # Pinhole-camera estimate: D = (F * W) / P, then the
                # experimental correction factor.
                estimated_distance = (FOCAL_LENGTH * KNOWN_FACE_WIDTH) / face_width_pixels
                estimated_distance *= SCALING_FACTOR

                new_people_data.append(FaceDetectionResponse(
                    position=position,
                    distance=round(estimated_distance, 1)
                ))

    # Significant change = different face count OR any face's position label
    # changed. (Comparing only the count — the previous behavior — missed a
    # person who moved across the frame while the headcount stayed the same.)
    change_detected = False
    if not detected_people_history:
        change_detected = True
    else:
        previous = detected_people_history[-1]
        if len(new_people_data) != len(previous):
            change_detected = True
        elif any(new.position != old.position
                 for new, old in zip(new_people_data, previous)):
            change_detected = True

    if change_detected:
        # NOTE(review): history grows without bound on a long-running server;
        # only the last entry is ever read, so consider capping its length.
        detected_people_history.append(new_people_data)  # Save the latest detection
        return {"faces": new_people_data}
    else:
        return {"faces": []}  # No significant change detected
|
|
|
|
|