KRISH09bha committed on
Commit
638ad81
·
verified ·
1 Parent(s): 312f686

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -51
app.py CHANGED
@@ -1,78 +1,94 @@
1
  import cv2
 
2
  import torch
 
3
  from ultralytics import YOLO
4
- from fastapi import FastAPI
5
- from pydantic import BaseModel
6
- from typing import List
7
 
8
- app = FastAPI()
9
-
10
- # Load YOLO model
11
- model = YOLO("yolov11s-face.pt")
12
 
13
- # Constants
14
  KNOWN_DISTANCE = 50 # cm
15
  KNOWN_FACE_WIDTH = 14 # cm
16
- REF_IMAGE_FACE_WIDTH = 120 # pixels
17
  FOCAL_LENGTH = (REF_IMAGE_FACE_WIDTH * KNOWN_DISTANCE) / KNOWN_FACE_WIDTH
18
- SCALING_FACTOR = 2.0 # Experimental scaling fix
19
-
20
- # Store detection history
21
- detected_people_history = []
22
-
23
- class FaceDetectionResponse(BaseModel):
24
- position: str
25
- distance: float
26
-
27
- class DetectionResult(BaseModel):
28
- faces: List[FaceDetectionResponse]
29
-
30
- @app.get("/detect", response_model=DetectionResult)
31
- def detect_faces():
32
- cap = cv2.VideoCapture(0)
33
- ret, frame = cap.read()
34
- cap.release()
35
-
36
- if not ret:
37
- return {"faces": []}
38
-
39
- frame_width = frame.shape[1]
40
- results = model(frame)
41
- new_people_data = []
42
-
 
 
 
 
43
  for result in results:
44
- for box in result.boxes:
45
- x1, y1, x2, y2 = map(int, box.xyxy[0])
46
- conf = box.conf[0].item()
47
 
48
  if conf > 0.5:
49
  center_x = (x1 + x2) // 2
50
  face_width_pixels = x2 - x1
51
-
52
- # Determine position
53
  if center_x < frame_width // 3:
54
  position = "Left"
55
  elif center_x > 2 * frame_width // 3:
56
  position = "Right"
57
  else:
58
  position = "Center"
59
-
60
  # Calculate distance
61
- estimated_distance = (FOCAL_LENGTH * KNOWN_FACE_WIDTH) / face_width_pixels
62
- estimated_distance *= SCALING_FACTOR
63
-
64
- new_people_data.append(FaceDetectionResponse(
65
- position=position,
66
- distance=round(estimated_distance, 1)
67
- ))
68
-
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  # Check if new people are detected
70
- change_detected = False
71
  if not detected_people_history or len(new_people_data) != len(detected_people_history[-1]):
72
  change_detected = True
73
 
74
  if change_detected:
75
  detected_people_history.append(new_people_data) # Save the latest detection
76
- return {"faces": new_people_data}
77
  else:
78
- return {"faces": []} # No significant change detected
 
"""Face detection API.

Exposes a single ``POST /detect`` endpoint that accepts an uploaded image,
runs a YOLO face detector on a downscaled copy, and reports each detected
face's horizontal position (Left / Center / Right) and estimated distance
from the camera in centimetres.
"""

from collections import deque
from typing import Optional

import cv2
import numpy as np
import torch
import uvicorn  # not referenced below; kept because the launcher may rely on it
from fastapi import FastAPI, File, HTTPException, UploadFile
from ultralytics import YOLO

# Load the YOLO model once at import time, on the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = YOLO("yolov11s-face.pt").to(device)

# Constants for distance estimation (pinhole-camera approximation).
KNOWN_DISTANCE = 50  # cm
KNOWN_FACE_WIDTH = 14  # cm
REF_IMAGE_FACE_WIDTH = 120  # Reference face width in pixels at the known distance
FOCAL_LENGTH = (REF_IMAGE_FACE_WIDTH * KNOWN_DISTANCE) / KNOWN_FACE_WIDTH
SCALING_FACTOR = 2.0  # Adjust based on testing

# Detections at or below this confidence are ignored.
CONFIDENCE_THRESHOLD = 0.5
# The frame is shrunk by this factor before inference to speed it up.
INFERENCE_DOWNSCALE = 2

# FastAPI initialization
app = FastAPI()

# Bounded history of previously reported detections.
MAX_HISTORY = 10
detected_people_history = deque(maxlen=MAX_HISTORY)
DISTANCE_THRESHOLD = 30  # cm


def _classify_position(center_x: int, frame_width: int) -> str:
    """Return "Left", "Center" or "Right" for a face centre within the frame."""
    if center_x < frame_width // 3:
        return "Left"
    if center_x > 2 * frame_width // 3:
        return "Right"
    return "Center"


def _estimate_distance(face_width_pixels: int) -> float:
    """Return the estimated distance in cm, or -1 for a degenerate box width."""
    if face_width_pixels <= 0:
        # Sentinel kept from the original response format.
        return -1
    return (FOCAL_LENGTH * KNOWN_FACE_WIDTH) / face_width_pixels * SCALING_FACTOR


def _has_significant_change(previous: Optional[dict], current: dict) -> bool:
    """Return True when ``current`` differs meaningfully from ``previous``.

    A change is reported when there is no previous snapshot, the number of
    people differs, a person key is new, a position label changed, or a
    distance moved by more than ``DISTANCE_THRESHOLD`` cm.
    """
    if previous is None or len(previous) != len(current):
        return True
    for key, person in current.items():
        before = previous.get(key)
        if before is None:
            return True
        if before["position"] != person["position"]:
            return True
        if abs(before["distance_cm"] - person["distance_cm"]) > DISTANCE_THRESHOLD:
            return True
    return False


@app.post("/detect")
async def detect_faces(file: UploadFile = File(...)):
    """Detect faces in an uploaded image and report position and distance.

    Args:
        file: The uploaded image file.

    Returns:
        ``{"people": {...}}`` mapping ``"person<N>"`` to ``distance_cm`` and
        ``position`` when the detections changed significantly since the last
        reported snapshot, otherwise ``{"people": []}``.

    Raises:
        HTTPException: 400 when the upload is empty or cannot be decoded as
            an image.
    """
    contents = await file.read()
    if not contents:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    frame = cv2.imdecode(np.frombuffer(contents, np.uint8), cv2.IMREAD_COLOR)
    if frame is None:
        # imdecode signals an undecodable buffer by returning None.
        raise HTTPException(
            status_code=400, detail="Uploaded file is not a decodable image."
        )

    # Downscale for faster inference; never let a dimension collapse to 0.
    frame_height, frame_width = frame.shape[:2]
    resized_width = max(1, frame_width // INFERENCE_DOWNSCALE)
    resized_height = max(1, frame_height // INFERENCE_DOWNSCALE)
    resized_frame = cv2.resize(frame, (resized_width, resized_height))
    # Exact factor for mapping box x-coordinates back to the original frame
    # (differs slightly from INFERENCE_DOWNSCALE when the width is odd).
    scale_x = frame_width / resized_width

    # FP16 inference is only requested when running on CUDA.
    results = model(
        resized_frame, imgsz=320, half=(device == "cuda"), verbose=False
    )

    new_people_data = {}
    for result in results:
        # Each row is expected to start with x1, y1, x2, y2, confidence.
        for box in result.boxes.data.tolist():
            x1, _y1, x2, _y2, conf = box[:5]
            if conf <= CONFIDENCE_THRESHOLD:
                continue

            # Work in original-frame pixels so that both the position thirds
            # and the distance calibration refer to the same coordinate space.
            left = int(x1 * scale_x)
            right = int(x2 * scale_x)
            center_x = (left + right) // 2
            face_width_pixels = right - left

            person_key = f"person{len(new_people_data) + 1}"
            new_people_data[person_key] = {
                "distance_cm": round(_estimate_distance(face_width_pixels), 2),
                "position": _classify_position(center_x, frame_width),
            }

    previous = detected_people_history[-1] if detected_people_history else None
    if _has_significant_change(previous, new_people_data):
        detected_people_history.append(new_people_data)  # Save the latest detection
        return {"people": new_people_data}

    # No significant change detected. The empty list (rather than an empty
    # dict) is kept so existing clients see the same "no change" payload.
    return {"people": []}