Spaces:

KRISH09bha
/

Interactive-mode

Running

App Files Files Community

KRISH09bha committed on Mar 29

Commit

638ad81

verified ·

1 Parent(s): 312f686

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -51

app.py CHANGED Viewed

@@ -1,78 +1,94 @@
 import cv2
 import torch
 from ultralytics import YOLO
-from fastapi import FastAPI
-from pydantic import BaseModel
-from typing import List
-app = FastAPI()
-# Load YOLO model
-model = YOLO("yolov11s-face.pt")
-# Constants
 KNOWN_DISTANCE = 50  # cm
 KNOWN_FACE_WIDTH = 14  # cm
-REF_IMAGE_FACE_WIDTH = 120  # pixels
 FOCAL_LENGTH = (REF_IMAGE_FACE_WIDTH * KNOWN_DISTANCE) / KNOWN_FACE_WIDTH
-SCALING_FACTOR = 2.0  # Experimental scaling fix
-# Store detection history
-detected_people_history = []
-class FaceDetectionResponse(BaseModel):
-    position: str
-    distance: float
-class DetectionResult(BaseModel):
-    faces: List[FaceDetectionResponse]
-@app.get("/detect", response_model=DetectionResult)
-def detect_faces():
-    cap = cv2.VideoCapture(0)
-    ret, frame = cap.read()
-    cap.release()
-    if not ret:
-        return {"faces": []}
-    frame_width = frame.shape[1]
-    results = model(frame)
-    new_people_data = []
     for result in results:
-        for box in result.boxes:
-            x1, y1, x2, y2 = map(int, box.xyxy[0])
-            conf = box.conf[0].item()
             if conf > 0.5:
                 center_x = (x1 + x2) // 2
                 face_width_pixels = x2 - x1
-                # Determine position
                 if center_x < frame_width // 3:
                     position = "Left"
                 elif center_x > 2 * frame_width // 3:
                     position = "Right"
                 else:
                     position = "Center"
                 # Calculate distance
-                estimated_distance = (FOCAL_LENGTH * KNOWN_FACE_WIDTH) / face_width_pixels
-                estimated_distance *= SCALING_FACTOR
-                new_people_data.append(FaceDetectionResponse(
-                    position=position,
-                    distance=round(estimated_distance, 1)
-                ))
     # Check if new people are detected
-    change_detected = False
     if not detected_people_history or len(new_people_data) != len(detected_people_history[-1]):
         change_detected = True
     if change_detected:
         detected_people_history.append(new_people_data)  # Save the latest detection
-        return {"faces": new_people_data}
     else:
-        return {"faces": []}  # No significant change detected

 import cv2
+import numpy as np
 import torch
+from collections import deque
 from ultralytics import YOLO
+from fastapi import FastAPI, File, UploadFile
+import uvicorn
# Run inference on the GPU when PyTorch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Face-detection weights are loaded once at import time and moved to `device`.
model = YOLO("yolov11s-face.pt").to(device)

# Calibration values for the distance estimate.
KNOWN_DISTANCE = 50  # cm
KNOWN_FACE_WIDTH = 14  # cm
REF_IMAGE_FACE_WIDTH = 120  # face width in pixels measured at KNOWN_DISTANCE
FOCAL_LENGTH = REF_IMAGE_FACE_WIDTH * KNOWN_DISTANCE / KNOWN_FACE_WIDTH
SCALING_FACTOR = 2.0  # multiplier applied to the raw estimate; tune by testing

# Web application object that the route decorators attach to.
app = FastAPI()

# Bounded record of previously reported detections; once MAX_HISTORY entries
# are stored, appending a new one drops the oldest.
MAX_HISTORY = 10
detected_people_history = deque((), maxlen=MAX_HISTORY)

# A distance difference larger than this (cm) counts as a change.
DISTANCE_THRESHOLD = 30  # cm
def _classify_position(center_x: int, frame_width: int) -> str:
    """Return "Left", "Center" or "Right" for a face centre inside the frame.

    The frame is split into three vertical bands of equal width.  Both
    arguments must be expressed in the same pixel coordinate system.
    """
    if center_x < frame_width // 3:
        return "Left"
    if center_x > 2 * frame_width // 3:
        return "Right"
    return "Center"


def _estimate_distance_cm(face_width_pixels: int) -> float:
    """Estimate the camera-to-face distance in cm from a positive face width in pixels."""
    return (FOCAL_LENGTH * KNOWN_FACE_WIDTH) / face_width_pixels * SCALING_FACTOR


@app.post("/detect")
async def detect_faces(file: UploadFile = File(...)):
    """Detect faces in an uploaded image and report their position and distance.

    The image is decoded, downscaled by half for inference, and every
    detection with confidence above 0.5 is labelled "Left", "Center" or
    "Right" and given an estimated distance in centimetres.

    Args:
        file: The uploaded image file.

    Returns:
        ``{"people": {"person1": {"distance_cm": ..., "position": ...}, ...}}``
        when the detections differ from the last reported ones (different
        count, a changed position, or a distance shift larger than
        ``DISTANCE_THRESHOLD``), and ``{"people": []}`` otherwise.  The empty
        list in the no-change case is kept as-is for existing clients.

    Raises:
        HTTPException: 400 when the upload is empty or cannot be decoded as
            an image.
    """
    # Imported here so this block does not depend on the module's import list.
    from fastapi import HTTPException

    contents = await file.read()
    if not contents:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    image_np = np.frombuffer(contents, np.uint8)
    frame = cv2.imdecode(image_np, cv2.IMREAD_COLOR)
    if frame is None:
        # imdecode signals an undecodable buffer by returning None.
        raise HTTPException(
            status_code=400, detail="Uploaded file is not a decodable image."
        )

    # Halve the resolution for faster inference; clamp so that a 1-pixel
    # dimension does not become 0, which cv2.resize rejects.
    frame_height, frame_width = frame.shape[:2]
    resized_width = max(1, frame_width // 2)
    resized_height = max(1, frame_height // 2)
    resized_frame = cv2.resize(frame, (resized_width, resized_height))
    # Factor that maps x coordinates of the resized frame back to the full
    # frame (exactly 2.0 for even widths).
    scale_x = frame_width / resized_width

    # NOTE(review): FP16 is requested only on CUDA; Ultralytics documents
    # half precision as a GPU feature - confirm CPU behaviour if it matters.
    # NOTE(review): this call is synchronous inside an async endpoint, so it
    # presumably blocks the event loop while the model runs.
    results = model(
        resized_frame, imgsz=320, half=(device == "cuda"), verbose=False
    )

    # Compare against the last *reported* detections.  The history is
    # module-level state, so it is shared by every caller of this endpoint.
    previous = detected_people_history[-1] if detected_people_history else None
    new_people_data = {}
    change_detected = previous is None
    person_id = 1

    for result in results:
        for box in result.boxes.data.tolist():
            x1, _, x2, _, conf = box[:5]
            if conf <= 0.5:
                continue

            # Work in full-frame pixels from here on.
            x1 = int(x1 * scale_x)
            x2 = int(x2 * scale_x)
            face_width_pixels = x2 - x1
            if face_width_pixels <= 0:
                # A degenerate box has no usable width for the distance
                # estimate; skip it instead of reporting a sentinel distance.
                continue

            # center_x is in full-frame pixels, so the thirds must be taken
            # from the full-frame width, not the resized one.
            center_x = (x1 + x2) // 2
            position = _classify_position(center_x, frame_width)
            estimated_distance = _estimate_distance_cm(face_width_pixels)

            # Keys follow detection order; they are not stable identities.
            key = f"person{person_id}"
            new_people_data[key] = {
                "distance_cm": round(estimated_distance, 2),
                "position": position,
            }

            if previous is not None:
                prev_data = previous.get(key)
                if (
                    not prev_data
                    or prev_data["position"] != position
                    or abs(prev_data["distance_cm"] - estimated_distance)
                    > DISTANCE_THRESHOLD
                ):
                    change_detected = True
            person_id += 1

    # A different number of people is always a change.
    if previous is not None and len(new_people_data) != len(previous):
        change_detected = True

    if change_detected:
        detected_people_history.append(new_people_data)  # Save the latest detection
        return {"people": new_people_data}
    return {"people": []}  # No significant change detected