Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,99 +1,78 @@
|
|
1 |
import cv2
|
2 |
-
import numpy as np
|
3 |
import torch
|
4 |
-
from collections import deque
|
5 |
from ultralytics import YOLO
|
6 |
-
from fastapi import FastAPI
|
7 |
-
import
|
|
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
12 |
|
13 |
-
# Constants
|
14 |
KNOWN_DISTANCE = 50 # cm
|
15 |
KNOWN_FACE_WIDTH = 14 # cm
|
16 |
-
REF_IMAGE_FACE_WIDTH = 120 #
|
17 |
FOCAL_LENGTH = (REF_IMAGE_FACE_WIDTH * KNOWN_DISTANCE) / KNOWN_FACE_WIDTH
|
18 |
-
SCALING_FACTOR = 2.0 #
|
19 |
-
|
20 |
-
#
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
new_people_data = {}
|
44 |
-
change_detected = False
|
45 |
-
person_id = 1
|
46 |
-
|
47 |
-
frame_width = frame.shape[1] # Use the original frame width
|
48 |
-
|
49 |
for result in results:
|
50 |
-
for box in result.boxes
|
51 |
-
x1, y1, x2, y2
|
52 |
-
|
53 |
|
54 |
if conf > 0.5:
|
55 |
center_x = (x1 + x2) // 2
|
56 |
face_width_pixels = x2 - x1
|
57 |
-
|
58 |
-
# Determine position
|
59 |
if center_x < frame_width // 3:
|
60 |
position = "Left"
|
61 |
elif center_x > 2 * frame_width // 3:
|
62 |
position = "Right"
|
63 |
else:
|
64 |
position = "Center"
|
65 |
-
|
66 |
# Calculate distance
|
67 |
-
estimated_distance = (
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
# Check for significant change
|
77 |
-
if detected_people_history:
|
78 |
-
prev_data = detected_people_history[-1].get(f"person{person_id}")
|
79 |
-
if (
|
80 |
-
not prev_data
|
81 |
-
or prev_data["position"] != position
|
82 |
-
or abs(prev_data["distance_cm"] - estimated_distance) > DISTANCE_THRESHOLD
|
83 |
-
):
|
84 |
-
change_detected = True
|
85 |
-
|
86 |
-
person_id += 1
|
87 |
-
|
88 |
# Check if new people are detected
|
|
|
89 |
if not detected_people_history or len(new_people_data) != len(detected_people_history[-1]):
|
90 |
change_detected = True
|
91 |
|
92 |
if change_detected:
|
93 |
detected_people_history.append(new_people_data) # Save the latest detection
|
94 |
-
return {"
|
95 |
else:
|
96 |
-
return {"
|
97 |
-
|
98 |
-
if __name__ == "__main__":
|
99 |
-
uvicorn.run(app, host="0.0.0.0", port=8000)
|
|
|
1 |
import cv2
import torch
from ultralytics import YOLO
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List

app = FastAPI()

# Load the YOLO face-detection weights once at startup.
model = YOLO("yolov11s-face.pt")

# Calibration constants for the pinhole-camera distance estimate.
KNOWN_DISTANCE = 50  # cm — distance at which the reference image was taken
KNOWN_FACE_WIDTH = 14  # cm — assumed real-world face width
REF_IMAGE_FACE_WIDTH = 120  # pixels — face width measured at KNOWN_DISTANCE
# Derived focal length: F = (P * D) / W  (similar-triangles calibration).
FOCAL_LENGTH = (REF_IMAGE_FACE_WIDTH * KNOWN_DISTANCE) / KNOWN_FACE_WIDTH
SCALING_FACTOR = 2.0  # Experimental scaling fix

# Store detection history; /detect appends one entry per significant change.
detected_people_history = []
23 |
+
class FaceDetectionResponse(BaseModel):
    """One detected face: its horizontal screen position and estimated distance."""

    # One of "Left", "Center", "Right" — which third of the frame holds the face.
    position: str
    # Estimated camera-to-face distance (calibration constants are in cm),
    # rounded to one decimal place by the caller.
    distance: float
27 |
+
class DetectionResult(BaseModel):
    """Response envelope for /detect: all faces found in the grabbed frame."""

    # Empty when the frame could not be read or nothing changed significantly.
    faces: List[FaceDetectionResponse]
30 |
+
@app.get("/detect", response_model=DetectionResult)
def detect_faces():
    """Grab one webcam frame, run YOLO face detection, and report each face.

    Returns the detected faces only when the scene changed significantly
    since the last saved detection — a different face count, or a face that
    moved into another third of the frame. Otherwise returns an empty list.
    """
    cap = cv2.VideoCapture(0)
    ret, frame = cap.read()
    cap.release()

    # Camera missing or busy: report nothing rather than erroring out.
    if not ret:
        return {"faces": []}

    frame_width = frame.shape[1]
    results = model(frame)
    new_people_data = []

    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = box.conf[0].item()

            if conf > 0.5:  # drop low-confidence detections
                center_x = (x1 + x2) // 2
                face_width_pixels = x2 - x1

                # Guard: a degenerate zero-width box would divide by zero below.
                if face_width_pixels <= 0:
                    continue

                # Position label: split the frame into horizontal thirds.
                if center_x < frame_width // 3:
                    position = "Left"
                elif center_x > 2 * frame_width // 3:
                    position = "Right"
                else:
                    position = "Center"

                # Pinhole-camera estimate: D = (F * W) / P, then the
                # experimental correction factor.
                estimated_distance = (FOCAL_LENGTH * KNOWN_FACE_WIDTH) / face_width_pixels
                estimated_distance *= SCALING_FACTOR

                new_people_data.append(FaceDetectionResponse(
                    position=position,
                    distance=round(estimated_distance, 1)
                ))

    # Significant change = different face count OR any face's position label
    # changed. (Comparing only the count — the previous behavior — missed a
    # person who moved across the frame while the headcount stayed the same.)
    change_detected = False
    if not detected_people_history:
        change_detected = True
    else:
        previous = detected_people_history[-1]
        if len(new_people_data) != len(previous):
            change_detected = True
        elif any(new.position != old.position
                 for new, old in zip(new_people_data, previous)):
            change_detected = True

    if change_detected:
        # NOTE(review): history grows without bound on a long-running server;
        # only the last entry is ever read, so consider capping its length.
        detected_people_history.append(new_people_data)  # Save the latest detection
        return {"faces": new_people_data}
    else:
        return {"faces": []}  # No significant change detected
|
|
|
|
|