Spaces:

KRISH09bha
/

Interactive-mode

Sleeping

App Files Files Community

Interactive-mode / app.py

KRISH09bha

Update app.py

638ad81 verified 2 months ago

raw

history blame

3.39 kB

	from collections import deque

	import cv2
	import numpy as np
	import torch
	import uvicorn
	from fastapi import FastAPI, File, HTTPException, UploadFile
	from ultralytics import YOLO

	# Choose the inference device once at import time and load the face
	# detector weights onto it.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"
	model = YOLO("yolov11s-face.pt").to(device)

	# Calibration for the distance estimate: a face of KNOWN_FACE_WIDTH cm
	# photographed from KNOWN_DISTANCE cm spans REF_IMAGE_FACE_WIDTH pixels.
	KNOWN_DISTANCE = 50  # cm
	KNOWN_FACE_WIDTH = 14  # cm
	REF_IMAGE_FACE_WIDTH = 120  # pixels, measured at KNOWN_DISTANCE
	# Focal length in pixels derived from the reference measurement above.
	FOCAL_LENGTH = REF_IMAGE_FACE_WIDTH * KNOWN_DISTANCE / KNOWN_FACE_WIDTH
	# Empirical multiplier applied to every estimated distance; tune by testing.
	SCALING_FACTOR = 2.0

	# ASGI application object that the route decorators attach to.
	app = FastAPI()

	# Bounded log of previously reported detections (oldest entries drop off),
	# plus the distance delta in cm that counts as a significant change.
	MAX_HISTORY = 10
	DISTANCE_THRESHOLD = 30  # cm
	detected_people_history = deque(maxlen=MAX_HISTORY)

	def _classify_position(center_x, frame_width):
	    """Return "Left", "Center" or "Right" for a horizontal face centre.

	    The frame is split into three vertical bands of equal width. Both
	    arguments must be expressed in the same pixel coordinate system.
	    """
	    if center_x < frame_width // 3:
	        return "Left"
	    if center_x > 2 * frame_width // 3:
	        return "Right"
	    return "Center"


	def _estimate_distance_cm(face_width_pixels):
	    """Estimate the camera-to-face distance in cm from the face width in pixels.

	    Returns -1 when the width is not positive, i.e. the box is degenerate
	    and no distance can be derived from it.
	    """
	    if face_width_pixels <= 0:
	        return -1
	    return (FOCAL_LENGTH * KNOWN_FACE_WIDTH) / face_width_pixels * SCALING_FACTOR


	@app.post("/detect")
	async def detect_faces(file: UploadFile = File(...)):
	    """Detect faces in an uploaded image and report them when the scene changed.

	    The image is decoded, downscaled to half size for inference, and every
	    detection with confidence above 0.5 is turned into an entry
	    ``"person<N>": {"distance_cm": ..., "position": ...}`` where N counts the
	    accepted detections in the order the model returns them.

	    The result is compared with the most recently stored detection. It counts
	    as changed when there is no stored detection yet, the number of people
	    differs, or any person's position differs or their distance moved by more
	    than DISTANCE_THRESHOLD cm.

	    Args:
	        file: The uploaded image file.

	    Returns:
	        ``{"people": {...}}`` with the new detections when a change was found
	        (the detections are also appended to ``detected_people_history``),
	        otherwise ``{"people": []}``.

	    Raises:
	        HTTPException: 400 if the upload is empty or cannot be decoded as an
	            image.
	    """
	    contents = await file.read()
	    if not contents:
	        raise HTTPException(status_code=400, detail="Uploaded file is empty")

	    frame = cv2.imdecode(np.frombuffer(contents, np.uint8), cv2.IMREAD_COLOR)
	    if frame is None:
	        # imdecode reports an undecodable buffer by returning None instead of
	        # raising; without this check the next line fails with a 500.
	        raise HTTPException(
	            status_code=400, detail="Uploaded file is not a decodable image"
	        )

	    # Run inference on a half-size copy for speed. Clamp to 1 px so tiny
	    # images do not produce a zero-sized resize target.
	    h, w = frame.shape[:2]
	    small_w, small_h = max(1, w // 2), max(1, h // 2)
	    resized_frame = cv2.resize(frame, (small_w, small_h))
	    # Factor that maps x coordinates of the resized frame back onto the
	    # original frame (exactly 2.0 for even widths).
	    scale_x = w / small_w

	    # Only request FP16 when running on CUDA; half precision is a GPU
	    # feature and is not reliably available for CPU inference.
	    results = model(
	        resized_frame, imgsz=320, half=(device == "cuda"), verbose=False
	    )

	    previous = detected_people_history[-1] if detected_people_history else None
	    new_people_data = {}
	    change_detected = False

	    for result in results:
	        # Each row starts with x1, y1, x2, y2, conf; only the horizontal
	        # extent and the confidence are needed here.
	        for box in result.boxes.data.tolist():
	            x1, _, x2, _, conf = box[:5]
	            if conf <= 0.5:
	                continue

	            x1, x2 = int(x1 * scale_x), int(x2 * scale_x)
	            estimated_distance = _estimate_distance_cm(x2 - x1)
	            # The centre is in original-frame pixels, so it must be compared
	            # against the original width, not the resized one.
	            position = _classify_position((x1 + x2) // 2, w)

	            person_key = f"person{len(new_people_data) + 1}"
	            new_people_data[person_key] = {
	                "distance_cm": round(estimated_distance, 2),
	                "position": position,
	            }

	            if previous is not None:
	                prev_data = previous.get(person_key)
	                if (
	                    not prev_data
	                    or prev_data["position"] != position
	                    or abs(prev_data["distance_cm"] - estimated_distance)
	                    > DISTANCE_THRESHOLD
	                ):
	                    change_detected = True

	    # A first request or a different head-count is always a change.
	    if previous is None or len(new_people_data) != len(previous):
	        change_detected = True

	    if not change_detected:
	        # NOTE: the "no change" payload is an empty list while the "changed"
	        # payload is a dict; kept as-is so existing clients keep working.
	        return {"people": []}

	    detected_people_history.append(new_people_data)
	    return {"people": new_people_data}