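"""Gradio demo: annotate an uploaded video with gaze, blink, and drowsiness overlays."""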
import gradio as gr
import cv2
import numpy as np
import tempfile
import os
from scripts.inference import GazePredictor
from utils.ear_utils import BlinkDetector

def smooth_values(history, current_value, window_size=5):
    """Append current_value to a bounded history and return its rolling mean (0 if empty)."""
    if current_value is not None:
        history.append(current_value)
        if len(history) > window_size:
            history.pop(0)
    # Works for scalars (EAR, head pose) and np.ndarray gaze vectors alike;
    # falls back to the last window's mean when the current frame has no value.
    return np.mean(history, axis=0) if history else 0

MODEL_PATH = os.path.join("models", "gaze_estimation_model.pth")

def analyze_video(input_video):
    cap = cv2.VideoCapture(input_video)
    gaze_predictor = GazePredictor(MODEL_PATH)
    blink_detector = BlinkDetector()
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    temp_fd, temp_path = tempfile.mkstemp(suffix='.mp4')
    os.close(temp_fd)
    out = None
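    # Heuristic thresholds: times are in seconds, blink rate in blinks/minute;
    # the stability thresholds are in the units of the model's gaze/head outputs.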
    GAZE_STABILITY_THRESHOLD = 0.5
    TIME_THRESHOLD = 15
    BLINK_RATE_THRESHOLD = 1
    EYE_CLOSURE_THRESHOLD = 10
    HEAD_STABILITY_THRESHOLD = 0.05
    gaze_history = []
    head_history = []
    ear_history = []
    stable_gaze_time = 0
    stable_head_time = 0
    eye_closed_time = 0
    blink_count = 0
    start_time = 0
    is_unconscious = False
    frame_count = 0
    fps = cap.get(cv2.CAP_PROP_FPS) or 20  # fall back to 20 FPS if the file reports none
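    # Walk the video frame by frame, overlaying diagnostics as we go.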
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_count += 1
        if start_time == 0:
            start_time = frame_count / fps
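        # Per-frame inference: gaze direction from the trained model, EAR /
        # iris positions / head pose from the landmark-based blink detector.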
        head_pose_gaze, gaze_h, gaze_v = gaze_predictor.predict_gaze(frame)
        current_gaze = np.array([gaze_h, gaze_v])
        smoothed_gaze = smooth_values(gaze_history, current_gaze)
        ear, left_eye, right_eye, head_pose, left_iris, right_iris = blink_detector.detect_blinks(frame)
        if ear is None:
            cv2.putText(frame, "No face detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            smoothed_head = smooth_values(head_history, None)
            smoothed_ear = smooth_values(ear_history, None)
        else:
            smoothed_head = smooth_values(head_history, head_pose)
            smoothed_ear = smooth_values(ear_history, ear)
            if smoothed_ear >= blink_detector.EAR_THRESHOLD and left_iris is not None and right_iris is not None:
                # drawMarker needs integer pixel coordinates; cast defensively in
                # case the detector returns float positions.
                cv2.drawMarker(frame, tuple(map(int, left_iris)), (0, 255, 0),
                               markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
                cv2.drawMarker(frame, tuple(map(int, right_iris)), (0, 255, 0),
                               markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
        cv2.putText(frame, f"Gaze H: {smoothed_gaze[0]:.2f}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f"Gaze V: {smoothed_gaze[1]:.2f}", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f"Head Pose: {smoothed_head:.2f}", (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f"EAR: {smoothed_ear:.2f}", (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        if len(gaze_history) > 1:
            gaze_diff = np.linalg.norm(smoothed_gaze - gaze_history[-2])
            if gaze_diff < GAZE_STABILITY_THRESHOLD:
                if stable_gaze_time == 0:
                    stable_gaze_time = frame_count / fps
            else:
                stable_gaze_time = 0
        if len(head_history) > 1 and head_pose is not None:
            head_diff = abs(smoothed_head - head_history[-2])
            if head_diff < HEAD_STABILITY_THRESHOLD:
                if stable_head_time == 0:
                    stable_head_time = frame_count / fps
            else:
                stable_head_time = 0
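        # Track eye closure: a short closure (< 0.5 s) counts as a blink, a
        # long one (> EYE_CLOSURE_THRESHOLD) is flagged on-screen.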
        if ear is not None and smoothed_ear < blink_detector.EAR_THRESHOLD:
            if eye_closed_time == 0:
                eye_closed_time = frame_count / fps
            elif (frame_count / fps) - eye_closed_time > EYE_CLOSURE_THRESHOLD:
                cv2.putText(frame, "Eyes Closed", (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        else:
            if eye_closed_time > 0 and (frame_count / fps) - eye_closed_time < 0.5:
                blink_count += 1
            eye_closed_time = 0
        elapsed_minutes = ((frame_count / fps) - start_time) / 60 if start_time > 0 else 0
        blink_rate = blink_count / elapsed_minutes if elapsed_minutes > 0 else 0
        cv2.putText(frame, f"Blink Rate: {blink_rate:.1f}/min", (10, 240), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        unconscious_conditions = [
            stable_gaze_time > 0 and (frame_count / fps) - stable_gaze_time > TIME_THRESHOLD,
            blink_rate < BLINK_RATE_THRESHOLD and elapsed_minutes > 1,
            eye_closed_time > 0 and (frame_count / fps) - eye_closed_time > EYE_CLOSURE_THRESHOLD,
            stable_head_time > 0 and (frame_count / fps) - stable_head_time > TIME_THRESHOLD,
        ]
        if sum(unconscious_conditions) >= 2:
            cv2.putText(frame, "Unconscious Detected", (10, 270), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            is_unconscious = True
        else:
            is_unconscious = False
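        # Lazily open the writer once the first frame reveals the resolution.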
        if out is None:
            h, w = frame.shape[:2]
            out = cv2.VideoWriter(temp_path, fourcc, fps, (w, h))
        out.write(frame)
    cap.release()
    if out:
        out.release()
    return temp_path
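# Minimal Gradio UI: one video in, one annotated video out.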

iface = gr.Interface(
    fn=analyze_video,
    inputs=gr.Video(),
    outputs=gr.Video(),
    title="Gaze Tracker",
    description="Upload a video to analyze gaze and drowsiness.",
)

if __name__ == "__main__":
    iface.launch()