new
- app.py +135 -0
- data/image_paths.npy +3 -0
- data/labels.npy +3 -0
- models/gaze_estimation_model.pth +3 -0
- requirements.txt +80 -0
- scripts/__init__.py +0 -0
- scripts/gaze_tracker.py +148 -0
- scripts/inference.py +62 -0
- utils/ear_utils.py +64 -0
- utils/preprocess.py +8 -0
app.py
ADDED
@@ -0,0 +1,135 @@
import gradio as gr
import cv2
import numpy as np
import tempfile
import os
from scripts.inference import GazePredictor
from utils.ear_utils import BlinkDetector

def smooth_values(history, current_value, window_size=5):
    if current_value is not None:
        history.append(current_value)
        if len(history) > window_size:
            history.pop(0)
    return np.mean(history, axis=0) if isinstance(current_value, np.ndarray) and history else current_value if current_value is not None else 0

MODEL_PATH = os.path.join("models", "gaze_estimation_model.pth")

def analyze_video(input_video):
    cap = cv2.VideoCapture(input_video)
    gaze_predictor = GazePredictor(MODEL_PATH)
    blink_detector = BlinkDetector()
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    temp_fd, temp_path = tempfile.mkstemp(suffix='.mp4')
    os.close(temp_fd)
    out = None

    GAZE_STABILITY_THRESHOLD = 0.5
    TIME_THRESHOLD = 15
    BLINK_RATE_THRESHOLD = 1
    EYE_CLOSURE_THRESHOLD = 10
    HEAD_STABILITY_THRESHOLD = 0.05

    gaze_history = []
    head_history = []
    ear_history = []
    stable_gaze_time = 0
    stable_head_time = 0
    eye_closed_time = 0
    blink_count = 0
    start_time = 0
    is_unconscious = False

    frame_count = 0
    fps = cap.get(cv2.CAP_PROP_FPS) or 20

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_count += 1
        if start_time == 0:
            start_time = frame_count / fps

        head_pose_gaze, gaze_h, gaze_v = gaze_predictor.predict_gaze(frame)
        current_gaze = np.array([gaze_h, gaze_v])
        smoothed_gaze = smooth_values(gaze_history, current_gaze)

        ear, left_eye, right_eye, head_pose, left_iris, right_iris = blink_detector.detect_blinks(frame)
        if ear is None:
            cv2.putText(frame, "No face detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            smoothed_head = smooth_values(head_history, None)
            smoothed_ear = smooth_values(ear_history, None)
        else:
            smoothed_head = smooth_values(head_history, head_pose)
            smoothed_ear = smooth_values(ear_history, ear)
            if smoothed_ear >= blink_detector.EAR_THRESHOLD:
                cv2.drawMarker(frame, left_iris, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
                cv2.drawMarker(frame, right_iris, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)

        cv2.putText(frame, f"Gaze H: {smoothed_gaze[0]:.2f}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f"Gaze V: {smoothed_gaze[1]:.2f}", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f"Head Pose: {smoothed_head:.2f}", (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f"EAR: {smoothed_ear:.2f}", (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        if len(gaze_history) > 1:
            gaze_diff = np.sqrt(np.sum((smoothed_gaze - gaze_history[-2])**2))
            if gaze_diff < GAZE_STABILITY_THRESHOLD:
                if stable_gaze_time == 0:
                    stable_gaze_time = frame_count / fps
            else:
                stable_gaze_time = 0

        if len(head_history) > 1 and head_pose is not None:
            head_diff = abs(smoothed_head - head_history[-2])
            if head_diff < HEAD_STABILITY_THRESHOLD:
                if stable_head_time == 0:
                    stable_head_time = frame_count / fps
            else:
                stable_head_time = 0

        if ear is not None and smoothed_ear < blink_detector.EAR_THRESHOLD:
            if eye_closed_time == 0:
                eye_closed_time = frame_count / fps
            elif (frame_count / fps) - eye_closed_time > EYE_CLOSURE_THRESHOLD:
                cv2.putText(frame, "Eyes Closed", (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        else:
            if eye_closed_time > 0 and (frame_count / fps) - eye_closed_time < 0.5:
                blink_count += 1
            eye_closed_time = 0

        elapsed_minutes = ((frame_count / fps) - start_time) / 60 if start_time > 0 else 0
        blink_rate = blink_count / elapsed_minutes if elapsed_minutes > 0 else 0
        cv2.putText(frame, f"Blink Rate: {blink_rate:.1f}/min", (10, 240), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        unconscious_conditions = [
            stable_gaze_time > 0 and (frame_count / fps) - stable_gaze_time > TIME_THRESHOLD,
            blink_rate < BLINK_RATE_THRESHOLD and elapsed_minutes > 1,
            eye_closed_time > 0 and (frame_count / fps) - eye_closed_time > EYE_CLOSURE_THRESHOLD,
            stable_head_time > 0 and (frame_count / fps) - stable_head_time > TIME_THRESHOLD
        ]
        if sum(unconscious_conditions) >= 2:
            cv2.putText(frame, "Unconscious Detected", (10, 270), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            is_unconscious = True
        else:
            is_unconscious = False

        if out is None:
            h, w = frame.shape[:2]
            out = cv2.VideoWriter(temp_path, fourcc, fps, (w, h))
        out.write(frame)
    cap.release()
    if out:
        out.release()
    return temp_path

iface = gr.Interface(
    fn=analyze_video,
    inputs=gr.Video(),
    outputs=gr.Video(),
    title="Gaze Tracker",
    description="Upload a video to analyze gaze and drowsiness."
)

if __name__ == "__main__":
    iface.launch()
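For a quick sanity check outside the Gradio UI, analyze_video can also be called directly from a Python shell. A minimal sketch, assuming the LFS-tracked model has been pulled and that some local clip exists (the sample.mp4 name below is only a placeholder):

    from app import analyze_video

    # Runs the full per-frame pipeline and returns the path to the annotated MP4.
    output_path = analyze_video("sample.mp4")
    print(f"Annotated video written to {output_path}")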
data/image_paths.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:83e055c17ac696ca8a0349e9a0280e93a7f02142c86c1b22a51a16da52a8ae83
size 1670048
data/labels.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:657cc5cfec2c850eee6343f394ac681cd04361b7c455133d161421119cbce12d
size 70688
models/gaze_estimation_model.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3b21ca13bee8bd9209ec0e95d3cee3c95f745abf84b88eeb9feb80a9f3316c61
size 94378602
requirements.txt
ADDED
@@ -0,0 +1,80 @@
absl-py==2.2.2
astunparse==1.6.3
attrs==25.3.0
certifi==2025.1.31
cffi==1.17.1
charset-normalizer==3.4.1
contourpy==1.3.2
cycler==0.12.1
filelock==3.18.0
flatbuffers==25.2.10
fonttools==4.57.0
fsspec==2025.3.2
gast==0.6.0
google-pasta==0.2.0
grpcio==1.71.0
h5py==3.13.0
idna==3.10
imutils==0.5.4
jax==0.6.0
jaxlib==0.6.0
Jinja2==3.1.6
keras==3.9.2
kiwisolver==1.4.8
libclang==18.1.1
Markdown==3.8
markdown-it-py==3.0.0
MarkupSafe==3.0.2
matplotlib==3.8.3
mdurl==0.1.2
mediapipe==0.10.21
ml_dtypes==0.5.1
mpmath==1.3.0
namex==0.0.8
networkx==3.4.2
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.19.3
nvidia-nvjitlink-cu12==12.8.93
nvidia-nvtx-cu12==12.1.105
opencv-contrib-python==4.11.0.86
opencv-python==4.10.0.84
opt_einsum==3.4.0
optree==0.15.0
packaging==25.0
pillow==11.2.1
playsound==1.2.2
protobuf==4.25.6
pycparser==2.22
pygame==2.6.1
Pygments==2.19.1
pyparsing==3.2.3
python-dateutil==2.9.0.post0
requests==2.32.3
rich==14.0.0
scipy==1.15.2
sentencepiece==0.2.0
setuptools==79.0.0
six==1.17.0
sounddevice==0.5.1
sympy==1.13.3
tensorboard==2.19.0
tensorboard-data-server==0.7.2
tensorflow==2.19.0
termcolor==3.0.1
torch==2.2.1
torchvision==0.17.1
typing_extensions==4.13.2
urllib3==2.4.0
Werkzeug==3.1.3
wheel==0.45.1
wrapt==1.17.2
gradio==4.27.0
scripts/__init__.py
ADDED
File without changes
scripts/gaze_tracker.py
ADDED
@@ -0,0 +1,148 @@
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import cv2
import time
import numpy as np
from scripts.inference import GazePredictor
from utils.ear_utils import BlinkDetector
from pygame import mixer

def smooth_values(history, current_value, window_size=5):
    if current_value is not None:
        history.append(current_value)
        if len(history) > window_size:
            history.pop(0)
    return np.mean(history, axis=0) if isinstance(current_value, np.ndarray) and history else current_value if current_value is not None else 0

def track_gaze(model_path):
    gaze_predictor = GazePredictor(model_path)
    blink_detector = BlinkDetector()
    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("Error: Could not open webcam.")
        return

    GAZE_STABILITY_THRESHOLD = 0.5
    TIME_THRESHOLD = 15
    BLINK_RATE_THRESHOLD = 1
    EYE_CLOSURE_THRESHOLD = 10
    HEAD_STABILITY_THRESHOLD = 0.05

    gaze_history = []
    head_history = []
    ear_history = []
    stable_gaze_time = 0
    stable_head_time = 0
    eye_closed_time = 0
    blink_count = 0
    start_time = time.time()
    is_unconscious = False

    # Initialize pygame mixer
    mixer.init()
    ALARM_PATH = os.path.normpath(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "sounds", "alarm.wav")))
    if not os.path.exists(ALARM_PATH):
        print(f"Warning: Alarm sound file not found at {ALARM_PATH}. No sound will play.")

    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to capture frame")
            break

        head_pose_gaze, gaze_h, gaze_v = gaze_predictor.predict_gaze(frame)
        current_gaze = np.array([gaze_h, gaze_v])
        smoothed_gaze = smooth_values(gaze_history, current_gaze)

        ear, left_eye, right_eye, head_pose, left_iris, right_iris = blink_detector.detect_blinks(frame)
        if ear is None:
            cv2.putText(frame, "No face detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            print("No face detected")
            smoothed_head = smooth_values(head_history, None)
            smoothed_ear = smooth_values(ear_history, None)
        else:
            print(f"EAR: {ear:.2f}, Head Pose: {head_pose:.2f}, Gaze: [{smoothed_gaze[0]:.2f}, {smoothed_gaze[1]:.2f}]")
            smoothed_head = smooth_values(head_history, head_pose)
            smoothed_ear = smooth_values(ear_history, ear)
            if smoothed_ear >= blink_detector.EAR_THRESHOLD:
                cv2.drawMarker(frame, left_iris, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
                cv2.drawMarker(frame, right_iris, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)

        cv2.putText(frame, f"Gaze H: {smoothed_gaze[0]:.2f}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f"Gaze V: {smoothed_gaze[1]:.2f}", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f"Head Pose: {smoothed_head:.2f}", (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f"EAR: {smoothed_ear:.2f}", (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        if len(gaze_history) > 1:
            gaze_diff = np.sqrt(np.sum((smoothed_gaze - gaze_history[-2])**2))
            print(f"Gaze Diff: {gaze_diff:.2f}")
            if gaze_diff < GAZE_STABILITY_THRESHOLD:
                if stable_gaze_time == 0:
                    stable_gaze_time = time.time()
                elif time.time() - stable_gaze_time > TIME_THRESHOLD:
                    cv2.putText(frame, "Gaze Fixed", (10, 180), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            else:
                stable_gaze_time = 0

        if len(head_history) > 1 and head_pose is not None:
            head_diff = abs(smoothed_head - head_history[-2])
            print(f"Head Diff: {head_diff:.2f}")
            if head_diff < HEAD_STABILITY_THRESHOLD:
                if stable_head_time == 0:
                    stable_head_time = time.time()
            else:
                stable_head_time = 0

        if ear is not None and smoothed_ear < blink_detector.EAR_THRESHOLD:
            if eye_closed_time == 0:
                eye_closed_time = time.time()
            elif time.time() - eye_closed_time > EYE_CLOSURE_THRESHOLD:
                cv2.putText(frame, "Eyes Closed", (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        else:
            if eye_closed_time > 0 and time.time() - eye_closed_time < 0.5:
                blink_count += 1
            eye_closed_time = 0

        elapsed_minutes = (time.time() - start_time) / 60
        blink_rate = blink_count / elapsed_minutes if elapsed_minutes > 0 else 0
        cv2.putText(frame, f"Blink Rate: {blink_rate:.1f}/min", (10, 240), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        unconscious_conditions = [
            stable_gaze_time > 0 and time.time() - stable_gaze_time > TIME_THRESHOLD,
            blink_rate < BLINK_RATE_THRESHOLD and elapsed_minutes > 1,
            eye_closed_time > 0 and time.time() - eye_closed_time > EYE_CLOSURE_THRESHOLD,
            stable_head_time > 0 and time.time() - stable_head_time > TIME_THRESHOLD
        ]
        print(f"Conditions: {unconscious_conditions}")
        if sum(unconscious_conditions) >= 2:
            if not is_unconscious and os.path.exists(ALARM_PATH):
                print(f"Attempting to play alarm at {ALARM_PATH}")
                try:
                    mixer.music.load(ALARM_PATH)
                    mixer.music.play()
                except Exception as e:
                    print(f"Error playing alarm sound: {e}")
            print("Unconscious detected!")
            cv2.putText(frame, "Unconscious Detected", (10, 270), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            is_unconscious = True
        else:
            is_unconscious = False

        cv2.imshow("Gaze Tracking", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
    mixer.quit()

if __name__ == "__main__":
    SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
    MODEL_PATH = os.path.join(SCRIPT_DIR, "..", "models", "gaze_estimation_model.pth")
    if not os.path.exists(MODEL_PATH):
        print(f"Error: Missing model file at {MODEL_PATH}")
        sys.exit(1)
    track_gaze(MODEL_PATH)
scripts/inference.py
ADDED
@@ -0,0 +1,62 @@
import torch
import numpy as np
import cv2
from torchvision import transforms, models
from utils.preprocess import preprocess_frame

class GazeEstimationModel(torch.nn.Module):
    def __init__(self):
        super(GazeEstimationModel, self).__init__()
        # Initialize ResNet-50 as the backbone
        self.backbone = models.resnet50(pretrained=False)
        # Modify the final fully connected layer for 3 outputs (head_pose, gaze_h, gaze_v)
        self.backbone.fc = torch.nn.Linear(self.backbone.fc.in_features, 3)

    def forward(self, x):
        return self.backbone(x)

class GazePredictor:
    def __init__(self, model_path):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Initialize the custom model
        self.model = GazeEstimationModel()

        # Load the state dictionary
        state_dict = torch.load(model_path, map_location=self.device)

        # Check if state_dict has 'backbone.' prefix and strip it if necessary
        new_state_dict = {}
        for key, value in state_dict.items():
            new_key = key.replace("backbone.", "")  # Remove 'backbone.' prefix
            new_state_dict[new_key] = value

        # Load the adjusted state dictionary into the model
        try:
            self.model.backbone.load_state_dict(new_state_dict)
        except RuntimeError as e:
            print("Error loading state dict directly:", e)
            print("Trying to load state dict with strict=False...")
            self.model.backbone.load_state_dict(new_state_dict, strict=False)

        # Move to device and set to evaluation mode
        self.model.to(self.device)
        self.model.eval()

        # Define preprocessing transform
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def predict_gaze(self, frame):
        preprocessed = preprocess_frame(frame)
        preprocessed = preprocessed[0]
        preprocessed = self.transform(preprocessed).float().unsqueeze(0)
        preprocessed = preprocessed.to(self.device)
        with torch.no_grad():
            outputs = self.model(preprocessed)
        outputs = outputs.cpu().numpy()[0]
        print("Model outputs:", outputs)  # Debug print
        head_pose, gaze_h, gaze_v = outputs
        return head_pose, gaze_h, gaze_v
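GazePredictor bundles model construction, checkpoint loading, and per-frame inference, so it can also be exercised on its own. A minimal sketch, assuming the repository root is on the import path and that you substitute a frame of your own (frame.jpg below is only a placeholder):

    import cv2
    from scripts.inference import GazePredictor

    predictor = GazePredictor("models/gaze_estimation_model.pth")
    frame = cv2.imread("frame.jpg")  # any BGR image, e.g. a frame grabbed from cv2.VideoCapture
    head_pose, gaze_h, gaze_v = predictor.predict_gaze(frame)
    print(f"head pose {head_pose:.2f}, gaze ({gaze_h:.2f}, {gaze_v:.2f})")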
utils/ear_utils.py
ADDED
@@ -0,0 +1,64 @@
import cv2
import numpy as np
import mediapipe as mp
from scipy.spatial import distance as dist

def eye_aspect_ratio(eye_landmarks, landmarks, image_shape):
    eye = [
        landmarks[eye_landmarks[0]],  # P1 (left)
        landmarks[eye_landmarks[1]],  # P2 (top-left)
        landmarks[eye_landmarks[2]],  # P3 (top-right)
        landmarks[eye_landmarks[3]],  # P4 (right)
        landmarks[eye_landmarks[4]],  # P5 (bottom-right)
        landmarks[eye_landmarks[5]]   # P6 (bottom-left)
    ]
    eye = [(int(p.x * image_shape[1]), int(p.y * image_shape[0])) for p in eye]

    A = dist.euclidean(eye[1], eye[5])
    B = dist.euclidean(eye[2], eye[4])
    C = dist.euclidean(eye[0], eye[3])
    ear = (A + B) / (2.0 * C)
    return ear, eye

class BlinkDetector:
    def __init__(self):
        self.mp_face_mesh = mp.solutions.face_mesh
        self.face_mesh = self.mp_face_mesh.FaceMesh(
            max_num_faces=1,
            refine_landmarks=True,  # Required for iris landmarks
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        )
        self.EAR_THRESHOLD = 0.25
        self.EAR_CONSEC_FRAMES = 3

    def detect_blinks(self, frame):
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = self.face_mesh.process(image_rgb)

        if not results.multi_face_landmarks:
            return None, None, None, None, None, None

        landmarks = results.multi_face_landmarks[0].landmark
        h, w = frame.shape[:2]

        LEFT_EYE = [33, 160, 158, 133, 153, 144]
        RIGHT_EYE = [362, 385, 387, 263, 373, 380]
        LEFT_IRIS = 473  # Left iris center
        RIGHT_IRIS = 468  # Right iris center

        left_ear, left_eye_points = eye_aspect_ratio(LEFT_EYE, landmarks, (h, w))
        right_ear, right_eye_points = eye_aspect_ratio(RIGHT_EYE, landmarks, (h, w))
        avg_ear = (left_ear + right_ear) / 2.0

        nose_tip = landmarks[1]
        head_pose = (nose_tip.x - 0.5) * 2

        # Iris coordinates
        left_iris = (int(landmarks[LEFT_IRIS].x * w), int(landmarks[LEFT_IRIS].y * h))
        right_iris = (int(landmarks[RIGHT_IRIS].x * w), int(landmarks[RIGHT_IRIS].y * h))

        return avg_ear, left_eye_points, right_eye_points, head_pose, left_iris, right_iris

    def __del__(self):
        self.face_mesh.close()
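eye_aspect_ratio follows the usual eye-aspect-ratio formulation (Soukupová and Čech, 2016): with the six landmarks ordered P1..P6 as in the comments above, EAR = (||P2 - P6|| + ||P3 - P5||) / (2 * ||P1 - P4||), which is exactly the A, B, and C distances in the code. The ratio sits roughly around 0.3 for an open eye and collapses toward zero as the lids close; for example, vertical gaps of about 2 px each against an 8 px eye width give (2 + 2) / (2 * 8) = 0.25, the EAR_THRESHOLD used by BlinkDetector.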
utils/preprocess.py
ADDED
@@ -0,0 +1,8 @@
import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import img_to_array

def preprocess_frame(frame, target_size=(224, 224)):
    frame = cv2.resize(frame, target_size)
    frame = img_to_array(frame) / 255.0
    return np.expand_dims(frame, axis=0)