Guru-25 committed
Commit b8b61aa · 1 Parent(s): 6227cd5
app.py ADDED
@@ -0,0 +1,135 @@
+ import gradio as gr
+ import cv2
+ import numpy as np
+ import tempfile
+ import os
+ from scripts.inference import GazePredictor
+ from utils.ear_utils import BlinkDetector
+
+ def smooth_values(history, current_value, window_size=5):
+     if current_value is not None:
+         history.append(current_value)
+     if len(history) > window_size:
+         history.pop(0)
+     return np.mean(history, axis=0) if isinstance(current_value, np.ndarray) and history else current_value if current_value is not None else 0
+
+ MODEL_PATH = os.path.join("models", "gaze_estimation_model.pth")
+
+ def analyze_video(input_video):
+     cap = cv2.VideoCapture(input_video)
+     gaze_predictor = GazePredictor(MODEL_PATH)
+     blink_detector = BlinkDetector()
+     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+     temp_fd, temp_path = tempfile.mkstemp(suffix='.mp4')
+     os.close(temp_fd)
+     out = None
+
+     GAZE_STABILITY_THRESHOLD = 0.5
+     TIME_THRESHOLD = 15
+     BLINK_RATE_THRESHOLD = 1
+     EYE_CLOSURE_THRESHOLD = 10
+     HEAD_STABILITY_THRESHOLD = 0.05
+
+     gaze_history = []
+     head_history = []
+     ear_history = []
+     stable_gaze_time = 0
+     stable_head_time = 0
+     eye_closed_time = 0
+     blink_count = 0
+     start_time = 0
+     is_unconscious = False
+
+     frame_count = 0
+     fps = cap.get(cv2.CAP_PROP_FPS) or 20
+
+     while True:
+         ret, frame = cap.read()
+         if not ret:
+             break
+         frame_count += 1
+         if start_time == 0:
+             start_time = frame_count / fps
+
+         head_pose_gaze, gaze_h, gaze_v = gaze_predictor.predict_gaze(frame)
+         current_gaze = np.array([gaze_h, gaze_v])
+         smoothed_gaze = smooth_values(gaze_history, current_gaze)
+
+         ear, left_eye, right_eye, head_pose, left_iris, right_iris = blink_detector.detect_blinks(frame)
+         if ear is None:
+             cv2.putText(frame, "No face detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+             smoothed_head = smooth_values(head_history, None)
+             smoothed_ear = smooth_values(ear_history, None)
+         else:
+             smoothed_head = smooth_values(head_history, head_pose)
+             smoothed_ear = smooth_values(ear_history, ear)
+             if smoothed_ear >= blink_detector.EAR_THRESHOLD:
+                 cv2.drawMarker(frame, left_iris, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+                 cv2.drawMarker(frame, right_iris, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+
+         cv2.putText(frame, f"Gaze H: {smoothed_gaze[0]:.2f}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+         cv2.putText(frame, f"Gaze V: {smoothed_gaze[1]:.2f}", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+         cv2.putText(frame, f"Head Pose: {smoothed_head:.2f}", (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+         cv2.putText(frame, f"EAR: {smoothed_ear:.2f}", (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+
+         if len(gaze_history) > 1:
+             gaze_diff = np.sqrt(np.sum((smoothed_gaze - gaze_history[-2])**2))
+             if gaze_diff < GAZE_STABILITY_THRESHOLD:
+                 if stable_gaze_time == 0:
+                     stable_gaze_time = frame_count / fps
+             else:
+                 stable_gaze_time = 0
+
+         if len(head_history) > 1 and head_pose is not None:
+             head_diff = abs(smoothed_head - head_history[-2])
+             if head_diff < HEAD_STABILITY_THRESHOLD:
+                 if stable_head_time == 0:
+                     stable_head_time = frame_count / fps
+             else:
+                 stable_head_time = 0
+
+         if ear is not None and smoothed_ear < blink_detector.EAR_THRESHOLD:
+             if eye_closed_time == 0:
+                 eye_closed_time = frame_count / fps
+             elif (frame_count / fps) - eye_closed_time > EYE_CLOSURE_THRESHOLD:
+                 cv2.putText(frame, "Eyes Closed", (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+         else:
+             if eye_closed_time > 0 and (frame_count / fps) - eye_closed_time < 0.5:
+                 blink_count += 1
+             eye_closed_time = 0
+
+         elapsed_minutes = ((frame_count / fps) - start_time) / 60 if start_time > 0 else 0
+         blink_rate = blink_count / elapsed_minutes if elapsed_minutes > 0 else 0
+         cv2.putText(frame, f"Blink Rate: {blink_rate:.1f}/min", (10, 240), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+
+         unconscious_conditions = [
+             stable_gaze_time > 0 and (frame_count / fps) - stable_gaze_time > TIME_THRESHOLD,
+             blink_rate < BLINK_RATE_THRESHOLD and elapsed_minutes > 1,
+             eye_closed_time > 0 and (frame_count / fps) - eye_closed_time > EYE_CLOSURE_THRESHOLD,
+             stable_head_time > 0 and (frame_count / fps) - stable_head_time > TIME_THRESHOLD
+         ]
+         if sum(unconscious_conditions) >= 2:
+             cv2.putText(frame, "Unconscious Detected", (10, 270), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+             is_unconscious = True
+         else:
+             is_unconscious = False
+
+         if out is None:
+             h, w = frame.shape[:2]
+             out = cv2.VideoWriter(temp_path, fourcc, fps, (w, h))
+         out.write(frame)
+     cap.release()
+     if out:
+         out.release()
+     return temp_path
+
+ iface = gr.Interface(
+     fn=analyze_video,
+     inputs=gr.Video(),
+     outputs=gr.Video(),
+     title="Gaze Tracker",
+     description="Upload a video to analyze gaze and drowsiness."
+ )
+
+ if __name__ == "__main__":
+     iface.launch()
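
As a minimal sketch (not part of the committed diff), analyze_video can also be exercised without the Gradio UI. This assumes the repository root is the working directory, the LFS model weights have been pulled, and a hypothetical input clip named sample.mp4 exists:

from app import analyze_video

# "sample.mp4" is a placeholder path used only for illustration
annotated_path = analyze_video("sample.mp4")
print(f"Annotated video written to {annotated_path}")  # temp .mp4 with overlays
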
data/image_paths.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:83e055c17ac696ca8a0349e9a0280e93a7f02142c86c1b22a51a16da52a8ae83
+ size 1670048
data/labels.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:657cc5cfec2c850eee6343f394ac681cd04361b7c455133d161421119cbce12d
+ size 70688
models/gaze_estimation_model.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b21ca13bee8bd9209ec0e95d3cee3c95f745abf84b88eeb9feb80a9f3316c61
+ size 94378602
requirements.txt ADDED
@@ -0,0 +1,80 @@
+ absl-py==2.2.2
+ astunparse==1.6.3
+ attrs==25.3.0
+ certifi==2025.1.31
+ cffi==1.17.1
+ charset-normalizer==3.4.1
+ contourpy==1.3.2
+ cycler==0.12.1
+ filelock==3.18.0
+ flatbuffers==25.2.10
+ fonttools==4.57.0
+ fsspec==2025.3.2
+ gast==0.6.0
+ google-pasta==0.2.0
+ grpcio==1.71.0
+ h5py==3.13.0
+ idna==3.10
+ imutils==0.5.4
+ jax==0.6.0
+ jaxlib==0.6.0
+ Jinja2==3.1.6
+ keras==3.9.2
+ kiwisolver==1.4.8
+ libclang==18.1.1
+ Markdown==3.8
+ markdown-it-py==3.0.0
+ MarkupSafe==3.0.2
+ matplotlib==3.8.3
+ mdurl==0.1.2
+ mediapipe==0.10.21
+ ml_dtypes==0.5.1
+ mpmath==1.3.0
+ namex==0.0.8
+ networkx==3.4.2
+ numpy==1.26.4
+ nvidia-cublas-cu12==12.1.3.1
+ nvidia-cuda-cupti-cu12==12.1.105
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-cudnn-cu12==8.9.2.26
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-curand-cu12==10.3.2.106
+ nvidia-cusolver-cu12==11.4.5.107
+ nvidia-cusparse-cu12==12.1.0.106
+ nvidia-nccl-cu12==2.19.3
+ nvidia-nvjitlink-cu12==12.8.93
+ nvidia-nvtx-cu12==12.1.105
+ opencv-contrib-python==4.11.0.86
+ opencv-python==4.10.0.84
+ opt_einsum==3.4.0
+ optree==0.15.0
+ packaging==25.0
+ pillow==11.2.1
+ playsound==1.2.2
+ protobuf==4.25.6
+ pycparser==2.22
+ pygame==2.6.1
+ Pygments==2.19.1
+ pyparsing==3.2.3
+ python-dateutil==2.9.0.post0
+ requests==2.32.3
+ rich==14.0.0
+ scipy==1.15.2
+ sentencepiece==0.2.0
+ setuptools==79.0.0
+ six==1.17.0
+ sounddevice==0.5.1
+ sympy==1.13.3
+ tensorboard==2.19.0
+ tensorboard-data-server==0.7.2
+ tensorflow==2.19.0
+ termcolor==3.0.1
+ torch==2.2.1
+ torchvision==0.17.1
+ typing_extensions==4.13.2
+ urllib3==2.4.0
+ Werkzeug==3.1.3
+ wheel==0.45.1
+ wrapt==1.17.2
+ gradio==4.27.0
scripts/__init__.py ADDED
File without changes
scripts/gaze_tracker.py ADDED
@@ -0,0 +1,148 @@
+ import sys
+ import os
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+ import cv2
+ import time
+ import numpy as np
+ from scripts.inference import GazePredictor
+ from utils.ear_utils import BlinkDetector
+ from pygame import mixer
+
+ def smooth_values(history, current_value, window_size=5):
+     if current_value is not None:
+         history.append(current_value)
+     if len(history) > window_size:
+         history.pop(0)
+     return np.mean(history, axis=0) if isinstance(current_value, np.ndarray) and history else current_value if current_value is not None else 0
+
+ def track_gaze(model_path):
+     gaze_predictor = GazePredictor(model_path)
+     blink_detector = BlinkDetector()
+     cap = cv2.VideoCapture(0)
+
+     if not cap.isOpened():
+         print("Error: Could not open webcam.")
+         return
+
+     GAZE_STABILITY_THRESHOLD = 0.5
+     TIME_THRESHOLD = 15
+     BLINK_RATE_THRESHOLD = 1
+     EYE_CLOSURE_THRESHOLD = 10
+     HEAD_STABILITY_THRESHOLD = 0.05
+
+     gaze_history = []
+     head_history = []
+     ear_history = []
+     stable_gaze_time = 0
+     stable_head_time = 0
+     eye_closed_time = 0
+     blink_count = 0
+     start_time = time.time()
+     is_unconscious = False
+
+     # Initialize pygame mixer
+     mixer.init()
+     ALARM_PATH = os.path.normpath(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "sounds", "alarm.wav")))
+     if not os.path.exists(ALARM_PATH):
+         print(f"Warning: Alarm sound file not found at {ALARM_PATH}. No sound will play.")
+
+     while True:
+         ret, frame = cap.read()
+         if not ret:
+             print("Failed to capture frame")
+             break
+
+         head_pose_gaze, gaze_h, gaze_v = gaze_predictor.predict_gaze(frame)
+         current_gaze = np.array([gaze_h, gaze_v])
+         smoothed_gaze = smooth_values(gaze_history, current_gaze)
+
+         ear, left_eye, right_eye, head_pose, left_iris, right_iris = blink_detector.detect_blinks(frame)
+         if ear is None:
+             cv2.putText(frame, "No face detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+             print("No face detected")
+             smoothed_head = smooth_values(head_history, None)
+             smoothed_ear = smooth_values(ear_history, None)
+         else:
+             print(f"EAR: {ear:.2f}, Head Pose: {head_pose:.2f}, Gaze: [{smoothed_gaze[0]:.2f}, {smoothed_gaze[1]:.2f}]")
+             smoothed_head = smooth_values(head_history, head_pose)
+             smoothed_ear = smooth_values(ear_history, ear)
+             if smoothed_ear >= blink_detector.EAR_THRESHOLD:
+                 cv2.drawMarker(frame, left_iris, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+                 cv2.drawMarker(frame, right_iris, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+
+         cv2.putText(frame, f"Gaze H: {smoothed_gaze[0]:.2f}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+         cv2.putText(frame, f"Gaze V: {smoothed_gaze[1]:.2f}", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+         cv2.putText(frame, f"Head Pose: {smoothed_head:.2f}", (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+         cv2.putText(frame, f"EAR: {smoothed_ear:.2f}", (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+
+         if len(gaze_history) > 1:
+             gaze_diff = np.sqrt(np.sum((smoothed_gaze - gaze_history[-2])**2))
+             print(f"Gaze Diff: {gaze_diff:.2f}")
+             if gaze_diff < GAZE_STABILITY_THRESHOLD:
+                 if stable_gaze_time == 0:
+                     stable_gaze_time = time.time()
+                 elif time.time() - stable_gaze_time > TIME_THRESHOLD:
+                     cv2.putText(frame, "Gaze Fixed", (10, 180), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+             else:
+                 stable_gaze_time = 0
+
+         if len(head_history) > 1 and head_pose is not None:
+             head_diff = abs(smoothed_head - head_history[-2])
+             print(f"Head Diff: {head_diff:.2f}")
+             if head_diff < HEAD_STABILITY_THRESHOLD:
+                 if stable_head_time == 0:
+                     stable_head_time = time.time()
+             else:
+                 stable_head_time = 0
+
+         if ear is not None and smoothed_ear < blink_detector.EAR_THRESHOLD:
+             if eye_closed_time == 0:
+                 eye_closed_time = time.time()
+             elif time.time() - eye_closed_time > EYE_CLOSURE_THRESHOLD:
+                 cv2.putText(frame, "Eyes Closed", (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+         else:
+             if eye_closed_time > 0 and time.time() - eye_closed_time < 0.5:
+                 blink_count += 1
+             eye_closed_time = 0
+
+         elapsed_minutes = (time.time() - start_time) / 60
+         blink_rate = blink_count / elapsed_minutes if elapsed_minutes > 0 else 0
+         cv2.putText(frame, f"Blink Rate: {blink_rate:.1f}/min", (10, 240), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+
+         unconscious_conditions = [
+             stable_gaze_time > 0 and time.time() - stable_gaze_time > TIME_THRESHOLD,
+             blink_rate < BLINK_RATE_THRESHOLD and elapsed_minutes > 1,
+             eye_closed_time > 0 and time.time() - eye_closed_time > EYE_CLOSURE_THRESHOLD,
+             stable_head_time > 0 and time.time() - stable_head_time > TIME_THRESHOLD
+         ]
+         print(f"Conditions: {unconscious_conditions}")
+         if sum(unconscious_conditions) >= 2:
+             if not is_unconscious and os.path.exists(ALARM_PATH):
+                 print(f"Attempting to play alarm at {ALARM_PATH}")
+                 try:
+                     mixer.music.load(ALARM_PATH)
+                     mixer.music.play()
+                 except Exception as e:
+                     print(f"Error playing alarm sound: {e}")
+             print("Unconscious detected!")
+             cv2.putText(frame, "Unconscious Detected", (10, 270), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+             is_unconscious = True
+         else:
+             is_unconscious = False
+
+         cv2.imshow("Gaze Tracking", frame)
+         if cv2.waitKey(1) & 0xFF == ord('q'):
+             break
+
+     cap.release()
+     cv2.destroyAllWindows()
+     mixer.quit()
+
+ if __name__ == "__main__":
+     SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+     MODEL_PATH = os.path.join(SCRIPT_DIR, "..", "models", "gaze_estimation_model.pth")
+     if not os.path.exists(MODEL_PATH):
+         print(f"Error: Missing model file at {MODEL_PATH}")
+         sys.exit(1)
+     track_gaze(MODEL_PATH)
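
As a usage sketch (not part of the commit), the webcam tracker can also be driven programmatically, assuming the repository root is the working directory, the LFS model weights have been pulled, and a webcam is available:

from scripts.gaze_tracker import track_gaze

# Opens the default webcam; press 'q' in the preview window to quit
track_gaze("models/gaze_estimation_model.pth")
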
scripts/inference.py ADDED
@@ -0,0 +1,62 @@
+ import torch
+ import numpy as np
+ import cv2
+ from torchvision import transforms, models
+ from utils.preprocess import preprocess_frame
+
+ class GazeEstimationModel(torch.nn.Module):
+     def __init__(self):
+         super(GazeEstimationModel, self).__init__()
+         # Initialize ResNet-50 as the backbone
+         self.backbone = models.resnet50(pretrained=False)
+         # Modify the final fully connected layer for 3 outputs (head_pose, gaze_h, gaze_v)
+         self.backbone.fc = torch.nn.Linear(self.backbone.fc.in_features, 3)
+
+     def forward(self, x):
+         return self.backbone(x)
+
+ class GazePredictor:
+     def __init__(self, model_path):
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+         # Initialize the custom model
+         self.model = GazeEstimationModel()
+
+         # Load the state dictionary
+         state_dict = torch.load(model_path, map_location=self.device)
+
+         # Check if state_dict has 'backbone.' prefix and strip it if necessary
+         new_state_dict = {}
+         for key, value in state_dict.items():
+             new_key = key.replace("backbone.", "")  # Remove 'backbone.' prefix
+             new_state_dict[new_key] = value
+
+         # Load the adjusted state dictionary into the model
+         try:
+             self.model.backbone.load_state_dict(new_state_dict)
+         except RuntimeError as e:
+             print("Error loading state dict directly:", e)
+             print("Trying to load state dict with strict=False...")
+             self.model.backbone.load_state_dict(new_state_dict, strict=False)
+
+         # Move to device and set to evaluation mode
+         self.model.to(self.device)
+         self.model.eval()
+
+         # Define preprocessing transform
+         self.transform = transforms.Compose([
+             transforms.ToTensor(),
+             transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+         ])
+
+     def predict_gaze(self, frame):
+         preprocessed = preprocess_frame(frame)
+         preprocessed = preprocessed[0]
+         preprocessed = self.transform(preprocessed).float().unsqueeze(0)
+         preprocessed = preprocessed.to(self.device)
+         with torch.no_grad():
+             outputs = self.model(preprocessed)
+         outputs = outputs.cpu().numpy()[0]
+         print("Model outputs:", outputs)  # Debug print
+         head_pose, gaze_h, gaze_v = outputs
+         return head_pose, gaze_h, gaze_v
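
A minimal sketch (not in the commit) of exercising GazePredictor on a single image, assuming the repository root is the working directory and the LFS weights have been fetched; "face.jpg" is a placeholder image path:

import cv2
from scripts.inference import GazePredictor

predictor = GazePredictor("models/gaze_estimation_model.pth")
frame = cv2.imread("face.jpg")  # placeholder input, BGR image as returned by OpenCV
head_pose, gaze_h, gaze_v = predictor.predict_gaze(frame)
print(f"head_pose={head_pose:.2f}, gaze_h={gaze_h:.2f}, gaze_v={gaze_v:.2f}")
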
utils/ear_utils.py ADDED
@@ -0,0 +1,64 @@
+ import cv2
+ import numpy as np
+ import mediapipe as mp
+ from scipy.spatial import distance as dist
+
+ def eye_aspect_ratio(eye_landmarks, landmarks, image_shape):
+     eye = [
+         landmarks[eye_landmarks[0]],  # P1 (left)
+         landmarks[eye_landmarks[1]],  # P2 (top-left)
+         landmarks[eye_landmarks[2]],  # P3 (top-right)
+         landmarks[eye_landmarks[3]],  # P4 (right)
+         landmarks[eye_landmarks[4]],  # P5 (bottom-right)
+         landmarks[eye_landmarks[5]]   # P6 (bottom-left)
+     ]
+     eye = [(int(p.x * image_shape[1]), int(p.y * image_shape[0])) for p in eye]
+
+     A = dist.euclidean(eye[1], eye[5])
+     B = dist.euclidean(eye[2], eye[4])
+     C = dist.euclidean(eye[0], eye[3])
+     ear = (A + B) / (2.0 * C)
+     return ear, eye
+
+ class BlinkDetector:
+     def __init__(self):
+         self.mp_face_mesh = mp.solutions.face_mesh
+         self.face_mesh = self.mp_face_mesh.FaceMesh(
+             max_num_faces=1,
+             refine_landmarks=True,  # Required for iris landmarks
+             min_detection_confidence=0.5,
+             min_tracking_confidence=0.5
+         )
+         self.EAR_THRESHOLD = 0.25
+         self.EAR_CONSEC_FRAMES = 3
+
+     def detect_blinks(self, frame):
+         image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+         results = self.face_mesh.process(image_rgb)
+
+         if not results.multi_face_landmarks:
+             return None, None, None, None, None, None
+
+         landmarks = results.multi_face_landmarks[0].landmark
+         h, w = frame.shape[:2]
+
+         LEFT_EYE = [33, 160, 158, 133, 153, 144]
+         RIGHT_EYE = [362, 385, 387, 263, 373, 380]
+         LEFT_IRIS = 473  # Left iris center
+         RIGHT_IRIS = 468  # Right iris center
+
+         left_ear, left_eye_points = eye_aspect_ratio(LEFT_EYE, landmarks, (h, w))
+         right_ear, right_eye_points = eye_aspect_ratio(RIGHT_EYE, landmarks, (h, w))
+         avg_ear = (left_ear + right_ear) / 2.0
+
+         nose_tip = landmarks[1]
+         head_pose = (nose_tip.x - 0.5) * 2
+
+         # Iris coordinates
+         left_iris = (int(landmarks[LEFT_IRIS].x * w), int(landmarks[LEFT_IRIS].y * h))
+         right_iris = (int(landmarks[RIGHT_IRIS].x * w), int(landmarks[RIGHT_IRIS].y * h))
+
+         return avg_ear, left_eye_points, right_eye_points, head_pose, left_iris, right_iris
+
+     def __del__(self):
+         self.face_mesh.close()
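
A small sketch (not part of the diff) of how BlinkDetector is consumed per frame; the capture device index 0 is an assumption about the local setup:

import cv2
from utils.ear_utils import BlinkDetector

detector = BlinkDetector()
cap = cv2.VideoCapture(0)  # default webcam, assumed available
ret, frame = cap.read()
if ret:
    ear, left_eye, right_eye, head_pose, left_iris, right_iris = detector.detect_blinks(frame)
    if ear is None:
        print("No face detected")
    else:
        print(f"EAR={ear:.2f}, head_pose={head_pose:.2f}")
cap.release()
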
utils/preprocess.py ADDED
@@ -0,0 +1,8 @@
+ import cv2
+ import numpy as np
+ from tensorflow.keras.preprocessing.image import img_to_array
+
+ def preprocess_frame(frame, target_size=(224, 224)):
+     frame = cv2.resize(frame, target_size)
+     frame = img_to_array(frame) / 255.0
+     return np.expand_dims(frame, axis=0)
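
For clarity, a tiny sketch (not in the commit) of the shape produced by preprocess_frame, using a synthetic stand-in frame:

import numpy as np
from utils.preprocess import preprocess_frame

dummy = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in BGR frame
batch = preprocess_frame(dummy)
print(batch.shape)  # (1, 224, 224, 3), values scaled to [0, 1]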