new
- app.py +231 -236
- requirements.txt +5 -4
app.py
CHANGED
@@ -6,313 +6,299 @@ import os
 import time
 from scripts.inference import GazePredictor
 from utils.ear_utils import BlinkDetector
+from gradio_webrtc import WebRTC

 def smooth_values(history, current_value, window_size=5):
     if current_value is not None:
+        if isinstance(current_value, np.ndarray):
+            history.append(current_value)
+        elif isinstance(current_value, (int, float)):
+            history.append(current_value)
     if len(history) > window_size:
         history.pop(0)
+
+    if not history:
+        return current_value
+
+    if all(isinstance(item, np.ndarray) for item in history):
+        first_shape = history[0].shape
+        if all(item.shape == first_shape for item in history):
+            return np.mean(history, axis=0)
+        else:
+            return history[-1] if history else None
+    elif all(isinstance(item, (int, float)) for item in history):
+        return np.mean(history)
+    else:
+        return history[-1] if history else None

 MODEL_PATH = os.path.join("models", "gaze_estimation_model.pth")

+gaze_predictor = GazePredictor(MODEL_PATH)
+blink_detector = BlinkDetector()
+
+gaze_history = []
+head_history = []
+ear_history = []
+stable_gaze_time = 0
+stable_head_time = 0
+eye_closed_time = 0
+blink_count = 0
+start_time = 0
+is_unconscious = False
+frame_count_webcam = 0
+
+GAZE_STABILITY_THRESHOLD = 0.5
+TIME_THRESHOLD = 15
+BLINK_RATE_THRESHOLD = 1
+EYE_CLOSURE_THRESHOLD = 10
+HEAD_STABILITY_THRESHOLD = 0.05
+
 def analyze_video(input_video):
     cap = cv2.VideoCapture(input_video)
+    local_gaze_predictor = GazePredictor(MODEL_PATH)
+    local_blink_detector = BlinkDetector()
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     temp_fd, temp_path = tempfile.mkstemp(suffix='.mp4')
     os.close(temp_fd)
     out = None

-    stable_head_time = 0
-    eye_closed_time = 0
-    blink_count = 0
-    start_time = 0
-    is_unconscious = False
+    video_gaze_history = []
+    video_head_history = []
+    video_ear_history = []
+    video_stable_gaze_time = 0
+    video_stable_head_time = 0
+    video_eye_closed_time = 0
+    video_blink_count = 0
+    video_start_time = 0
+    video_is_unconscious = False
+    video_frame_count = 0

-    fps = cap.get(cv2.CAP_PROP_FPS) or 20
+    fps = cap.get(cv2.CAP_PROP_FPS) or 30

     while True:
         ret, frame = cap.read()
         if not ret:
             break
-        start_time = frame_count / fps
+        video_frame_count += 1
+        current_time_video = video_frame_count / fps

+        if video_start_time == 0:
+            video_start_time = current_time_video
+
+        head_pose_gaze, gaze_h, gaze_v = local_gaze_predictor.predict_gaze(frame)
+        current_gaze = np.array([gaze_h, gaze_v]) if gaze_h is not None and gaze_v is not None else None
+        smoothed_gaze = smooth_values(video_gaze_history, current_gaze)
+
+        ear, left_eye, right_eye, head_pose, left_iris, right_iris = local_blink_detector.detect_blinks(frame)

-        ear, left_eye, right_eye, head_pose, left_iris, right_iris = blink_detector.detect_blinks(frame)
         if ear is None:
+            cv2.putText(frame, "No face detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
+            smoothed_head = smooth_values(video_head_history, None)
+            smoothed_ear = smooth_values(video_ear_history, None)
         else:
-        if len(gaze_history) > 1:
-            gaze_diff = np.sqrt(np.sum((smoothed_gaze - gaze_history[-2])**2))
-            if gaze_diff < GAZE_STABILITY_THRESHOLD:
-                if stable_gaze_time == 0:
-                    stable_gaze_time = frame_count / fps
-            else:
-                stable_gaze_time = 0
+            smoothed_head = smooth_values(video_head_history, head_pose)
+            smoothed_ear = smooth_values(video_ear_history, ear)
+            if smoothed_ear >= local_blink_detector.EAR_THRESHOLD and left_iris and right_iris:
+                if all(isinstance(coord, (int, float)) and coord >= 0 for coord in left_iris) and \
+                   all(isinstance(coord, (int, float)) and coord >= 0 for coord in right_iris):
+                    try:
+                        cv2.drawMarker(frame, tuple(map(int, left_iris)), (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+                        cv2.drawMarker(frame, tuple(map(int, right_iris)), (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+                    except OverflowError:
+                        print(f"Warning: OverflowError drawing iris markers at {left_iris}, {right_iris}")

+        gaze_text_h = f"Gaze H: {smoothed_gaze[0]:.2f}" if smoothed_gaze is not None and len(smoothed_gaze) > 0 else "Gaze H: N/A"
+        gaze_text_v = f"Gaze V: {smoothed_gaze[1]:.2f}" if smoothed_gaze is not None and len(smoothed_gaze) > 1 else "Gaze V: N/A"
+        head_text = f"Head Pose: {smoothed_head:.2f}" if smoothed_head is not None else "Head Pose: N/A"
+        ear_text = f"EAR: {smoothed_ear:.2f}" if smoothed_ear is not None else "EAR: N/A"
+
+        cv2.putText(frame, gaze_text_h, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, gaze_text_v, (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, head_text, (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, ear_text, (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+
+        if len(video_gaze_history) > 1 and smoothed_gaze is not None and video_gaze_history[-2] is not None:
+            try:
+                gaze_diff = np.sqrt(np.sum((smoothed_gaze - video_gaze_history[-2])**2))
+                if gaze_diff < GAZE_STABILITY_THRESHOLD:
+                    if video_stable_gaze_time == 0:
+                        video_stable_gaze_time = current_time_video
+                else:
+                    video_stable_gaze_time = 0
+            except TypeError:
+                video_stable_gaze_time = 0
+        else:
+            video_stable_gaze_time = 0
+
+        if len(video_head_history) > 1 and smoothed_head is not None and video_head_history[-2] is not None:
+            head_diff = abs(smoothed_head - video_head_history[-2])
             if head_diff < HEAD_STABILITY_THRESHOLD:
+                if video_stable_head_time == 0:
+                    video_stable_head_time = current_time_video
             else:
+                video_stable_head_time = 0
+        else:
+            video_stable_head_time = 0

+        if ear is not None and smoothed_ear is not None and smoothed_ear < local_blink_detector.EAR_THRESHOLD:
+            if video_eye_closed_time == 0:
+                video_eye_closed_time = current_time_video
+            elif current_time_video - video_eye_closed_time > EYE_CLOSURE_THRESHOLD:
                 cv2.putText(frame, "Eyes Closed", (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+        elif ear is not None:
+            if video_eye_closed_time > 0 and current_time_video - video_eye_closed_time < 0.5:
+                video_blink_count += 1
+            video_eye_closed_time = 0
         else:
-            blink_count += 1
-            eye_closed_time = 0
+            video_eye_closed_time = 0

+        elapsed_seconds_video = current_time_video - video_start_time if video_start_time > 0 else 0
+        elapsed_minutes_video = elapsed_seconds_video / 60
+        blink_rate = video_blink_count / elapsed_minutes_video if elapsed_minutes_video > 0 else 0
+        cv2.putText(frame, f"Blink Rate: {blink_rate:.1f}/min", (10, 240), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

         unconscious_conditions = [
+            video_stable_gaze_time > 0 and current_time_video - video_stable_gaze_time > TIME_THRESHOLD,
+            blink_rate < BLINK_RATE_THRESHOLD and elapsed_minutes_video > 1,
+            video_eye_closed_time > 0 and current_time_video - video_eye_closed_time > EYE_CLOSURE_THRESHOLD,
+            video_stable_head_time > 0 and current_time_video - video_stable_head_time > TIME_THRESHOLD
         ]
+
         if sum(unconscious_conditions) >= 2:
             cv2.putText(frame, "Unconscious Detected", (10, 270), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+            video_is_unconscious = True
         else:
+            video_is_unconscious = False

         if out is None:
             h, w = frame.shape[:2]
             out = cv2.VideoWriter(temp_path, fourcc, fps, (w, h))
         out.write(frame)
+
     cap.release()
     if out:
         out.release()
     return temp_path

-    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
-    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
-    GAZE_STABILITY_THRESHOLD = 0.5
-    TIME_THRESHOLD = 15
-    BLINK_RATE_THRESHOLD = 1
-    EYE_CLOSURE_THRESHOLD = 10
-    HEAD_STABILITY_THRESHOLD = 0.05
-    gaze_history = []
-    head_history = []
-    ear_history = []
-    stable_gaze_time = 0
-    stable_head_time = 0
-    eye_closed_time = 0
-    blink_count = 0
-    start_time = time.time()
-    is_unconscious = False
-    log_output = ""
-    state = {
-        "gaze_predictor": gaze_predictor,
-        "blink_detector": blink_detector,
-        "cap": cap,
-        "gaze_history": gaze_history,
-        "head_history": head_history,
-        "ear_history": ear_history,
-        "stable_gaze_time": stable_gaze_time,
-        "stable_head_time": stable_head_time,
-        "eye_closed_time": eye_closed_time,
-        "blink_count": blink_count,
-        "start_time": start_time,
-        "is_unconscious": is_unconscious,
-        "GAZE_STABILITY_THRESHOLD": GAZE_STABILITY_THRESHOLD,
-        "TIME_THRESHOLD": TIME_THRESHOLD,
-        "BLINK_RATE_THRESHOLD": BLINK_RATE_THRESHOLD,
-        "EYE_CLOSURE_THRESHOLD": EYE_CLOSURE_THRESHOLD,
-        "HEAD_STABILITY_THRESHOLD": HEAD_STABILITY_THRESHOLD,
-        "log_output": log_output
-    }
-    return state, "Initializing webcam...", None
-    # Extract state variables
-    cap = state["cap"]
-    gaze_predictor = state["gaze_predictor"]
-    blink_detector = state["blink_detector"]
-    gaze_history = state["gaze_history"]
-    head_history = state["head_history"]
-    ear_history = state["ear_history"]
-    log_output = state["log_output"]
-    # Capture frame
-    ret, frame = cap.read()
-    if not ret or frame is None:
-        # Try to reinitialize the camera if frame capture fails
-        cap.release()
-        cap = cv2.VideoCapture(0)
-        if not cap.isOpened():
-            return state, log_output + "\nError: Could not read from webcam.", None
-        state["cap"] = cap
-        ret, frame = cap.read()
-        if not ret or frame is None:
-            return state, log_output + "\nError: Failed to capture frame after reinitialization.", None
-    # Process frame
+def process_webrtc_frame(frame):
+    global gaze_history, head_history, ear_history, stable_gaze_time, stable_head_time
+    global eye_closed_time, blink_count, start_time, is_unconscious, frame_count_webcam
+
+    if frame is None:
+        return np.zeros((480, 640, 3), dtype=np.uint8)
+
+    frame_count_webcam += 1
+    current_time = time.time()
+    if start_time == 0:
+        start_time = current_time
+
     try:
         head_pose_gaze, gaze_h, gaze_v = gaze_predictor.predict_gaze(frame)
-        current_gaze = np.array([gaze_h, gaze_v])
+        current_gaze = np.array([gaze_h, gaze_v]) if gaze_h is not None and gaze_v is not None else None
         smoothed_gaze = smooth_values(gaze_history, current_gaze)
+
         ear, left_eye, right_eye, head_pose, left_iris, right_iris = blink_detector.detect_blinks(frame)
-        # Update display and logs
-        current_time = time.time()
-        logs = []
+
         if ear is None:
+            cv2.putText(frame, "No face detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
             smoothed_head = smooth_values(head_history, None)
             smoothed_ear = smooth_values(ear_history, None)
-            logs.append("No face detected")
         else:
             smoothed_head = smooth_values(head_history, head_pose)
             smoothed_ear = smooth_values(ear_history, ear)
-            if smoothed_ear >= blink_detector.EAR_THRESHOLD:
+            if smoothed_ear >= blink_detector.EAR_THRESHOLD and left_iris and right_iris:
+                if all(isinstance(coord, (int, float)) and coord >= 0 for coord in left_iris) and \
+                   all(isinstance(coord, (int, float)) and coord >= 0 for coord in right_iris):
+                    try:
+                        cv2.drawMarker(frame, tuple(map(int, left_iris)), (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+                        cv2.drawMarker(frame, tuple(map(int, right_iris)), (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+                    except OverflowError:
+                        print(f"Warning: OverflowError drawing iris markers at {left_iris}, {right_iris}")
+
+        gaze_text_h = f"Gaze H: {smoothed_gaze[0]:.2f}" if smoothed_gaze is not None and len(smoothed_gaze) > 0 else "Gaze H: N/A"
+        gaze_text_v = f"Gaze V: {smoothed_gaze[1]:.2f}" if smoothed_gaze is not None and len(smoothed_gaze) > 1 else "Gaze V: N/A"
+        head_text = f"Head Pose: {smoothed_head:.2f}" if smoothed_head is not None else "Head Pose: N/A"
+        ear_text = f"EAR: {smoothed_ear:.2f}" if smoothed_ear is not None else "EAR: N/A"
+
+        cv2.putText(frame, gaze_text_h, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, gaze_text_v, (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, head_text, (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, ear_text, (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+
+        if len(gaze_history) > 1 and smoothed_gaze is not None and gaze_history[-2] is not None:
+            try:
+                gaze_diff = np.sqrt(np.sum((smoothed_gaze - gaze_history[-2])**2))
+                if gaze_diff < GAZE_STABILITY_THRESHOLD:
+                    if stable_gaze_time == 0:
+                        stable_gaze_time = current_time
+                else:
+                    stable_gaze_time = 0
+            except TypeError:
+                stable_gaze_time = 0
+        else:
+            stable_gaze_time = 0
+
+        if len(head_history) > 1 and smoothed_head is not None and head_history[-2] is not None:
             head_diff = abs(smoothed_head - head_history[-2])
+            if head_diff < HEAD_STABILITY_THRESHOLD:
+                if stable_head_time == 0:
+                    stable_head_time = current_time
             else:
+                stable_head_time = 0
+        else:
+            stable_head_time = 0
+
+        if ear is not None and smoothed_ear is not None and smoothed_ear < blink_detector.EAR_THRESHOLD:
+            if eye_closed_time == 0:
+                eye_closed_time = current_time
+            elif current_time - eye_closed_time > EYE_CLOSURE_THRESHOLD:
                 cv2.putText(frame, "Eyes Closed", (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+        elif ear is not None:
+            if eye_closed_time > 0 and current_time - eye_closed_time < 0.5:
+                blink_count += 1
+            eye_closed_time = 0
         else:
-            state["eye_closed_time"] = 0
-        elapsed_seconds = current_time - state["start_time"]
+            eye_closed_time = 0
+
+        elapsed_seconds = current_time - start_time if start_time > 0 else 0
         elapsed_minutes = elapsed_seconds / 60
-        # Check for unconscious state
+        blink_rate = blink_count / elapsed_minutes if elapsed_minutes > 0 else 0
+        cv2.putText(frame, f"Blink Rate: {blink_rate:.1f}/min", (10, 240), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+
         unconscious_conditions = [
+            stable_gaze_time > 0 and current_time - stable_gaze_time > TIME_THRESHOLD,
+            blink_rate < BLINK_RATE_THRESHOLD and elapsed_minutes > 1,
+            eye_closed_time > 0 and current_time - eye_closed_time > EYE_CLOSURE_THRESHOLD,
+            stable_head_time > 0 and current_time - stable_head_time > TIME_THRESHOLD
         ]
+
         if sum(unconscious_conditions) >= 2:
             cv2.putText(frame, "Unconscious Detected", (10, 270), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
-            logs.append("WARNING: Possible unconscious state detected!")
+            is_unconscious = True
         else:
-        # Update log output with latest information
-        logs.append(f"Gaze: ({smoothed_gaze[0]:.2f}, {smoothed_gaze[1]:.2f}) | Head: {smoothed_head:.2f} | EAR: {smoothed_ear:.2f}")
-        log_text = "\n".join(logs)
-        # Keep log_output to a reasonable size
-        log_lines = log_output.split("\n") if log_output else []
-        log_lines.append(log_text)
-        if len(log_lines) > 20:  # Keep only last 20 entries
-            log_lines = log_lines[-20:]
-        state["log_output"] = "\n".join(log_lines)
-        # Convert from BGR to RGB for Gradio
+            is_unconscious = False
+
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        return frame_rgb
+
     except Exception as e:
+        print(f"Error processing frame: {e}")
+        error_frame = np.zeros((480, 640, 3), dtype=np.uint8)
+        if not error_frame.flags.writeable:
+            error_frame = error_frame.copy()
+        cv2.putText(error_frame, f"Error: {e}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
+        return error_frame

 def create_webcam_interface():
+    with gr.Blocks() as webcam_demo:
+        gr.Markdown("## Real-time Gaze Tracking via Webcam")
+        with gr.Row():
+            webcam_stream = WebRTC(label="Webcam Stream")
+        webcam_stream.stream(
+            fn=process_webrtc_frame,
+            inputs=[webcam_stream],
+            outputs=[webcam_stream]
+        )
     return webcam_demo

 def create_video_interface():
@@ -325,7 +311,6 @@ def create_video_interface():
     )
     return video_demo

-# Create a tabbed interface without the unsupported 'description' parameter
 demo = gr.TabbedInterface(
     [create_video_interface(), create_webcam_interface()],
     ["Video Upload", "Webcam"],
@@ -333,4 +318,14 @@ demo = gr.TabbedInterface(
 )

 if __name__ == "__main__":
+    gaze_history = []
+    head_history = []
+    ear_history = []
+    stable_gaze_time = 0
+    stable_head_time = 0
+    eye_closed_time = 0
+    blink_count = 0
+    start_time = 0
+    is_unconscious = False
+    frame_count_webcam = 0
     demo.launch()
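The webcam tab now pushes frames through the gradio_webrtc WebRTC component: each frame arriving from the browser is handed to process_webrtc_frame, and the annotated frame is returned to the same component. For reference, a minimal, self-contained sketch of that streaming contract follows; the echo_frame handler is illustrative only and is not part of this commit, and the BGR-to-RGB handling simply mirrors what app.py assumes, so it may need checking against the pinned gradio_webrtc version.

import cv2
import numpy as np
import gradio as gr
from gradio_webrtc import WebRTC

def echo_frame(frame):
    # The stream handler receives one frame as a numpy array and must return a frame.
    if frame is None:
        return np.zeros((480, 640, 3), dtype=np.uint8)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)   # stand-in for real processing
    return cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)    # hand back an RGB image, as app.py does

with gr.Blocks() as demo:
    stream = WebRTC(label="Webcam Stream")
    stream.stream(fn=echo_frame, inputs=[stream], outputs=[stream])

if __name__ == "__main__":
    demo.launch()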
requirements.txt
CHANGED
@@ -1,11 +1,12 @@
 torchvision==0.17.1
 opencv-python==4.10.0.84
 numpy==1.26.4
-mediapipe==0.10.21
+mediapipe==0.10.21
 imutils==0.5.4
 matplotlib==3.8.3
 playsound==1.2.2
-gradio
+gradio>=5.0.0
+gradio-webrtc==0.0.4
 tensorflow
-pygame
+pygame
+twilio
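requirements.txt also picks up twilio, which gradio_webrtc deployments typically use to obtain TURN/STUN servers so the WebRTC stream can connect from behind restrictive networks (for example on Hugging Face Spaces). A hedged sketch of that wiring is below; the rtc_configuration parameter and the environment-variable names are assumptions based on the library's usual pattern, not taken from this commit.

import os
from twilio.rest import Client
from gradio_webrtc import WebRTC

# Assumed secret names; configure them in the Space settings if TURN is required.
account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")

rtc_configuration = None
if account_sid and auth_token:
    # Twilio's Network Traversal Service hands out short-lived STUN/TURN credentials.
    token = Client(account_sid, auth_token).tokens.create()
    rtc_configuration = {"iceServers": token.ice_servers}

webcam_stream = WebRTC(label="Webcam Stream", rtc_configuration=rtc_configuration)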