Guru-25 committed on
Commit 1b36b40 · verified · 1 Parent(s): a3ae3eb
Files changed (2)
  1. app.py +231 -236
  2. requirements.txt +5 -4
app.py CHANGED
@@ -6,313 +6,299 @@ import os
 import time
 from scripts.inference import GazePredictor
 from utils.ear_utils import BlinkDetector

 def smooth_values(history, current_value, window_size=5):
     if current_value is not None:
-        history.append(current_value)
     if len(history) > window_size:
         history.pop(0)
-    return np.mean(history, axis=0) if isinstance(current_value, np.ndarray) and history else current_value if current_value is not None else 0

 MODEL_PATH = os.path.join("models", "gaze_estimation_model.pth")

 def analyze_video(input_video):
     cap = cv2.VideoCapture(input_video)
-    gaze_predictor = GazePredictor(MODEL_PATH)
-    blink_detector = BlinkDetector()
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     temp_fd, temp_path = tempfile.mkstemp(suffix='.mp4')
     os.close(temp_fd)
     out = None

-    GAZE_STABILITY_THRESHOLD = 0.5
-    TIME_THRESHOLD = 15
-    BLINK_RATE_THRESHOLD = 1
-    EYE_CLOSURE_THRESHOLD = 10
-    HEAD_STABILITY_THRESHOLD = 0.05
-
-    gaze_history = []
-    head_history = []
-    ear_history = []
-    stable_gaze_time = 0
-    stable_head_time = 0
-    eye_closed_time = 0
-    blink_count = 0
-    start_time = 0
-    is_unconscious = False

-    frame_count = 0
-    fps = cap.get(cv2.CAP_PROP_FPS) or 20

     while True:
         ret, frame = cap.read()
         if not ret:
             break
-        frame_count += 1
-        if start_time == 0:
-            start_time = frame_count / fps

-        head_pose_gaze, gaze_h, gaze_v = gaze_predictor.predict_gaze(frame)
-        current_gaze = np.array([gaze_h, gaze_v])
-        smoothed_gaze = smooth_values(gaze_history, current_gaze)

-        ear, left_eye, right_eye, head_pose, left_iris, right_iris = blink_detector.detect_blinks(frame)
         if ear is None:
-            cv2.putText(frame, "No face detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
-            smoothed_head = smooth_values(head_history, None)
-            smoothed_ear = smooth_values(ear_history, None)
         else:
-            smoothed_head = smooth_values(head_history, head_pose)
-            smoothed_ear = smooth_values(ear_history, ear)
-            if smoothed_ear >= blink_detector.EAR_THRESHOLD:
-                cv2.drawMarker(frame, left_iris, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
-                cv2.drawMarker(frame, right_iris, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
-
-        cv2.putText(frame, f"Gaze H: {smoothed_gaze[0]:.2f}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-        cv2.putText(frame, f"Gaze V: {smoothed_gaze[1]:.2f}", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-        cv2.putText(frame, f"Head Pose: {smoothed_head:.2f}", (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-        cv2.putText(frame, f"EAR: {smoothed_ear:.2f}", (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-
-        if len(gaze_history) > 1:
-            gaze_diff = np.sqrt(np.sum((smoothed_gaze - gaze_history[-2])**2))
-            if gaze_diff < GAZE_STABILITY_THRESHOLD:
-                if stable_gaze_time == 0:
-                    stable_gaze_time = frame_count / fps
-            else:
-                stable_gaze_time = 0

-        if len(head_history) > 1 and head_pose is not None:
-            head_diff = abs(smoothed_head - head_history[-2])
             if head_diff < HEAD_STABILITY_THRESHOLD:
-                if stable_head_time == 0:
-                    stable_head_time = frame_count / fps
             else:
-                stable_head_time = 0

-        if ear is not None and smoothed_ear < blink_detector.EAR_THRESHOLD:
-            if eye_closed_time == 0:
-                eye_closed_time = frame_count / fps
-            elif (frame_count / fps) - eye_closed_time > EYE_CLOSURE_THRESHOLD:
                 cv2.putText(frame, "Eyes Closed", (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
         else:
-            if eye_closed_time > 0 and (frame_count / fps) - eye_closed_time < 0.5:
-                blink_count += 1
-            eye_closed_time = 0

-        elapsed_minutes = ((frame_count / fps) - start_time) / 60 if start_time > 0 else 0
-        blink_rate = blink_count / elapsed_minutes if elapsed_minutes > 0 else 0
-        cv2.putText(frame, f"Blink Rate: {blink_rate:.1f}/min", (10, 240), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

         unconscious_conditions = [
-            stable_gaze_time > 0 and (frame_count / fps) - stable_gaze_time > TIME_THRESHOLD,
-            blink_rate < BLINK_RATE_THRESHOLD and elapsed_minutes > 1,
-            eye_closed_time > 0 and (frame_count / fps) - eye_closed_time > EYE_CLOSURE_THRESHOLD,
-            stable_head_time > 0 and (frame_count / fps) - stable_head_time > TIME_THRESHOLD
         ]
         if sum(unconscious_conditions) >= 2:
             cv2.putText(frame, "Unconscious Detected", (10, 270), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
-            is_unconscious = True
         else:
-            is_unconscious = False

         if out is None:
             h, w = frame.shape[:2]
             out = cv2.VideoWriter(temp_path, fourcc, fps, (w, h))
         out.write(frame)
     cap.release()
     if out:
         out.release()
     return temp_path

-def process_webcam(state):
-    """Process webcam frames in real-time and update log output"""
-    if state is None:
-        # Initialize state
-        gaze_predictor = GazePredictor(MODEL_PATH)
-        blink_detector = BlinkDetector()
-        cap = cv2.VideoCapture(0)
-
-        if not cap.isOpened():
-            return None, "Error: Could not open webcam.", None
-
-        # Try to set webcam properties for better performance
-        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
-        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
-
-        GAZE_STABILITY_THRESHOLD = 0.5
-        TIME_THRESHOLD = 15
-        BLINK_RATE_THRESHOLD = 1
-        EYE_CLOSURE_THRESHOLD = 10
-        HEAD_STABILITY_THRESHOLD = 0.05
-
-        gaze_history = []
-        head_history = []
-        ear_history = []
-        stable_gaze_time = 0
-        stable_head_time = 0
-        eye_closed_time = 0
-        blink_count = 0
-        start_time = time.time()
-        is_unconscious = False
-        log_output = ""
-
-        state = {
-            "gaze_predictor": gaze_predictor,
-            "blink_detector": blink_detector,
-            "cap": cap,
-            "gaze_history": gaze_history,
-            "head_history": head_history,
-            "ear_history": ear_history,
-            "stable_gaze_time": stable_gaze_time,
-            "stable_head_time": stable_head_time,
-            "eye_closed_time": eye_closed_time,
-            "blink_count": blink_count,
-            "start_time": start_time,
-            "is_unconscious": is_unconscious,
-            "GAZE_STABILITY_THRESHOLD": GAZE_STABILITY_THRESHOLD,
-            "TIME_THRESHOLD": TIME_THRESHOLD,
-            "BLINK_RATE_THRESHOLD": BLINK_RATE_THRESHOLD,
-            "EYE_CLOSURE_THRESHOLD": EYE_CLOSURE_THRESHOLD,
-            "HEAD_STABILITY_THRESHOLD": HEAD_STABILITY_THRESHOLD,
-            "log_output": log_output
-        }
-        return state, "Initializing webcam...", None
-
-    # Extract state variables
-    cap = state["cap"]
-    gaze_predictor = state["gaze_predictor"]
-    blink_detector = state["blink_detector"]
-    gaze_history = state["gaze_history"]
-    head_history = state["head_history"]
-    ear_history = state["ear_history"]
-    log_output = state["log_output"]
-
-    # Capture frame
-    ret, frame = cap.read()
-    if not ret or frame is None:
-        # Try to reinitialize the camera if frame capture fails
-        cap.release()
-        cap = cv2.VideoCapture(0)
-        if not cap.isOpened():
-            return state, log_output + "\nError: Could not read from webcam.", None
-        state["cap"] = cap
-        ret, frame = cap.read()
-        if not ret or frame is None:
-            return state, log_output + "\nError: Failed to capture frame after reinitialization.", None
-
-    # Process frame
     try:
         head_pose_gaze, gaze_h, gaze_v = gaze_predictor.predict_gaze(frame)
-        current_gaze = np.array([gaze_h, gaze_v])
         smoothed_gaze = smooth_values(gaze_history, current_gaze)
-
         ear, left_eye, right_eye, head_pose, left_iris, right_iris = blink_detector.detect_blinks(frame)
-
-        # Update display and logs
-        current_time = time.time()
-        logs = []
-
         if ear is None:
-            cv2.putText(frame, "No face detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
             smoothed_head = smooth_values(head_history, None)
             smoothed_ear = smooth_values(ear_history, None)
-            logs.append("No face detected")
         else:
             smoothed_head = smooth_values(head_history, head_pose)
             smoothed_ear = smooth_values(ear_history, ear)
-            if smoothed_ear >= blink_detector.EAR_THRESHOLD:
-                cv2.drawMarker(frame, left_iris, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
-                cv2.drawMarker(frame, right_iris, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
-
-        # Add metrics to frame
-        cv2.putText(frame, f"Gaze H: {smoothed_gaze[0]:.2f}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-        cv2.putText(frame, f"Gaze V: {smoothed_gaze[1]:.2f}", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-        cv2.putText(frame, f"Head Pose: {smoothed_head:.2f}", (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-        cv2.putText(frame, f"EAR: {smoothed_ear:.2f}", (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-
-        # Check for gaze stability
-        if len(gaze_history) > 1:
-            gaze_diff = np.sqrt(np.sum((smoothed_gaze - gaze_history[-2])**2))
-            if gaze_diff < state["GAZE_STABILITY_THRESHOLD"]:
-                if state["stable_gaze_time"] == 0:
-                    state["stable_gaze_time"] = current_time
-            else:
-                state["stable_gaze_time"] = 0
-
-        # Check for head stability
-        if len(head_history) > 1 and head_pose is not None:
             head_diff = abs(smoothed_head - head_history[-2])
-            if head_diff < state["HEAD_STABILITY_THRESHOLD"]:
-                if state["stable_head_time"] == 0:
-                    state["stable_head_time"] = current_time
             else:
-                state["stable_head_time"] = 0
-
-        # Check for eye closure
-        if ear is not None and smoothed_ear < blink_detector.EAR_THRESHOLD:
-            if state["eye_closed_time"] == 0:
-                state["eye_closed_time"] = current_time
-            elif current_time - state["eye_closed_time"] > state["EYE_CLOSURE_THRESHOLD"]:
                 cv2.putText(frame, "Eyes Closed", (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
-                logs.append("Eyes have been closed for an extended period")
         else:
-            if state["eye_closed_time"] > 0 and current_time - state["eye_closed_time"] < 0.5:
-                state["blink_count"] += 1
-                logs.append("Blink detected")
-            state["eye_closed_time"] = 0
-
-        elapsed_seconds = current_time - state["start_time"]
         elapsed_minutes = elapsed_seconds / 60
-        blink_rate = state["blink_count"] / elapsed_minutes if elapsed_minutes > 0 else 0
-        cv2.putText(frame, f"Blink Rate: {blink_rate:.1f}/min", (10, 240), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-        logs.append(f"Blink rate: {blink_rate:.1f}/min")
-
-        # Check for unconscious state
         unconscious_conditions = [
-            state["stable_gaze_time"] > 0 and current_time - state["stable_gaze_time"] > state["TIME_THRESHOLD"],
-            blink_rate < state["BLINK_RATE_THRESHOLD"] and elapsed_minutes > 1,
-            state["eye_closed_time"] > 0 and current_time - state["eye_closed_time"] > state["EYE_CLOSURE_THRESHOLD"],
-            state["stable_head_time"] > 0 and current_time - state["stable_head_time"] > state["TIME_THRESHOLD"]
         ]
-
         if sum(unconscious_conditions) >= 2:
             cv2.putText(frame, "Unconscious Detected", (10, 270), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
-            state["is_unconscious"] = True
-            logs.append("WARNING: Possible unconscious state detected!")
         else:
-            state["is_unconscious"] = False
-
-        # Update log output with latest information
-        logs.append(f"Gaze: ({smoothed_gaze[0]:.2f}, {smoothed_gaze[1]:.2f}) | Head: {smoothed_head:.2f} | EAR: {smoothed_ear:.2f}")
-        log_text = "\n".join(logs)
-
-        # Keep log_output to a reasonable size
-        log_lines = log_output.split("\n") if log_output else []
-        log_lines.append(log_text)
-        if len(log_lines) > 20:  # Keep only last 20 entries
-            log_lines = log_lines[-20:]
-        state["log_output"] = "\n".join(log_lines)
-
-        # Convert from BGR to RGB for Gradio
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
-        return state, state["log_output"], frame_rgb
-
     except Exception as e:
-        error_msg = f"Error processing frame: {str(e)}"
-        return state, log_output + "\n" + error_msg, None

 def create_webcam_interface():
-    log_output = gr.Textbox(label="Gaze Tracking Log", lines=10)
-    processed_frame = gr.Image(label="Processed Frame")
-
-    webcam_demo = gr.Interface(
-        fn=process_webcam,
-        inputs=[gr.State()],
-        outputs=[gr.State(), log_output, processed_frame],
-        live=True,
-        title="Real-time Gaze Tracking"
-    )
     return webcam_demo

 def create_video_interface():
@@ -325,7 +311,6 @@ def create_video_interface():
     )
     return video_demo

-# Create a tabbed interface without the unsupported 'description' parameter
 demo = gr.TabbedInterface(
     [create_video_interface(), create_webcam_interface()],
     ["Video Upload", "Webcam"],
@@ -333,4 +318,14 @@ demo = gr.TabbedInterface(
 )

 if __name__ == "__main__":
     demo.launch()

 import time
 from scripts.inference import GazePredictor
 from utils.ear_utils import BlinkDetector
+from gradio_webrtc import WebRTC

 def smooth_values(history, current_value, window_size=5):
     if current_value is not None:
+        if isinstance(current_value, np.ndarray):
+            history.append(current_value)
+        elif isinstance(current_value, (int, float)):
+            history.append(current_value)
     if len(history) > window_size:
         history.pop(0)
+
+    if not history:
+        return current_value
+
+    if all(isinstance(item, np.ndarray) for item in history):
+        first_shape = history[0].shape
+        if all(item.shape == first_shape for item in history):
+            return np.mean(history, axis=0)
+        else:
+            return history[-1] if history else None
+    elif all(isinstance(item, (int, float)) for item in history):
+        return np.mean(history)
+    else:
+        return history[-1] if history else None

 MODEL_PATH = os.path.join("models", "gaze_estimation_model.pth")

+gaze_predictor = GazePredictor(MODEL_PATH)
+blink_detector = BlinkDetector()
+
+gaze_history = []
+head_history = []
+ear_history = []
+stable_gaze_time = 0
+stable_head_time = 0
+eye_closed_time = 0
+blink_count = 0
+start_time = 0
+is_unconscious = False
+frame_count_webcam = 0
+
+GAZE_STABILITY_THRESHOLD = 0.5
+TIME_THRESHOLD = 15
+BLINK_RATE_THRESHOLD = 1
+EYE_CLOSURE_THRESHOLD = 10
+HEAD_STABILITY_THRESHOLD = 0.05
+
 def analyze_video(input_video):
     cap = cv2.VideoCapture(input_video)
+    local_gaze_predictor = GazePredictor(MODEL_PATH)
+    local_blink_detector = BlinkDetector()
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     temp_fd, temp_path = tempfile.mkstemp(suffix='.mp4')
     os.close(temp_fd)
     out = None

+    video_gaze_history = []
+    video_head_history = []
+    video_ear_history = []
+    video_stable_gaze_time = 0
+    video_stable_head_time = 0
+    video_eye_closed_time = 0
+    video_blink_count = 0
+    video_start_time = 0
+    video_is_unconscious = False
+    video_frame_count = 0

+    fps = cap.get(cv2.CAP_PROP_FPS) or 30

     while True:
         ret, frame = cap.read()
         if not ret:
             break
+        video_frame_count += 1
+        current_time_video = video_frame_count / fps

+        if video_start_time == 0:
+            video_start_time = current_time_video
+
+        head_pose_gaze, gaze_h, gaze_v = local_gaze_predictor.predict_gaze(frame)
+        current_gaze = np.array([gaze_h, gaze_v]) if gaze_h is not None and gaze_v is not None else None
+        smoothed_gaze = smooth_values(video_gaze_history, current_gaze)
+
+        ear, left_eye, right_eye, head_pose, left_iris, right_iris = local_blink_detector.detect_blinks(frame)

         if ear is None:
+            cv2.putText(frame, "No face detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
+            smoothed_head = smooth_values(video_head_history, None)
+            smoothed_ear = smooth_values(video_ear_history, None)
         else:
+            smoothed_head = smooth_values(video_head_history, head_pose)
+            smoothed_ear = smooth_values(video_ear_history, ear)
+            if smoothed_ear >= local_blink_detector.EAR_THRESHOLD and left_iris and right_iris:
+                if all(isinstance(coord, (int, float)) and coord >= 0 for coord in left_iris) and \
+                   all(isinstance(coord, (int, float)) and coord >= 0 for coord in right_iris):
+                    try:
+                        cv2.drawMarker(frame, tuple(map(int, left_iris)), (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+                        cv2.drawMarker(frame, tuple(map(int, right_iris)), (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+                    except OverflowError:
+                        print(f"Warning: OverflowError drawing iris markers at {left_iris}, {right_iris}")

+        gaze_text_h = f"Gaze H: {smoothed_gaze[0]:.2f}" if smoothed_gaze is not None and len(smoothed_gaze) > 0 else "Gaze H: N/A"
+        gaze_text_v = f"Gaze V: {smoothed_gaze[1]:.2f}" if smoothed_gaze is not None and len(smoothed_gaze) > 1 else "Gaze V: N/A"
+        head_text = f"Head Pose: {smoothed_head:.2f}" if smoothed_head is not None else "Head Pose: N/A"
+        ear_text = f"EAR: {smoothed_ear:.2f}" if smoothed_ear is not None else "EAR: N/A"
+
+        cv2.putText(frame, gaze_text_h, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, gaze_text_v, (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, head_text, (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, ear_text, (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+
+        if len(video_gaze_history) > 1 and smoothed_gaze is not None and video_gaze_history[-2] is not None:
+            try:
+                gaze_diff = np.sqrt(np.sum((smoothed_gaze - video_gaze_history[-2])**2))
+                if gaze_diff < GAZE_STABILITY_THRESHOLD:
+                    if video_stable_gaze_time == 0:
+                        video_stable_gaze_time = current_time_video
+                else:
+                    video_stable_gaze_time = 0
+            except TypeError:
+                video_stable_gaze_time = 0
+        else:
+            video_stable_gaze_time = 0
+
+        if len(video_head_history) > 1 and smoothed_head is not None and video_head_history[-2] is not None:
+            head_diff = abs(smoothed_head - video_head_history[-2])
             if head_diff < HEAD_STABILITY_THRESHOLD:
+                if video_stable_head_time == 0:
+                    video_stable_head_time = current_time_video
             else:
+                video_stable_head_time = 0
+        else:
+            video_stable_head_time = 0

+        if ear is not None and smoothed_ear is not None and smoothed_ear < local_blink_detector.EAR_THRESHOLD:
+            if video_eye_closed_time == 0:
+                video_eye_closed_time = current_time_video
+            elif current_time_video - video_eye_closed_time > EYE_CLOSURE_THRESHOLD:
                 cv2.putText(frame, "Eyes Closed", (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+        elif ear is not None:
+            if video_eye_closed_time > 0 and current_time_video - video_eye_closed_time < 0.5:
+                video_blink_count += 1
+                video_eye_closed_time = 0
         else:
+            video_eye_closed_time = 0

+        elapsed_seconds_video = current_time_video - video_start_time if video_start_time > 0 else 0
+        elapsed_minutes_video = elapsed_seconds_video / 60
+        blink_rate = video_blink_count / elapsed_minutes_video if elapsed_minutes_video > 0 else 0
+        cv2.putText(frame, f"Blink Rate: {blink_rate:.1f}/min", (10, 240), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

         unconscious_conditions = [
+            video_stable_gaze_time > 0 and current_time_video - video_stable_gaze_time > TIME_THRESHOLD,
+            blink_rate < BLINK_RATE_THRESHOLD and elapsed_minutes_video > 1,
+            video_eye_closed_time > 0 and current_time_video - video_eye_closed_time > EYE_CLOSURE_THRESHOLD,
+            video_stable_head_time > 0 and current_time_video - video_stable_head_time > TIME_THRESHOLD
         ]
+
         if sum(unconscious_conditions) >= 2:
             cv2.putText(frame, "Unconscious Detected", (10, 270), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+            video_is_unconscious = True
         else:
+            video_is_unconscious = False

         if out is None:
             h, w = frame.shape[:2]
             out = cv2.VideoWriter(temp_path, fourcc, fps, (w, h))
         out.write(frame)
+
     cap.release()
     if out:
         out.release()
     return temp_path

+def process_webrtc_frame(frame):
+    global gaze_history, head_history, ear_history, stable_gaze_time, stable_head_time
+    global eye_closed_time, blink_count, start_time, is_unconscious, frame_count_webcam
+
+    if frame is None:
+        return np.zeros((480, 640, 3), dtype=np.uint8)
+
+    frame_count_webcam += 1
+    current_time = time.time()
+    if start_time == 0:
+        start_time = current_time
+
     try:
         head_pose_gaze, gaze_h, gaze_v = gaze_predictor.predict_gaze(frame)
+        current_gaze = np.array([gaze_h, gaze_v]) if gaze_h is not None and gaze_v is not None else None
         smoothed_gaze = smooth_values(gaze_history, current_gaze)
+
         ear, left_eye, right_eye, head_pose, left_iris, right_iris = blink_detector.detect_blinks(frame)
+
         if ear is None:
+            cv2.putText(frame, "No face detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
             smoothed_head = smooth_values(head_history, None)
             smoothed_ear = smooth_values(ear_history, None)
         else:
             smoothed_head = smooth_values(head_history, head_pose)
             smoothed_ear = smooth_values(ear_history, ear)
+            if smoothed_ear >= blink_detector.EAR_THRESHOLD and left_iris and right_iris:
+                if all(isinstance(coord, (int, float)) and coord >= 0 for coord in left_iris) and \
+                   all(isinstance(coord, (int, float)) and coord >= 0 for coord in right_iris):
+                    try:
+                        cv2.drawMarker(frame, tuple(map(int, left_iris)), (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+                        cv2.drawMarker(frame, tuple(map(int, right_iris)), (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+                    except OverflowError:
+                        print(f"Warning: OverflowError drawing iris markers at {left_iris}, {right_iris}")
+
+        gaze_text_h = f"Gaze H: {smoothed_gaze[0]:.2f}" if smoothed_gaze is not None and len(smoothed_gaze) > 0 else "Gaze H: N/A"
+        gaze_text_v = f"Gaze V: {smoothed_gaze[1]:.2f}" if smoothed_gaze is not None and len(smoothed_gaze) > 1 else "Gaze V: N/A"
+        head_text = f"Head Pose: {smoothed_head:.2f}" if smoothed_head is not None else "Head Pose: N/A"
+        ear_text = f"EAR: {smoothed_ear:.2f}" if smoothed_ear is not None else "EAR: N/A"
+
+        cv2.putText(frame, gaze_text_h, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, gaze_text_v, (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, head_text, (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, ear_text, (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+
+        if len(gaze_history) > 1 and smoothed_gaze is not None and gaze_history[-2] is not None:
+            try:
+                gaze_diff = np.sqrt(np.sum((smoothed_gaze - gaze_history[-2])**2))
+                if gaze_diff < GAZE_STABILITY_THRESHOLD:
+                    if stable_gaze_time == 0:
+                        stable_gaze_time = current_time
+                else:
+                    stable_gaze_time = 0
+            except TypeError:
+                stable_gaze_time = 0
+        else:
+            stable_gaze_time = 0
+
+        if len(head_history) > 1 and smoothed_head is not None and head_history[-2] is not None:
             head_diff = abs(smoothed_head - head_history[-2])
+            if head_diff < HEAD_STABILITY_THRESHOLD:
+                if stable_head_time == 0:
+                    stable_head_time = current_time
             else:
+                stable_head_time = 0
+        else:
+            stable_head_time = 0
+
+        if ear is not None and smoothed_ear is not None and smoothed_ear < blink_detector.EAR_THRESHOLD:
+            if eye_closed_time == 0:
+                eye_closed_time = current_time
+            elif current_time - eye_closed_time > EYE_CLOSURE_THRESHOLD:
                 cv2.putText(frame, "Eyes Closed", (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+        elif ear is not None:
+            if eye_closed_time > 0 and current_time - eye_closed_time < 0.5:
+                blink_count += 1
+                eye_closed_time = 0
         else:
+            eye_closed_time = 0
+
+        elapsed_seconds = current_time - start_time if start_time > 0 else 0
         elapsed_minutes = elapsed_seconds / 60
+        blink_rate = blink_count / elapsed_minutes if elapsed_minutes > 0 else 0
+        cv2.putText(frame, f"Blink Rate: {blink_rate:.1f}/min", (10, 240), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+
         unconscious_conditions = [
+            stable_gaze_time > 0 and current_time - stable_gaze_time > TIME_THRESHOLD,
+            blink_rate < BLINK_RATE_THRESHOLD and elapsed_minutes > 1,
+            eye_closed_time > 0 and current_time - eye_closed_time > EYE_CLOSURE_THRESHOLD,
+            stable_head_time > 0 and current_time - stable_head_time > TIME_THRESHOLD
         ]
+
         if sum(unconscious_conditions) >= 2:
             cv2.putText(frame, "Unconscious Detected", (10, 270), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+            is_unconscious = True
         else:
+            is_unconscious = False
+
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        return frame_rgb
+
     except Exception as e:
+        print(f"Error processing frame: {e}")
+        error_frame = np.zeros((480, 640, 3), dtype=np.uint8)
+        if not error_frame.flags.writeable:
+            error_frame = error_frame.copy()
+        cv2.putText(error_frame, f"Error: {e}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
+        return error_frame

 def create_webcam_interface():
+    with gr.Blocks() as webcam_demo:
+        gr.Markdown("## Real-time Gaze Tracking via Webcam")
+        with gr.Row():
+            webcam_stream = WebRTC(label="Webcam Stream")
+        webcam_stream.stream(
+            fn=process_webrtc_frame,
+            inputs=[webcam_stream],
+            outputs=[webcam_stream]
+        )
     return webcam_demo

 def create_video_interface():

     )
     return video_demo

 demo = gr.TabbedInterface(
     [create_video_interface(), create_webcam_interface()],
     ["Video Upload", "Webcam"],

 )

 if __name__ == "__main__":
+    gaze_history = []
+    head_history = []
+    ear_history = []
+    stable_gaze_time = 0
+    stable_head_time = 0
+    eye_closed_time = 0
+    blink_count = 0
+    start_time = 0
+    is_unconscious = False
+    frame_count_webcam = 0
     demo.launch()
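
Note on the reworked smooth_values: it now averages only homogeneous histories (all same-shape arrays, or all scalars) and otherwise falls back to the most recent entry, instead of the old single-expression return. A minimal sketch of the expected behaviour, assuming numpy is imported as np and smooth_values is the function defined above (the sample values are illustrative, not from the repository):

    import numpy as np

    ear_hist = []
    smooth_values(ear_hist, 0.30)   # scalar history -> running mean, 0.30
    smooth_values(ear_hist, 0.20)   # -> 0.25
    smooth_values(ear_hist, None)   # None (no face) appends nothing -> still 0.25

    gaze_hist = []
    smooth_values(gaze_hist, np.array([0.1, 0.4]))
    smooth_values(gaze_hist, np.array([0.3, 0.2]))  # element-wise mean -> array([0.2, 0.3])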
requirements.txt CHANGED
@@ -1,11 +1,12 @@
-
 torchvision==0.17.1
 opencv-python==4.10.0.84
 numpy==1.26.4
-mediapipe==0.10.21 # Updated to the latest available version from your error list
 imutils==0.5.4
 matplotlib==3.8.3
 playsound==1.2.2
-gradio==4.27.0
 tensorflow
-pygame
 torchvision==0.17.1
 opencv-python==4.10.0.84
 numpy==1.26.4
+mediapipe==0.10.21
 imutils==0.5.4
 matplotlib==3.8.3
 playsound==1.2.2
+gradio>=5.0.0
+gradio-webrtc==0.0.4
 tensorflow
+pygame
+twilio
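
The new twilio pin is not referenced by app.py in this commit; presumably it is intended for fetching TURN credentials so the WebRTC stream still connects behind restrictive NATs. A hedged sketch of how that could be wired into create_webcam_interface(), assuming the rtc_configuration parameter of the gradio_webrtc WebRTC component and TWILIO_ACCOUNT_SID / TWILIO_AUTH_TOKEN environment variables (none of this appears in the commit itself):

    import os
    from twilio.rest import Client
    from gradio_webrtc import WebRTC

    # Request ephemeral ICE/TURN servers from Twilio's Network Traversal Service.
    client = Client(os.environ["TWILIO_ACCOUNT_SID"], os.environ["TWILIO_AUTH_TOKEN"])
    token = client.tokens.create()
    rtc_configuration = {"iceServers": token.ice_servers, "iceTransportPolicy": "relay"}

    # Pass the configuration when building the streaming component.
    webcam_stream = WebRTC(label="Webcam Stream", rtc_configuration=rtc_configuration)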