Guru-25 committed on
Commit 4aecca0 · verified · 1 parent: 3909e02
Files changed (1): app.py (+187, -6)
app.py CHANGED
@@ -3,6 +3,7 @@ import cv2
 import numpy as np
 import tempfile
 import os
+import time
 from scripts.inference import GazePredictor
 from utils.ear_utils import BlinkDetector
 
@@ -123,13 +124,193 @@ def analyze_video(input_video):
     out.release()
     return temp_path
 
-iface = gr.Interface(
-    fn=analyze_video,
-    inputs=gr.Video(),
-    outputs=gr.Video(),
+def process_webcam(state, log_output):
+    """Process webcam frames in real-time and update log output"""
+    if state is None:
+        # Initialize state
+        gaze_predictor = GazePredictor(MODEL_PATH)
+        blink_detector = BlinkDetector()
+        cap = cv2.VideoCapture(0)
+
+        if not cap.isOpened():
+            return None, None, "Error: Could not open webcam."
+
+        GAZE_STABILITY_THRESHOLD = 0.5
+        TIME_THRESHOLD = 15
+        BLINK_RATE_THRESHOLD = 1
+        EYE_CLOSURE_THRESHOLD = 10
+        HEAD_STABILITY_THRESHOLD = 0.05
+
+        gaze_history = []
+        head_history = []
+        ear_history = []
+        stable_gaze_time = 0
+        stable_head_time = 0
+        eye_closed_time = 0
+        blink_count = 0
+        start_time = time.time()
+        is_unconscious = False
+
+        state = {
+            "gaze_predictor": gaze_predictor,
+            "blink_detector": blink_detector,
+            "cap": cap,
+            "gaze_history": gaze_history,
+            "head_history": head_history,
+            "ear_history": ear_history,
+            "stable_gaze_time": stable_gaze_time,
+            "stable_head_time": stable_head_time,
+            "eye_closed_time": eye_closed_time,
+            "blink_count": blink_count,
+            "start_time": start_time,
+            "is_unconscious": is_unconscious,
+            "GAZE_STABILITY_THRESHOLD": GAZE_STABILITY_THRESHOLD,
+            "TIME_THRESHOLD": TIME_THRESHOLD,
+            "BLINK_RATE_THRESHOLD": BLINK_RATE_THRESHOLD,
+            "EYE_CLOSURE_THRESHOLD": EYE_CLOSURE_THRESHOLD,
+            "HEAD_STABILITY_THRESHOLD": HEAD_STABILITY_THRESHOLD
+        }
+
+    # Extract state variables
+    cap = state["cap"]
+    gaze_predictor = state["gaze_predictor"]
+    blink_detector = state["blink_detector"]
+    gaze_history = state["gaze_history"]
+    head_history = state["head_history"]
+    ear_history = state["ear_history"]
+
+    # Capture frame
+    ret, frame = cap.read()
+    if not ret:
+        return state, None, log_output + "\nError: Could not read from webcam."
+
+    # Process frame
+    head_pose_gaze, gaze_h, gaze_v = gaze_predictor.predict_gaze(frame)
+    current_gaze = np.array([gaze_h, gaze_v])
+    smoothed_gaze = smooth_values(gaze_history, current_gaze)
+
+    ear, left_eye, right_eye, head_pose, left_iris, right_iris = blink_detector.detect_blinks(frame)
+
+    # Update display and logs
+    current_time = time.time()
+    logs = []
+
+    if ear is None:
+        cv2.putText(frame, "No face detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+        smoothed_head = smooth_values(head_history, None)
+        smoothed_ear = smooth_values(ear_history, None)
+        logs.append("No face detected")
+    else:
+        smoothed_head = smooth_values(head_history, head_pose)
+        smoothed_ear = smooth_values(ear_history, ear)
+        if smoothed_ear >= blink_detector.EAR_THRESHOLD:
+            cv2.drawMarker(frame, left_iris, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+            cv2.drawMarker(frame, right_iris, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+
+    # Add metrics to frame
+    cv2.putText(frame, f"Gaze H: {smoothed_gaze[0]:.2f}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+    cv2.putText(frame, f"Gaze V: {smoothed_gaze[1]:.2f}", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+    cv2.putText(frame, f"Head Pose: {smoothed_head:.2f}", (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+    cv2.putText(frame, f"EAR: {smoothed_ear:.2f}", (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+
+    # Check for gaze stability
+    if len(gaze_history) > 1:
+        gaze_diff = np.sqrt(np.sum((smoothed_gaze - gaze_history[-2])**2))
+        if gaze_diff < state["GAZE_STABILITY_THRESHOLD"]:
+            if state["stable_gaze_time"] == 0:
+                state["stable_gaze_time"] = current_time
+        else:
+            state["stable_gaze_time"] = 0
+
+    # Check for head stability
+    if len(head_history) > 1 and head_pose is not None:
+        head_diff = abs(smoothed_head - head_history[-2])
+        if head_diff < state["HEAD_STABILITY_THRESHOLD"]:
+            if state["stable_head_time"] == 0:
+                state["stable_head_time"] = current_time
+        else:
+            state["stable_head_time"] = 0
+
+    # Check for eye closure
+    if ear is not None and smoothed_ear < blink_detector.EAR_THRESHOLD:
+        if state["eye_closed_time"] == 0:
+            state["eye_closed_time"] = current_time
+        elif current_time - state["eye_closed_time"] > state["EYE_CLOSURE_THRESHOLD"]:
+            cv2.putText(frame, "Eyes Closed", (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+            logs.append("Eyes have been closed for an extended period")
+    else:
+        if state["eye_closed_time"] > 0 and current_time - state["eye_closed_time"] < 0.5:
+            state["blink_count"] += 1
+            logs.append("Blink detected")
+        state["eye_closed_time"] = 0
+
+    elapsed_seconds = current_time - state["start_time"]
+    elapsed_minutes = elapsed_seconds / 60
+    blink_rate = state["blink_count"] / elapsed_minutes if elapsed_minutes > 0 else 0
+    cv2.putText(frame, f"Blink Rate: {blink_rate:.1f}/min", (10, 240), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+    logs.append(f"Blink rate: {blink_rate:.1f}/min")
+
+    # Check for unconscious state
+    unconscious_conditions = [
+        state["stable_gaze_time"] > 0 and current_time - state["stable_gaze_time"] > state["TIME_THRESHOLD"],
+        blink_rate < state["BLINK_RATE_THRESHOLD"] and elapsed_minutes > 1,
+        state["eye_closed_time"] > 0 and current_time - state["eye_closed_time"] > state["EYE_CLOSURE_THRESHOLD"],
+        state["stable_head_time"] > 0 and current_time - state["stable_head_time"] > state["TIME_THRESHOLD"]
+    ]
+
+    if sum(unconscious_conditions) >= 2:
+        cv2.putText(frame, "Unconscious Detected", (10, 270), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+        state["is_unconscious"] = True
+        logs.append("WARNING: Possible unconscious state detected!")
+    else:
+        state["is_unconscious"] = False
+
+    # Update log output with latest information
+    logs.append(f"Gaze: ({smoothed_gaze[0]:.2f}, {smoothed_gaze[1]:.2f}) | Head: {smoothed_head:.2f} | EAR: {smoothed_ear:.2f}")
+    log_text = "\n".join(logs)
+
+    # Keep log_output to a reasonable size
+    log_lines = log_output.split("\n") if log_output else []
+    log_lines.append(log_text)
+    if len(log_lines) > 20:  # Keep only last 20 entries
+        log_lines = log_lines[-20:]
+    updated_log = "\n".join(log_lines)
+
+    # Convert from BGR to RGB for Gradio
+    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+    return state, frame_rgb, updated_log
+
+def create_webcam_interface():
+    webcam = gr.Image(source="webcam", streaming=True)
+    log_output = gr.Textbox(label="Gaze Tracking Log", lines=10)
+    processed_frame = gr.Image(label="Processed Frame")
+
+    webcam_demo = gr.Interface(
+        fn=process_webcam,
+        inputs=[gr.State(), gr.State("")],
+        outputs=[gr.State(), processed_frame, log_output],
+        live=True,
+        title="Real-time Gaze Tracking"
+    )
+    return webcam_demo
+
+def create_video_interface():
+    video_demo = gr.Interface(
+        fn=analyze_video,
+        inputs=gr.Video(),
+        outputs=gr.Video(),
+        title="Video Analysis",
+        description="Upload a video to analyze gaze and drowsiness."
+    )
+    return video_demo
+
+demo = gr.TabbedInterface(
+    [create_video_interface(), create_webcam_interface()],
+    ["Video Upload", "Webcam"],
     title="Gaze Tracker",
-    description="Upload a video to analyze gaze and drowsiness."
+    description="Analyze gaze and detect drowsiness in videos or using webcam."
 )
 
 if __name__ == "__main__":
-    iface.launch()
+    demo.launch()
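
Review note: both hunks lean on a smooth_values helper defined earlier in app.py, outside the lines shown above. For context, here is a minimal sketch consistent with how it is called in this diff: it appends the newest reading to the rolling history, tolerates None (no face detected), and returns a windowed mean. The moving-average behavior and the window size of 5 are assumptions, not part of the commit:

import numpy as np

def smooth_values(history, value, window=5):
    # Assumed simple moving average over the last `window` readings.
    if value is not None:
        history.append(value)
    if len(history) > window:
        del history[:-window]  # keep only the most recent readings
    if not history:
        return 0.0             # nothing observed yet
    # Works for scalars (EAR, head pose) and 2-vectors (gaze h/v) alike.
    return np.mean(np.array(history), axis=0)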
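
Review note: create_webcam_interface builds a webcam component but never passes it to inputs, and process_webcam opens the camera server-side via cv2.VideoCapture(0), which does not exist on a hosted Space. The usual Gradio pattern streams frames from the browser into the callback instead. A hypothetical rewiring along those lines (illustrative only, not part of the commit; source="webcam" is the Gradio 3.x spelling used here, while Gradio 4.x renamed it to sources=["webcam"]):

import cv2
import gradio as gr

def process_frame(frame):
    # Frames arrive from the browser as RGB numpy arrays (None until the
    # stream starts); the real app would run gaze/blink analysis here.
    if frame is None:
        return None
    bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # OpenCV draws in BGR
    cv2.putText(bgr, "processing...", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)   # back to RGB for Gradio

webcam_demo = gr.Interface(
    fn=process_frame,
    inputs=gr.Image(source="webcam", streaming=True),
    outputs=gr.Image(label="Processed Frame"),
    live=True,
    title="Real-time Gaze Tracking",
)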