new
app.py
CHANGED
@@ -3,6 +3,7 @@ import cv2
 import numpy as np
 import tempfile
 import os
+import time
 from scripts.inference import GazePredictor
 from utils.ear_utils import BlinkDetector
 
@@ -123,13 +124,193 @@ def analyze_video(input_video):
     out.release()
     return temp_path
 
-
-
-
-
+def process_webcam(state, log_output):
+    """Process webcam frames in real-time and update log output"""
+    if state is None:
+        # Initialize state
+        gaze_predictor = GazePredictor(MODEL_PATH)
+        blink_detector = BlinkDetector()
+        cap = cv2.VideoCapture(0)
+
+        if not cap.isOpened():
+            return None, None, "Error: Could not open webcam."
+
+        GAZE_STABILITY_THRESHOLD = 0.5
+        TIME_THRESHOLD = 15
+        BLINK_RATE_THRESHOLD = 1
+        EYE_CLOSURE_THRESHOLD = 10
+        HEAD_STABILITY_THRESHOLD = 0.05
+
+        gaze_history = []
+        head_history = []
+        ear_history = []
+        stable_gaze_time = 0
+        stable_head_time = 0
+        eye_closed_time = 0
+        blink_count = 0
+        start_time = time.time()
+        is_unconscious = False
+
+        state = {
+            "gaze_predictor": gaze_predictor,
+            "blink_detector": blink_detector,
+            "cap": cap,
+            "gaze_history": gaze_history,
+            "head_history": head_history,
+            "ear_history": ear_history,
+            "stable_gaze_time": stable_gaze_time,
+            "stable_head_time": stable_head_time,
+            "eye_closed_time": eye_closed_time,
+            "blink_count": blink_count,
+            "start_time": start_time,
+            "is_unconscious": is_unconscious,
+            "GAZE_STABILITY_THRESHOLD": GAZE_STABILITY_THRESHOLD,
+            "TIME_THRESHOLD": TIME_THRESHOLD,
+            "BLINK_RATE_THRESHOLD": BLINK_RATE_THRESHOLD,
+            "EYE_CLOSURE_THRESHOLD": EYE_CLOSURE_THRESHOLD,
+            "HEAD_STABILITY_THRESHOLD": HEAD_STABILITY_THRESHOLD
+        }
+
+    # Extract state variables
+    cap = state["cap"]
+    gaze_predictor = state["gaze_predictor"]
+    blink_detector = state["blink_detector"]
+    gaze_history = state["gaze_history"]
+    head_history = state["head_history"]
+    ear_history = state["ear_history"]
+
+    # Capture frame
+    ret, frame = cap.read()
+    if not ret:
+        return state, None, log_output + "\nError: Could not read from webcam."
+
+    # Process frame
+    head_pose_gaze, gaze_h, gaze_v = gaze_predictor.predict_gaze(frame)
+    current_gaze = np.array([gaze_h, gaze_v])
+    smoothed_gaze = smooth_values(gaze_history, current_gaze)
+
+    ear, left_eye, right_eye, head_pose, left_iris, right_iris = blink_detector.detect_blinks(frame)
+
+    # Update display and logs
+    current_time = time.time()
+    logs = []
+
+    if ear is None:
+        cv2.putText(frame, "No face detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+        smoothed_head = smooth_values(head_history, None)
+        smoothed_ear = smooth_values(ear_history, None)
+        logs.append("No face detected")
+    else:
+        smoothed_head = smooth_values(head_history, head_pose)
+        smoothed_ear = smooth_values(ear_history, ear)
+        if smoothed_ear >= blink_detector.EAR_THRESHOLD:
+            cv2.drawMarker(frame, left_iris, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+            cv2.drawMarker(frame, right_iris, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+
+    # Add metrics to frame
+    cv2.putText(frame, f"Gaze H: {smoothed_gaze[0]:.2f}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+    cv2.putText(frame, f"Gaze V: {smoothed_gaze[1]:.2f}", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+    cv2.putText(frame, f"Head Pose: {smoothed_head:.2f}", (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+    cv2.putText(frame, f"EAR: {smoothed_ear:.2f}", (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+
+    # Check for gaze stability
+    if len(gaze_history) > 1:
+        gaze_diff = np.sqrt(np.sum((smoothed_gaze - gaze_history[-2])**2))
+        if gaze_diff < state["GAZE_STABILITY_THRESHOLD"]:
+            if state["stable_gaze_time"] == 0:
+                state["stable_gaze_time"] = current_time
+        else:
+            state["stable_gaze_time"] = 0
+
+    # Check for head stability
+    if len(head_history) > 1 and head_pose is not None:
+        head_diff = abs(smoothed_head - head_history[-2])
+        if head_diff < state["HEAD_STABILITY_THRESHOLD"]:
+            if state["stable_head_time"] == 0:
+                state["stable_head_time"] = current_time
+        else:
+            state["stable_head_time"] = 0
+
+    # Check for eye closure
+    if ear is not None and smoothed_ear < blink_detector.EAR_THRESHOLD:
+        if state["eye_closed_time"] == 0:
+            state["eye_closed_time"] = current_time
+        elif current_time - state["eye_closed_time"] > state["EYE_CLOSURE_THRESHOLD"]:
+            cv2.putText(frame, "Eyes Closed", (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+            logs.append("Eyes have been closed for an extended period")
+    else:
+        if state["eye_closed_time"] > 0 and current_time - state["eye_closed_time"] < 0.5:
+            state["blink_count"] += 1
+            logs.append("Blink detected")
+        state["eye_closed_time"] = 0
+
+    elapsed_seconds = current_time - state["start_time"]
+    elapsed_minutes = elapsed_seconds / 60
+    blink_rate = state["blink_count"] / elapsed_minutes if elapsed_minutes > 0 else 0
+    cv2.putText(frame, f"Blink Rate: {blink_rate:.1f}/min", (10, 240), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+    logs.append(f"Blink rate: {blink_rate:.1f}/min")
+
+    # Check for unconscious state
+    unconscious_conditions = [
+        state["stable_gaze_time"] > 0 and current_time - state["stable_gaze_time"] > state["TIME_THRESHOLD"],
+        blink_rate < state["BLINK_RATE_THRESHOLD"] and elapsed_minutes > 1,
+        state["eye_closed_time"] > 0 and current_time - state["eye_closed_time"] > state["EYE_CLOSURE_THRESHOLD"],
+        state["stable_head_time"] > 0 and current_time - state["stable_head_time"] > state["TIME_THRESHOLD"]
+    ]
+
+    if sum(unconscious_conditions) >= 2:
+        cv2.putText(frame, "Unconscious Detected", (10, 270), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+        state["is_unconscious"] = True
+        logs.append("WARNING: Possible unconscious state detected!")
+    else:
+        state["is_unconscious"] = False
+
+    # Update log output with latest information
+    logs.append(f"Gaze: ({smoothed_gaze[0]:.2f}, {smoothed_gaze[1]:.2f}) | Head: {smoothed_head:.2f} | EAR: {smoothed_ear:.2f}")
+    log_text = "\n".join(logs)
+
+    # Keep log_output to a reasonable size
+    log_lines = log_output.split("\n") if log_output else []
+    log_lines.append(log_text)
+    if len(log_lines) > 20:  # Keep only last 20 entries
+        log_lines = log_lines[-20:]
+    updated_log = "\n".join(log_lines)
+
+    # Convert from BGR to RGB for Gradio
+    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+    return state, frame_rgb, updated_log
+
+def create_webcam_interface():
+    webcam = gr.Image(source="webcam", streaming=True)
+    log_output = gr.Textbox(label="Gaze Tracking Log", lines=10)
+    processed_frame = gr.Image(label="Processed Frame")
+
+    webcam_demo = gr.Interface(
+        fn=process_webcam,
+        inputs=[gr.State(), gr.State("")],
+        outputs=[gr.State(), processed_frame, log_output],
+        live=True,
+        title="Real-time Gaze Tracking"
+    )
+    return webcam_demo
+
+def create_video_interface():
+    video_demo = gr.Interface(
+        fn=analyze_video,
+        inputs=gr.Video(),
+        outputs=gr.Video(),
+        title="Video Analysis",
+        description="Upload a video to analyze gaze and drowsiness."
+    )
+    return video_demo
+
+demo = gr.TabbedInterface(
+    [create_video_interface(), create_webcam_interface()],
+    ["Video Upload", "Webcam"],
     title="Gaze Tracker",
-    description="
+    description="Analyze gaze and detect drowsiness in videos or using webcam."
 )
 
 if __name__ == "__main__":
-
+    demo.launch()
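
Note: process_webcam calls a smooth_values helper that this diff uses but does not define; it presumably already lives earlier in app.py alongside analyze_video. A minimal sketch of what such a rolling-average smoother could look like, assuming it appends each valid reading to the shared history list, skips None readings, and returns the windowed mean (the window size and the zero fallback are assumptions, not code from this repo):

import numpy as np

def smooth_values(history, new_value, window=5):
    # Skip missing readings (e.g., no face detected) so the average
    # covers only valid samples
    if new_value is not None:
        history.append(new_value)
    # Keep at most `window` recent samples
    if len(history) > window:
        del history[:len(history) - window]
    # Mean over the window; fall back to 0 before any sample arrives
    return np.mean(history, axis=0) if history else 0

Because the history lists are stored in the Gradio state dict, the same mutable list is smoothed across successive webcam frames, which is what the gaze_history[-2] stability check above depends on.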
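
Similarly, BlinkDetector.detect_blinks comes from utils/ear_utils.py, which is outside this diff. The EAR value it returns is presumably the standard eye aspect ratio of Soukupova and Cech (2016), computed from six landmarks per eye; a hedged sketch of that formula (the landmark ordering is the conventional p1..p6, not confirmed by this repo):

import numpy as np

def eye_aspect_ratio(eye):
    # eye: array of six (x, y) landmarks ordered p1..p6 around the eye
    a = np.linalg.norm(eye[1] - eye[5])  # first vertical span
    b = np.linalg.norm(eye[2] - eye[4])  # second vertical span
    c = np.linalg.norm(eye[0] - eye[3])  # horizontal span
    # EAR falls toward 0 as the eyelid closes, which is what the
    # EAR_THRESHOLD comparisons in process_webcam rely on
    return (a + b) / (2.0 * c)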