new
- app.py +231 -236
- requirements.txt +5 -4
app.py
CHANGED
@@ -6,313 +6,299 @@ import os
 import time
 from scripts.inference import GazePredictor
 from utils.ear_utils import BlinkDetector
+from gradio_webrtc import WebRTC

 def smooth_values(history, current_value, window_size=5):
     if current_value is not None:
+        if isinstance(current_value, np.ndarray):
+            history.append(current_value)
+        elif isinstance(current_value, (int, float)):
+            history.append(current_value)
     if len(history) > window_size:
         history.pop(0)
+
+    if not history:
+        return current_value
+
+    if all(isinstance(item, np.ndarray) for item in history):
+        first_shape = history[0].shape
+        if all(item.shape == first_shape for item in history):
+            return np.mean(history, axis=0)
+        else:
+            return history[-1] if history else None
+    elif all(isinstance(item, (int, float)) for item in history):
+        return np.mean(history)
+    else:
+        return history[-1] if history else None

 MODEL_PATH = os.path.join("models", "gaze_estimation_model.pth")

+gaze_predictor = GazePredictor(MODEL_PATH)
+blink_detector = BlinkDetector()
+
+gaze_history = []
+head_history = []
+ear_history = []
+stable_gaze_time = 0
+stable_head_time = 0
+eye_closed_time = 0
+blink_count = 0
+start_time = 0
+is_unconscious = False
+frame_count_webcam = 0
+
+GAZE_STABILITY_THRESHOLD = 0.5
+TIME_THRESHOLD = 15
+BLINK_RATE_THRESHOLD = 1
+EYE_CLOSURE_THRESHOLD = 10
+HEAD_STABILITY_THRESHOLD = 0.05
+
 def analyze_video(input_video):
     cap = cv2.VideoCapture(input_video)
+    local_gaze_predictor = GazePredictor(MODEL_PATH)
+    local_blink_detector = BlinkDetector()
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     temp_fd, temp_path = tempfile.mkstemp(suffix='.mp4')
     os.close(temp_fd)
     out = None

-    stable_head_time = 0
-    eye_closed_time = 0
-    blink_count = 0
-    start_time = 0
-    is_unconscious = False
+    video_gaze_history = []
+    video_head_history = []
+    video_ear_history = []
+    video_stable_gaze_time = 0
+    video_stable_head_time = 0
+    video_eye_closed_time = 0
+    video_blink_count = 0
+    video_start_time = 0
+    video_is_unconscious = False
+    video_frame_count = 0

-    fps = cap.get(cv2.CAP_PROP_FPS) or 20
+    fps = cap.get(cv2.CAP_PROP_FPS) or 30

     while True:
         ret, frame = cap.read()
         if not ret:
             break
-        start_time = frame_count / fps
+        video_frame_count += 1
+        current_time_video = video_frame_count / fps

+        if video_start_time == 0:
+            video_start_time = current_time_video
+
+        head_pose_gaze, gaze_h, gaze_v = local_gaze_predictor.predict_gaze(frame)
+        current_gaze = np.array([gaze_h, gaze_v]) if gaze_h is not None and gaze_v is not None else None
+        smoothed_gaze = smooth_values(video_gaze_history, current_gaze)
+
+        ear, left_eye, right_eye, head_pose, left_iris, right_iris = local_blink_detector.detect_blinks(frame)

-        ear, left_eye, right_eye, head_pose, left_iris, right_iris = blink_detector.detect_blinks(frame)
         if ear is None:
+            cv2.putText(frame, "No face detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
+            smoothed_head = smooth_values(video_head_history, None)
+            smoothed_ear = smooth_values(video_ear_history, None)
         else:
-        if len(gaze_history) > 1:
-            gaze_diff = np.sqrt(np.sum((smoothed_gaze - gaze_history[-2])**2))
-            if gaze_diff < GAZE_STABILITY_THRESHOLD:
-                if stable_gaze_time == 0:
-                    stable_gaze_time = frame_count / fps
-            else:
-                stable_gaze_time = 0
+            smoothed_head = smooth_values(video_head_history, head_pose)
+            smoothed_ear = smooth_values(video_ear_history, ear)
+            if smoothed_ear >= local_blink_detector.EAR_THRESHOLD and left_iris and right_iris:
+                if all(isinstance(coord, (int, float)) and coord >= 0 for coord in left_iris) and \
+                   all(isinstance(coord, (int, float)) and coord >= 0 for coord in right_iris):
+                    try:
+                        cv2.drawMarker(frame, tuple(map(int, left_iris)), (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+                        cv2.drawMarker(frame, tuple(map(int, right_iris)), (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+                    except OverflowError:
+                        print(f"Warning: OverflowError drawing iris markers at {left_iris}, {right_iris}")

+        gaze_text_h = f"Gaze H: {smoothed_gaze[0]:.2f}" if smoothed_gaze is not None and len(smoothed_gaze) > 0 else "Gaze H: N/A"
+        gaze_text_v = f"Gaze V: {smoothed_gaze[1]:.2f}" if smoothed_gaze is not None and len(smoothed_gaze) > 1 else "Gaze V: N/A"
+        head_text = f"Head Pose: {smoothed_head:.2f}" if smoothed_head is not None else "Head Pose: N/A"
+        ear_text = f"EAR: {smoothed_ear:.2f}" if smoothed_ear is not None else "EAR: N/A"
+
+        cv2.putText(frame, gaze_text_h, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, gaze_text_v, (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, head_text, (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, ear_text, (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+
+        if len(video_gaze_history) > 1 and smoothed_gaze is not None and video_gaze_history[-2] is not None:
+            try:
+                gaze_diff = np.sqrt(np.sum((smoothed_gaze - video_gaze_history[-2])**2))
+                if gaze_diff < GAZE_STABILITY_THRESHOLD:
+                    if video_stable_gaze_time == 0:
+                        video_stable_gaze_time = current_time_video
+                else:
+                    video_stable_gaze_time = 0
+            except TypeError:
+                video_stable_gaze_time = 0
+        else:
+            video_stable_gaze_time = 0
+
+        if len(video_head_history) > 1 and smoothed_head is not None and video_head_history[-2] is not None:
+            head_diff = abs(smoothed_head - video_head_history[-2])
             if head_diff < HEAD_STABILITY_THRESHOLD:
+                if video_stable_head_time == 0:
+                    video_stable_head_time = current_time_video
             else:
+                video_stable_head_time = 0
+        else:
+            video_stable_head_time = 0

+        if ear is not None and smoothed_ear is not None and smoothed_ear < local_blink_detector.EAR_THRESHOLD:
+            if video_eye_closed_time == 0:
+                video_eye_closed_time = current_time_video
+            elif current_time_video - video_eye_closed_time > EYE_CLOSURE_THRESHOLD:
                 cv2.putText(frame, "Eyes Closed", (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+        elif ear is not None:
+            if video_eye_closed_time > 0 and current_time_video - video_eye_closed_time < 0.5:
+                video_blink_count += 1
+            video_eye_closed_time = 0
         else:
-            blink_count += 1
-            eye_closed_time = 0
+            video_eye_closed_time = 0

+        elapsed_seconds_video = current_time_video - video_start_time if video_start_time > 0 else 0
+        elapsed_minutes_video = elapsed_seconds_video / 60
+        blink_rate = video_blink_count / elapsed_minutes_video if elapsed_minutes_video > 0 else 0
+        cv2.putText(frame, f"Blink Rate: {blink_rate:.1f}/min", (10, 240), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

         unconscious_conditions = [
+            video_stable_gaze_time > 0 and current_time_video - video_stable_gaze_time > TIME_THRESHOLD,
+            blink_rate < BLINK_RATE_THRESHOLD and elapsed_minutes_video > 1,
+            video_eye_closed_time > 0 and current_time_video - video_eye_closed_time > EYE_CLOSURE_THRESHOLD,
+            video_stable_head_time > 0 and current_time_video - video_stable_head_time > TIME_THRESHOLD
         ]
+
         if sum(unconscious_conditions) >= 2:
             cv2.putText(frame, "Unconscious Detected", (10, 270), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+            video_is_unconscious = True
         else:
+            video_is_unconscious = False

         if out is None:
             h, w = frame.shape[:2]
             out = cv2.VideoWriter(temp_path, fourcc, fps, (w, h))
         out.write(frame)
+
     cap.release()
     if out:
         out.release()
     return temp_path

-    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
-    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
-    GAZE_STABILITY_THRESHOLD = 0.5
-    TIME_THRESHOLD = 15
-    BLINK_RATE_THRESHOLD = 1
-    EYE_CLOSURE_THRESHOLD = 10
-    HEAD_STABILITY_THRESHOLD = 0.05
-    gaze_history = []
-    head_history = []
-    ear_history = []
-    stable_gaze_time = 0
-    stable_head_time = 0
-    eye_closed_time = 0
-    blink_count = 0
-    start_time = time.time()
-    is_unconscious = False
-    log_output = ""
-    state = {
-        "gaze_predictor": gaze_predictor,
-        "blink_detector": blink_detector,
-        "cap": cap,
-        "gaze_history": gaze_history,
-        "head_history": head_history,
-        "ear_history": ear_history,
-        "stable_gaze_time": stable_gaze_time,
-        "stable_head_time": stable_head_time,
-        "eye_closed_time": eye_closed_time,
-        "blink_count": blink_count,
-        "start_time": start_time,
-        "is_unconscious": is_unconscious,
-        "GAZE_STABILITY_THRESHOLD": GAZE_STABILITY_THRESHOLD,
-        "TIME_THRESHOLD": TIME_THRESHOLD,
-        "BLINK_RATE_THRESHOLD": BLINK_RATE_THRESHOLD,
-        "EYE_CLOSURE_THRESHOLD": EYE_CLOSURE_THRESHOLD,
-        "HEAD_STABILITY_THRESHOLD": HEAD_STABILITY_THRESHOLD,
-        "log_output": log_output
-    }
-    return state, "Initializing webcam...", None
-    # Extract state variables
-    cap = state["cap"]
-    gaze_predictor = state["gaze_predictor"]
-    blink_detector = state["blink_detector"]
-    gaze_history = state["gaze_history"]
-    head_history = state["head_history"]
-    ear_history = state["ear_history"]
-    log_output = state["log_output"]
-    # Capture frame
-    ret, frame = cap.read()
-    if not ret or frame is None:
-        # Try to reinitialize the camera if frame capture fails
-        cap.release()
-        cap = cv2.VideoCapture(0)
-        if not cap.isOpened():
-            return state, log_output + "\nError: Could not read from webcam.", None
-        state["cap"] = cap
-        ret, frame = cap.read()
-        if not ret or frame is None:
-            return state, log_output + "\nError: Failed to capture frame after reinitialization.", None
-    # Process frame
+def process_webrtc_frame(frame):
+    global gaze_history, head_history, ear_history, stable_gaze_time, stable_head_time
+    global eye_closed_time, blink_count, start_time, is_unconscious, frame_count_webcam
+
+    if frame is None:
+        return np.zeros((480, 640, 3), dtype=np.uint8)
+
+    frame_count_webcam += 1
+    current_time = time.time()
+    if start_time == 0:
+        start_time = current_time
+
     try:
         head_pose_gaze, gaze_h, gaze_v = gaze_predictor.predict_gaze(frame)
-        current_gaze = np.array([gaze_h, gaze_v])
+        current_gaze = np.array([gaze_h, gaze_v]) if gaze_h is not None and gaze_v is not None else None
         smoothed_gaze = smooth_values(gaze_history, current_gaze)
+
         ear, left_eye, right_eye, head_pose, left_iris, right_iris = blink_detector.detect_blinks(frame)
-        # Update display and logs
-        current_time = time.time()
-        logs = []
+
         if ear is None:
+            cv2.putText(frame, "No face detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
             smoothed_head = smooth_values(head_history, None)
             smoothed_ear = smooth_values(ear_history, None)
-            logs.append("No face detected")
         else:
             smoothed_head = smooth_values(head_history, head_pose)
             smoothed_ear = smooth_values(ear_history, ear)
-            if smoothed_ear >= blink_detector.EAR_THRESHOLD:
+            if smoothed_ear >= blink_detector.EAR_THRESHOLD and left_iris and right_iris:
+                if all(isinstance(coord, (int, float)) and coord >= 0 for coord in left_iris) and \
+                   all(isinstance(coord, (int, float)) and coord >= 0 for coord in right_iris):
+                    try:
+                        cv2.drawMarker(frame, tuple(map(int, left_iris)), (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+                        cv2.drawMarker(frame, tuple(map(int, right_iris)), (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+                    except OverflowError:
+                        print(f"Warning: OverflowError drawing iris markers at {left_iris}, {right_iris}")
+
+        gaze_text_h = f"Gaze H: {smoothed_gaze[0]:.2f}" if smoothed_gaze is not None and len(smoothed_gaze) > 0 else "Gaze H: N/A"
+        gaze_text_v = f"Gaze V: {smoothed_gaze[1]:.2f}" if smoothed_gaze is not None and len(smoothed_gaze) > 1 else "Gaze V: N/A"
+        head_text = f"Head Pose: {smoothed_head:.2f}" if smoothed_head is not None else "Head Pose: N/A"
+        ear_text = f"EAR: {smoothed_ear:.2f}" if smoothed_ear is not None else "EAR: N/A"
+
+        cv2.putText(frame, gaze_text_h, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, gaze_text_v, (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, head_text, (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+        cv2.putText(frame, ear_text, (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+
+        if len(gaze_history) > 1 and smoothed_gaze is not None and gaze_history[-2] is not None:
+            try:
+                gaze_diff = np.sqrt(np.sum((smoothed_gaze - gaze_history[-2])**2))
+                if gaze_diff < GAZE_STABILITY_THRESHOLD:
+                    if stable_gaze_time == 0:
+                        stable_gaze_time = current_time
+                else:
+                    stable_gaze_time = 0
+            except TypeError:
+                stable_gaze_time = 0
+        else:
+            stable_gaze_time = 0
+
+        if len(head_history) > 1 and smoothed_head is not None and head_history[-2] is not None:
             head_diff = abs(smoothed_head - head_history[-2])
+            if head_diff < HEAD_STABILITY_THRESHOLD:
+                if stable_head_time == 0:
+                    stable_head_time = current_time
             else:
+                stable_head_time = 0
+        else:
+            stable_head_time = 0
+
+        if ear is not None and smoothed_ear is not None and smoothed_ear < blink_detector.EAR_THRESHOLD:
+            if eye_closed_time == 0:
+                eye_closed_time = current_time
+            elif current_time - eye_closed_time > EYE_CLOSURE_THRESHOLD:
                 cv2.putText(frame, "Eyes Closed", (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+        elif ear is not None:
+            if eye_closed_time > 0 and current_time - eye_closed_time < 0.5:
+                blink_count += 1
+            eye_closed_time = 0
         else:
-            state["eye_closed_time"] = 0
-        elapsed_seconds = current_time - state["start_time"]
+            eye_closed_time = 0
+
+        elapsed_seconds = current_time - start_time if start_time > 0 else 0
         elapsed_minutes = elapsed_seconds / 60
-        # Check for unconscious state
+        blink_rate = blink_count / elapsed_minutes if elapsed_minutes > 0 else 0
+        cv2.putText(frame, f"Blink Rate: {blink_rate:.1f}/min", (10, 240), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+
         unconscious_conditions = [
+            stable_gaze_time > 0 and current_time - stable_gaze_time > TIME_THRESHOLD,
+            blink_rate < BLINK_RATE_THRESHOLD and elapsed_minutes > 1,
+            eye_closed_time > 0 and current_time - eye_closed_time > EYE_CLOSURE_THRESHOLD,
+            stable_head_time > 0 and current_time - stable_head_time > TIME_THRESHOLD
         ]
+
         if sum(unconscious_conditions) >= 2:
             cv2.putText(frame, "Unconscious Detected", (10, 270), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
-            logs.append("WARNING: Possible unconscious state detected!")
+            is_unconscious = True
         else:
-        # Update log output with latest information
-        logs.append(f"Gaze: ({smoothed_gaze[0]:.2f}, {smoothed_gaze[1]:.2f}) | Head: {smoothed_head:.2f} | EAR: {smoothed_ear:.2f}")
-        log_text = "\n".join(logs)
-        # Keep log_output to a reasonable size
-        log_lines = log_output.split("\n") if log_output else []
-        log_lines.append(log_text)
-        if len(log_lines) > 20:  # Keep only last 20 entries
-            log_lines = log_lines[-20:]
-        state["log_output"] = "\n".join(log_lines)
-        # Convert from BGR to RGB for Gradio
+            is_unconscious = False
+
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        return frame_rgb
+
     except Exception as e:
+        print(f"Error processing frame: {e}")
+        error_frame = np.zeros((480, 640, 3), dtype=np.uint8)
+        if not error_frame.flags.writeable:
+            error_frame = error_frame.copy()
+        cv2.putText(error_frame, f"Error: {e}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
+        return error_frame

 def create_webcam_interface():
+    with gr.Blocks() as webcam_demo:
+        gr.Markdown("## Real-time Gaze Tracking via Webcam")
+        with gr.Row():
+            webcam_stream = WebRTC(label="Webcam Stream")
+        webcam_stream.stream(
+            fn=process_webrtc_frame,
+            inputs=[webcam_stream],
+            outputs=[webcam_stream]
+        )
     return webcam_demo

 def create_video_interface():
@@ -325,7 +311,6 @@ def create_video_interface():
     )
     return video_demo

-# Create a tabbed interface without the unsupported 'description' parameter
 demo = gr.TabbedInterface(
     [create_video_interface(), create_webcam_interface()],
     ["Video Upload", "Webcam"],
@@ -333,4 +318,14 @@ demo = gr.TabbedInterface(
 )

 if __name__ == "__main__":
+    gaze_history = []
+    head_history = []
+    ear_history = []
+    stable_gaze_time = 0
+    stable_head_time = 0
+    eye_closed_time = 0
+    blink_count = 0
+    start_time = 0
+    is_unconscious = False
+    frame_count_webcam = 0
     demo.launch()
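The webcam tab now pushes frames through the gradio_webrtc WebRTC component: each frame arriving from the browser is handed to process_webrtc_frame, and the annotated frame is returned to the same component. For reference, a minimal, self-contained sketch of that streaming contract follows; the echo_frame handler is illustrative only and is not part of this commit, and the BGR-to-RGB handling simply mirrors what app.py assumes, so it may need checking against the pinned gradio_webrtc version.

import cv2
import numpy as np
import gradio as gr
from gradio_webrtc import WebRTC

def echo_frame(frame):
    # The stream handler receives one frame as a numpy array and must return a frame.
    if frame is None:
        return np.zeros((480, 640, 3), dtype=np.uint8)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)   # stand-in for real processing
    return cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)    # hand back an RGB image, as app.py does

with gr.Blocks() as demo:
    stream = WebRTC(label="Webcam Stream")
    stream.stream(fn=echo_frame, inputs=[stream], outputs=[stream])

if __name__ == "__main__":
    demo.launch()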
requirements.txt
CHANGED
@@ -1,11 +1,12 @@
 torchvision==0.17.1
 opencv-python==4.10.0.84
 numpy==1.26.4
-mediapipe==0.10.21
+mediapipe==0.10.21
 imutils==0.5.4
 matplotlib==3.8.3
 playsound==1.2.2
-gradio
+gradio>=5.0.0
+gradio-webrtc==0.0.4
 tensorflow
-pygame
+pygame
+twilio
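requirements.txt also picks up twilio, which gradio_webrtc deployments typically use to obtain TURN/STUN servers so the WebRTC stream can connect from behind restrictive networks (for example on Hugging Face Spaces). A hedged sketch of that wiring is below; the rtc_configuration parameter and the environment-variable names are assumptions based on the library's usual pattern, not taken from this commit.

import os
from twilio.rest import Client
from gradio_webrtc import WebRTC

# Assumed secret names; configure them in the Space settings if TURN is required.
account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")

rtc_configuration = None
if account_sid and auth_token:
    # Twilio's Network Traversal Service hands out short-lived STUN/TURN credentials.
    token = Client(account_sid, auth_token).tokens.create()
    rtc_configuration = {"iceServers": token.ice_servers}

webcam_stream = WebRTC(label="Webcam Stream", rtc_configuration=rtc_configuration)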