File size: 5,477 Bytes
b8b61aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import gradio as gr
import cv2
import numpy as np
import tempfile
import os
from scripts.inference import GazePredictor
from utils.ear_utils import BlinkDetector

def smooth_values(history, current_value, window_size=5):
    """Push *current_value* into *history* and return the windowed mean.

    Mutates *history* in place, keeping at most the last *window_size*
    entries. Works for scalars (EAR, head pose) and for ``np.ndarray``
    values (the 2-vector gaze); when *current_value* is ``None`` the
    history is left untouched and the mean of the existing window is
    returned, so the last smoothed reading persists across dropped
    frames. Returns 0 when there is nothing to average yet.
    """
    if current_value is not None:
        history.append(current_value)
        if len(history) > window_size:
            history.pop(0)
    # Bug fix: the original returned the raw current value for scalars
    # (only ndarrays were averaged) and returned 0 whenever
    # current_value was None even with a populated history, so scalar
    # smoothing never happened. Average the window unconditionally.
    if history:
        return np.mean(history, axis=0)
    return 0

# Relative path to the pretrained gaze-estimation checkpoint; passed to
# GazePredictor for each analyzed video.
MODEL_PATH = os.path.join("models", "gaze_estimation_model.pth")

def analyze_video(input_video):
    """Annotate *input_video* with gaze/blink/drowsiness overlays.

    For every frame: predict gaze (GazePredictor), detect blinks and
    head pose via EAR (BlinkDetector), smooth each signal over a short
    history window, draw the readings onto the frame, and flag
    "Unconscious Detected" when at least two of four stability
    heuristics hold. Returns the filesystem path of the annotated MP4
    (a temp file), which Gradio serves back to the user.
    """
    cap = cv2.VideoCapture(input_video)
    gaze_predictor = GazePredictor(MODEL_PATH)
    blink_detector = BlinkDetector()
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # Write output to a temp file; only the path is returned, so the fd
    # is closed immediately and cv2 reopens the path itself.
    temp_fd, temp_path = tempfile.mkstemp(suffix='.mp4')
    os.close(temp_fd)
    out = None  # created lazily once the first frame reveals the size

    # Heuristic thresholds (time values are in seconds).
    GAZE_STABILITY_THRESHOLD = 0.5   # max gaze delta still considered "stable"
    TIME_THRESHOLD = 15              # stability duration before flagging
    BLINK_RATE_THRESHOLD = 1         # blinks/minute below this is suspicious
    EYE_CLOSURE_THRESHOLD = 10       # continuous eye closure before flagging
    HEAD_STABILITY_THRESHOLD = 0.05  # max head-pose delta still "stable"

    # Rolling histories consumed by smooth_values (mutated in place).
    gaze_history = []
    head_history = []
    ear_history = []
    # Timestamp-as-flag variables: 0 means "not currently active",
    # otherwise they hold the video time (s) the state began.
    stable_gaze_time = 0
    stable_head_time = 0
    eye_closed_time = 0
    blink_count = 0
    start_time = 0
    is_unconscious = False

    frame_count = 0
    # Fall back to 20 fps if the container reports 0/NaN-like fps.
    fps = cap.get(cv2.CAP_PROP_FPS) or 20

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_count += 1
        # Record the timestamp of the first frame as the session start.
        if start_time == 0:
            start_time = frame_count / fps

        head_pose_gaze, gaze_h, gaze_v = gaze_predictor.predict_gaze(frame)
        current_gaze = np.array([gaze_h, gaze_v])
        smoothed_gaze = smooth_values(gaze_history, current_gaze)

        ear, left_eye, right_eye, head_pose, left_iris, right_iris = blink_detector.detect_blinks(frame)
        if ear is None:
            # No face this frame: annotate and pass None so the smoothed
            # head/EAR values are not polluted with bogus readings.
            cv2.putText(frame, "No face detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            smoothed_head = smooth_values(head_history, None)
            smoothed_ear = smooth_values(ear_history, None)
        else:
            smoothed_head = smooth_values(head_history, head_pose)
            smoothed_ear = smooth_values(ear_history, ear)
            # Eyes open (EAR above blink threshold): mark both irises.
            if smoothed_ear >= blink_detector.EAR_THRESHOLD:
                cv2.drawMarker(frame, left_iris, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
                cv2.drawMarker(frame, right_iris, (0, 255, 0), markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)

        # On-screen telemetry (green, stacked down the left edge).
        cv2.putText(frame, f"Gaze H: {smoothed_gaze[0]:.2f}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f"Gaze V: {smoothed_gaze[1]:.2f}", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f"Head Pose: {smoothed_head:.2f}", (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f"EAR: {smoothed_ear:.2f}", (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Gaze stability: compare the smoothed gaze against the
        # second-newest raw history entry; small deltas start (or keep)
        # the stable-gaze timer, large ones reset it.
        if len(gaze_history) > 1:
            gaze_diff = np.sqrt(np.sum((smoothed_gaze - gaze_history[-2])**2))
            if gaze_diff < GAZE_STABILITY_THRESHOLD:
                if stable_gaze_time == 0:
                    stable_gaze_time = frame_count / fps
            else:
                stable_gaze_time = 0

        # Head stability: same timer pattern on the scalar head pose.
        if len(head_history) > 1 and head_pose is not None:
            head_diff = abs(smoothed_head - head_history[-2])
            if head_diff < HEAD_STABILITY_THRESHOLD:
                if stable_head_time == 0:
                    stable_head_time = frame_count / fps
            else:
                stable_head_time = 0

        # Eye closure / blink bookkeeping. A closure shorter than 0.5 s
        # counts as one blink when the eyes reopen; a long closure
        # triggers the "Eyes Closed" warning overlay.
        if ear is not None and smoothed_ear < blink_detector.EAR_THRESHOLD:
            if eye_closed_time == 0:
                eye_closed_time = frame_count / fps
            elif (frame_count / fps) - eye_closed_time > EYE_CLOSURE_THRESHOLD:
                cv2.putText(frame, "Eyes Closed", (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        else:
            if eye_closed_time > 0 and (frame_count / fps) - eye_closed_time < 0.5:
                blink_count += 1
            eye_closed_time = 0

        # Blink rate in blinks/minute over the elapsed video time.
        elapsed_minutes = ((frame_count / fps) - start_time) / 60 if start_time > 0 else 0
        blink_rate = blink_count / elapsed_minutes if elapsed_minutes > 0 else 0
        cv2.putText(frame, f"Blink Rate: {blink_rate:.1f}/min", (10, 240), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Drowsiness heuristic: flag when >= 2 of these 4 hold.
        unconscious_conditions = [
            stable_gaze_time > 0 and (frame_count / fps) - stable_gaze_time > TIME_THRESHOLD,
            blink_rate < BLINK_RATE_THRESHOLD and elapsed_minutes > 1,
            eye_closed_time > 0 and (frame_count / fps) - eye_closed_time > EYE_CLOSURE_THRESHOLD,
            stable_head_time > 0 and (frame_count / fps) - stable_head_time > TIME_THRESHOLD
        ]
        if sum(unconscious_conditions) >= 2:
            cv2.putText(frame, "Unconscious Detected", (10, 270), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            is_unconscious = True
        else:
            # NOTE(review): is_unconscious is never read after the loop;
            # it only tracks the latest frame's verdict.
            is_unconscious = False

        # Lazily open the writer with the actual frame geometry.
        if out is None:
            h, w = frame.shape[:2]
            out = cv2.VideoWriter(temp_path, fourcc, fps, (w, h))
        out.write(frame)
    cap.release()
    if out:
        out.release()
    return temp_path

# Gradio front-end: a single video upload in, the annotated video out.
iface = gr.Interface(
    fn=analyze_video,
    inputs=gr.Video(),
    outputs=gr.Video(),
    title="Gaze Tracker",
    description="Upload a video to analyze gaze and drowsiness."
)

# Start the local web server only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()