reab5555 committed · verified
Commit cb22ba0 · 1 Parent(s): 981f936

Update app.py

Files changed (1): app.py (+67 -72)

app.py CHANGED
@@ -56,11 +56,11 @@ def process_and_show_completion(video_input_path, anomaly_threshold_input, fps,
 def on_button_click(video, threshold, fps):
     start_time = time.time()
 
-    # Show execution time immediately and make results tab visible
+    # Show execution time immediately and hide description
     yield {
         execution_time: gr.update(visible=True, value=0),
-        results_tab: gr.update(visible=True),
-        tabs: gr.update(selected="Results")
+        description: gr.update(visible=False),
+        results: gr.update(visible=True)
     }
 
     results = process_and_show_completion(video, threshold, fps)
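
The swap above leans on Gradio's generator-callback pattern: an event handler that yields dictionaries keyed by components can push partial UI updates (show the timer, hide the description, reveal the results container) before the final values arrive. A minimal runnable sketch of that pattern, with illustrative component names rather than the app's full layout:

```python
import time
import gradio as gr

with gr.Blocks() as demo:
    # Stand-ins for the app's real components (names are illustrative).
    description = gr.Markdown("# About this demo", visible=True)
    run_btn = gr.Button("Process")
    elapsed = gr.Number(label="Execution Time (seconds)", visible=False)
    with gr.Tabs(visible=False) as results:
        with gr.TabItem("Results"):
            output = gr.Textbox(label="Output")

    def on_click():
        start = time.time()
        # First yield: swap the description out and the results container in.
        yield {
            elapsed: gr.update(visible=True, value=0),
            description: gr.update(visible=False),
            results: gr.update(visible=True),
        }
        time.sleep(1)  # stand-in for the real video-processing pipeline
        # Final yield: fill in the outputs and the elapsed time.
        yield {
            elapsed: gr.update(value=round(time.time() - start, 2)),
            output: gr.update(value="done"),
        }

    run_btn.click(fn=on_click, inputs=None,
                  outputs=[elapsed, description, results, output])

demo.launch()
```

Because the yields are dicts, the final update can omit components that should stay as they are.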
@@ -107,80 +107,75 @@ with gr.Blocks() as iface:
 
     execution_time = gr.Number(label="Execution Time (seconds)", visible=False)
 
-    with gr.Tabs() as tabs:
-        results_tab = gr.TabItem("Results")
-        with results_tab:
-            with gr.Tabs():
-                with gr.TabItem("Facial Features"):
-                    video_display_facial = gr.Video(label="Input Video")
-                    results_text = gr.TextArea(label="Faces Breakdown", lines=5)
-                    mse_features_plot = gr.Plot(label="MSE: Facial Features")
-                    mse_features_hist = gr.Plot(label="MSE Distribution: Facial Features")
-                    mse_features_heatmap = gr.Plot(label="MSE Heatmap: Facial Features")
-                    anomaly_frames_features = gr.Gallery(label="Anomaly Frames (Facial Features)", columns=6, rows=2, height="auto")
-                    face_samples_most_frequent = gr.Gallery(label="Most Frequent Person Samples", columns=10, rows=2, height="auto")
-
-                with gr.TabItem("Body Posture"):
-                    video_display_body = gr.Video(label="Input Video")
-                    mse_posture_plot = gr.Plot(label="MSE: Body Posture")
-                    mse_posture_hist = gr.Plot(label="MSE Distribution: Body Posture")
-                    mse_posture_heatmap = gr.Plot(label="MSE Heatmap: Body Posture")
-                    anomaly_frames_posture = gr.Gallery(label="Anomaly Frames (Body Posture)", columns=6, rows=2, height="auto")
-
-                with gr.TabItem("Voice"):
-                    video_display_voice = gr.Video(label="Input Video")
-                    mse_voice_plot = gr.Plot(label="MSE: Voice")
-                    mse_voice_hist = gr.Plot(label="MSE Distribution: Voice")
-                    mse_voice_heatmap = gr.Plot(label="MSE Heatmap: Voice")
-
-                with gr.TabItem("Combined"):
-                    heatmap_video = gr.Video(label="Video with Anomaly Heatmap")
-                    combined_mse_plot = gr.Plot(label="Combined MSE Plot")
-                    correlation_heatmap_plot = gr.Plot(label="Correlation Heatmap")
-
-                with gr.TabItem("Description"):
-                    with gr.Column():
-                        gr.Markdown("""
-                        # Multimodal Behavioral Anomalies Detection
-
-                        The purpose of this tool is to detect anomalies in facial expressions, body language, and voice over the timeline of a video.
-
-                        It extracts faces, postures, and voice features from video frames, detects unique facial features, body postures, and speaker embeddings, and analyzes them to identify anomalies using time series analysis, specifically utilizing a variational autoencoder (VAE) approach.
-
-                        ## Applications
-
-                        - Identify suspicious behavior in surveillance footage.
-                        - Analyze micro-expressions.
-                        - Monitor and assess emotional states in communications.
-                        - Evaluate changes in vocal tone and speech patterns.
-
-                        ## Features
-
-                        - **Face Extraction**: Extracts faces from video frames using the MTCNN model.
-                        - **Feature Embeddings**: Extracts facial feature embeddings using the InceptionResnetV1 model.
-                        - **Body Posture Analysis**: Evaluates body postures using MediaPipe Pose.
-                        - **Voice Analysis**: Extracts and segments speaker embeddings from audio using PyAnnote.
-                        - **Anomaly Detection**: Uses a Variational Autoencoder (VAE) to detect anomalies in facial expressions, body postures, and voice features over time.
-                        - **Visualization**: Represents changes in facial expressions, body postures, and vocal tone over time, marking anomaly key points.
-
-                        ## Limitations
-
-                        - **Evaluation Challenges**: Since this is an unsupervised method, there is no labeled data to compare against.
-                        - **Subjectivity**: The concept of what constitutes an "anomaly" can be subjective and context-dependent.
-                        - **Lighting and Resolution**: Variability in lighting conditions and camera resolution can affect the quality of detected features.
-                        - **Audio Quality**: Background noise and poor audio quality can affect the accuracy of voice analysis.
-                        - **Generalization**: The model may not generalize well to all types of videos and contexts.
-                        - **Computationally Intensive**: Processing high-resolution video frames can be computationally demanding.
-
-                        ## Conclusion
-                        This tool offers solutions for detecting behavioral anomalies in video content. However, users should be aware of its limitations and interpret results with caution.
-                        """)
+    description = gr.Markdown("""
+    # Multimodal Behavioral Anomalies Detection
+
+    The purpose of this tool is to detect anomalies in facial expressions, body language, and voice over the timeline of a video.
+
+    It extracts faces, postures, and voice features from video frames, detects unique facial features, body postures, and speaker embeddings, and analyzes them to identify anomalies using time series analysis, specifically utilizing a variational autoencoder (VAE) approach.
+
+    ## Applications
+
+    - Identify suspicious behavior in surveillance footage.
+    - Analyze micro-expressions.
+    - Monitor and assess emotional states in communications.
+    - Evaluate changes in vocal tone and speech patterns.
+
+    ## Features
+
+    - **Face Extraction**: Extracts faces from video frames using the MTCNN model.
+    - **Feature Embeddings**: Extracts facial feature embeddings using the InceptionResnetV1 model.
+    - **Body Posture Analysis**: Evaluates body postures using MediaPipe Pose.
+    - **Voice Analysis**: Extracts and segments speaker embeddings from audio using PyAnnote.
+    - **Anomaly Detection**: Uses a Variational Autoencoder (VAE) to detect anomalies in facial expressions, body postures, and voice features over time.
+    - **Visualization**: Represents changes in facial expressions, body postures, and vocal tone over time, marking anomaly key points.
+
+    ## Limitations
+
+    - **Evaluation Challenges**: Since this is an unsupervised method, there is no labeled data to compare against.
+    - **Subjectivity**: The concept of what constitutes an "anomaly" can be subjective and context-dependent.
+    - **Lighting and Resolution**: Variability in lighting conditions and camera resolution can affect the quality of detected features.
+    - **Audio Quality**: Background noise and poor audio quality can affect the accuracy of voice analysis.
+    - **Generalization**: The model may not generalize well to all types of videos and contexts.
+    - **Computationally Intensive**: Processing high-resolution video frames can be computationally demanding.
+
+    ## Conclusion
+    This tool offers solutions for detecting behavioral anomalies in video content. However, users should be aware of its limitations and interpret results with caution.
+    """, visible=True)
+
+    with gr.Tabs(visible=False) as results:
+        with gr.TabItem("Facial Features"):
+            video_display_facial = gr.Video(label="Input Video")
+            results_text = gr.TextArea(label="Faces Breakdown", lines=5)
+            mse_features_plot = gr.Plot(label="MSE: Facial Features")
+            mse_features_hist = gr.Plot(label="MSE Distribution: Facial Features")
+            mse_features_heatmap = gr.Plot(label="MSE Heatmap: Facial Features")
+            anomaly_frames_features = gr.Gallery(label="Anomaly Frames (Facial Features)", columns=6, rows=2, height="auto")
+            face_samples_most_frequent = gr.Gallery(label="Most Frequent Person Samples", columns=10, rows=2, height="auto")
+
+        with gr.TabItem("Body Posture"):
+            video_display_body = gr.Video(label="Input Video")
+            mse_posture_plot = gr.Plot(label="MSE: Body Posture")
+            mse_posture_hist = gr.Plot(label="MSE Distribution: Body Posture")
+            mse_posture_heatmap = gr.Plot(label="MSE Heatmap: Body Posture")
+            anomaly_frames_posture = gr.Gallery(label="Anomaly Frames (Body Posture)", columns=6, rows=2, height="auto")
+
+        with gr.TabItem("Voice"):
+            video_display_voice = gr.Video(label="Input Video")
+            mse_voice_plot = gr.Plot(label="MSE: Voice")
+            mse_voice_hist = gr.Plot(label="MSE Distribution: Voice")
+            mse_voice_heatmap = gr.Plot(label="MSE Heatmap: Voice")
+
+        with gr.TabItem("Combined"):
+            heatmap_video = gr.Video(label="Video with Anomaly Heatmap")
+            combined_mse_plot = gr.Plot(label="Combined MSE Plot")
+            correlation_heatmap_plot = gr.Plot(label="Correlation Heatmap")
 
     process_btn.click(
         fn=on_button_click,
         inputs=[video_input, anomaly_threshold, fps_slider],
         outputs=[
-            execution_time, results_tab, tabs,
+            execution_time, description, results,
             results_text, mse_features_plot, mse_posture_plot, mse_voice_plot,
             mse_features_hist, mse_posture_hist, mse_voice_hist,
             mse_features_heatmap, mse_posture_heatmap, mse_voice_heatmap,
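
On the feature side, the Description panel says faces are found with MTCNN and embedded with InceptionResnetV1; both ship in the `facenet-pytorch` package. A minimal sketch of that pairing (the frame path is hypothetical, and whether the app batches frames exactly this way is an assumption):

```python
import torch
from PIL import Image
from facenet_pytorch import MTCNN, InceptionResnetV1

device = "cuda" if torch.cuda.is_available() else "cpu"
mtcnn = MTCNN(keep_all=True, device=device)                 # face detector/cropper
resnet = InceptionResnetV1(pretrained="vggface2").eval().to(device)

frame = Image.open("frame_0001.jpg")                        # hypothetical extracted frame
faces = mtcnn(frame)                                        # (n_faces, 3, 160, 160) or None
if faces is not None:
    with torch.no_grad():
        embeddings = resnet(faces.to(device))               # one 512-d vector per face
    print(embeddings.shape)
```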
 
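The posture bullet names MediaPipe Pose. A sketch of turning one frame's 33 pose landmarks into a flat feature vector of the sort a downstream autoencoder could consume (frame path again hypothetical):

```python
import cv2
import mediapipe as mp

pose = mp.solutions.pose.Pose(static_image_mode=True)
frame = cv2.imread("frame_0001.jpg")                        # hypothetical frame on disk
result = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
if result.pose_landmarks:
    # 33 landmarks -> flat (x, y, z, visibility) vector of length 132
    features = [v for lm in result.pose_landmarks.landmark
                for v in (lm.x, lm.y, lm.z, lm.visibility)]
    print(len(features))
pose.close()
```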
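For the voice bullet, PyAnnote's embedding models produce speaker vectors; one plausible reading of "extracts and segments speaker embeddings" is sliding-window inference. A sketch assuming the gated `pyannote/embedding` checkpoint, which needs a Hugging Face access token (the token string and `audio.wav` are placeholders):

```python
from pyannote.audio import Model, Inference

# Gated checkpoint: replace the placeholder with a real HF access token.
model = Model.from_pretrained("pyannote/embedding", use_auth_token="hf_...")
inference = Inference(model, window="sliding", duration=3.0, step=1.0)

embeddings = inference("audio.wav")     # SlidingWindowFeature: one vector per window
print(embeddings.data.shape)            # (num_windows, embedding_dim)
```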
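The anomaly-detection bullet is the core of the app: a VAE trained on the per-frame feature vectors, with high reconstruction error flagging anomalies. A minimal sketch of that general technique; the dimensions, KL weighting, and mean-plus-k-sigma cutoff are illustrative stand-ins for whatever app.py actually uses:

```python
import torch
import torch.nn as nn

class VAE(nn.Module):
    def __init__(self, dim=512, latent=32):
        super().__init__()
        self.enc = nn.Sequential(nn.Linear(dim, 128), nn.ReLU())
        self.mu = nn.Linear(128, latent)
        self.logvar = nn.Linear(128, latent)
        self.dec = nn.Sequential(nn.Linear(latent, 128), nn.ReLU(), nn.Linear(128, dim))

    def forward(self, x):
        h = self.enc(x)
        mu, logvar = self.mu(h), self.logvar(h)
        z = mu + torch.randn_like(mu) * torch.exp(0.5 * logvar)  # reparameterization
        return self.dec(z), mu, logvar

features = torch.randn(1000, 512)       # stand-in for real per-frame embeddings
vae = VAE()
opt = torch.optim.Adam(vae.parameters(), lr=1e-3)
for _ in range(50):                     # short illustrative training loop
    recon, mu, logvar = vae(features)
    mse = ((recon - features) ** 2).mean()
    kld = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())
    opt.zero_grad()
    (mse + 1e-3 * kld).backward()
    opt.step()

# Per-frame reconstruction error; frames above mean + k*std count as anomalies.
with torch.no_grad():
    recon, _, _ = vae(features)
    frame_mse = ((recon - features) ** 2).mean(dim=1)
k = 3.0                                 # plays the role of the app's anomaly_threshold
anomalies = (frame_mse > frame_mse.mean() + k * frame_mse.std()).nonzero().squeeze(-1)
print(f"{len(anomalies)} anomalous frames")
```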
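Finally, the `gr.Plot` components display MSE-over-time figures. A sketch of that visualization with synthetic data, marking frames above the anomaly threshold:

```python
import numpy as np
import matplotlib.pyplot as plt

fps = 10
mse = np.abs(np.random.randn(600))          # stand-in for real per-frame MSE
t = np.arange(len(mse)) / fps               # video timeline in seconds
limit = mse.mean() + 3.0 * mse.std()        # illustrative threshold

fig, ax = plt.subplots(figsize=(10, 3))
ax.plot(t, mse, lw=0.8, label="MSE")
ax.axhline(limit, color="red", ls="--", label="threshold")
ax.scatter(t[mse > limit], mse[mse > limit], color="red", zorder=3, label="anomalies")
ax.set(xlabel="Time (s)", ylabel="MSE", title="MSE: Facial Features")
ax.legend()
plt.tight_layout()
plt.show()
```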