reab5555 committed · verified
Commit cb22ba0 · 1 Parent(s): 981f936

Update app.py

Files changed (1): app.py (+67 -72)

app.py CHANGED
@@ -56,11 +56,11 @@ def process_and_show_completion(video_input_path, anomaly_threshold_input, fps,
 def on_button_click(video, threshold, fps):
     start_time = time.time()
 
-    # Show execution time immediately and make results tab visible
+    # Show execution time immediately and hide description
     yield {
         execution_time: gr.update(visible=True, value=0),
-        results_tab: gr.update(visible=True),
-        tabs: gr.update(selected="Results")
+        description: gr.update(visible=False),
+        results: gr.update(visible=True)
     }
 
     results = process_and_show_completion(video, threshold, fps)
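
The swap above leans on Gradio's generator-callback pattern: an event handler that yields dictionaries keyed by components can push partial UI updates (show the timer, hide the description, reveal the results container) before the final values arrive. A minimal runnable sketch of that pattern, with illustrative component names rather than the app's full layout:

```python
import time
import gradio as gr

with gr.Blocks() as demo:
    # Stand-ins for the app's real components (names are illustrative).
    description = gr.Markdown("# About this demo", visible=True)
    run_btn = gr.Button("Process")
    elapsed = gr.Number(label="Execution Time (seconds)", visible=False)
    with gr.Tabs(visible=False) as results:
        with gr.TabItem("Results"):
            output = gr.Textbox(label="Output")

    def on_click():
        start = time.time()
        # First yield: swap the description out and the results container in.
        yield {
            elapsed: gr.update(visible=True, value=0),
            description: gr.update(visible=False),
            results: gr.update(visible=True),
        }
        time.sleep(1)  # stand-in for the real video-processing pipeline
        # Final yield: fill in the outputs and the elapsed time.
        yield {
            elapsed: gr.update(value=round(time.time() - start, 2)),
            output: gr.update(value="done"),
        }

    run_btn.click(fn=on_click, inputs=None,
                  outputs=[elapsed, description, results, output])

demo.launch()
```

Because the yields are dicts, the final update can omit components that should stay as they are.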
@@ -107,80 +107,75 @@ with gr.Blocks() as iface:
 
     execution_time = gr.Number(label="Execution Time (seconds)", visible=False)
 
-    with gr.Tabs() as tabs:
-        results_tab = gr.TabItem("Results")
-        with results_tab:
-            with gr.Tabs():
-                with gr.TabItem("Facial Features"):
-                    video_display_facial = gr.Video(label="Input Video")
-                    results_text = gr.TextArea(label="Faces Breakdown", lines=5)
-                    mse_features_plot = gr.Plot(label="MSE: Facial Features")
-                    mse_features_hist = gr.Plot(label="MSE Distribution: Facial Features")
-                    mse_features_heatmap = gr.Plot(label="MSE Heatmap: Facial Features")
-                    anomaly_frames_features = gr.Gallery(label="Anomaly Frames (Facial Features)", columns=6, rows=2, height="auto")
-                    face_samples_most_frequent = gr.Gallery(label="Most Frequent Person Samples", columns=10, rows=2, height="auto")
-
-                with gr.TabItem("Body Posture"):
-                    video_display_body = gr.Video(label="Input Video")
-                    mse_posture_plot = gr.Plot(label="MSE: Body Posture")
-                    mse_posture_hist = gr.Plot(label="MSE Distribution: Body Posture")
-                    mse_posture_heatmap = gr.Plot(label="MSE Heatmap: Body Posture")
-                    anomaly_frames_posture = gr.Gallery(label="Anomaly Frames (Body Posture)", columns=6, rows=2, height="auto")
-
-                with gr.TabItem("Voice"):
-                    video_display_voice = gr.Video(label="Input Video")
-                    mse_voice_plot = gr.Plot(label="MSE: Voice")
-                    mse_voice_hist = gr.Plot(label="MSE Distribution: Voice")
-                    mse_voice_heatmap = gr.Plot(label="MSE Heatmap: Voice")
-
-                with gr.TabItem("Combined"):
-                    heatmap_video = gr.Video(label="Video with Anomaly Heatmap")
-                    combined_mse_plot = gr.Plot(label="Combined MSE Plot")
-                    correlation_heatmap_plot = gr.Plot(label="Correlation Heatmap")
-
-                with gr.TabItem("Description"):
-                    with gr.Column():
-                        gr.Markdown("""
-                        # Multimodal Behavioral Anomalies Detection
-
-                        The purpose of this tool is to detect anomalies in facial expressions, body language, and voice over the timeline of a video.
-
-                        It extracts faces, postures, and voice features from video frames, detects unique facial features, body postures, and speaker embeddings, and analyzes them to identify anomalies using time series analysis, specifically utilizing a variational autoencoder (VAE) approach.
-
-                        ## Applications
-
-                        - Identify suspicious behavior in surveillance footage.
-                        - Analyze micro-expressions.
-                        - Monitor and assess emotional states in communications.
-                        - Evaluate changes in vocal tone and speech patterns.
-
-                        ## Features
-
-                        - **Face Extraction**: Extracts faces from video frames using the MTCNN model.
-                        - **Feature Embeddings**: Extracts facial feature embeddings using the InceptionResnetV1 model.
-                        - **Body Posture Analysis**: Evaluates body postures using MediaPipe Pose.
-                        - **Voice Analysis**: Extracts and segments speaker embeddings from audio using PyAnnote.
-                        - **Anomaly Detection**: Uses a Variational Autoencoder (VAE) to detect anomalies in facial expressions, body postures, and voice features over time.
-                        - **Visualization**: Represents changes in facial expressions, body postures, and vocal tone over time, marking anomaly key points.
-
-                        ## Limitations
-
-                        - **Evaluation Challenges**: Since this is an unsupervised method, there is no labeled data to compare against.
-                        - **Subjectivity**: The concept of what constitutes an "anomaly" can be subjective and context-dependent.
-                        - **Lighting and Resolution**: Variability in lighting conditions and camera resolution can affect the quality of detected features.
-                        - **Audio Quality**: Background noise and poor audio quality can affect the accuracy of voice analysis.
-                        - **Generalization**: The model may not generalize well to all types of videos and contexts.
-                        - **Computationally Intensive**: Processing high-resolution video frames can be computationally demanding.
-
-                        ## Conclusion
-                        This tool offers solutions for detecting behavioral anomalies in video content. However, users should be aware of its limitations and interpret results with caution.
-                        """)
+    description = gr.Markdown("""
+    # Multimodal Behavioral Anomalies Detection
+
+    The purpose of this tool is to detect anomalies in facial expressions, body language, and voice over the timeline of a video.
+
+    It extracts faces, postures, and voice features from video frames, detects unique facial features, body postures, and speaker embeddings, and analyzes them to identify anomalies using time series analysis, specifically utilizing a variational autoencoder (VAE) approach.
+
+    ## Applications
+
+    - Identify suspicious behavior in surveillance footage.
+    - Analyze micro-expressions.
+    - Monitor and assess emotional states in communications.
+    - Evaluate changes in vocal tone and speech patterns.
+
+    ## Features
+
+    - **Face Extraction**: Extracts faces from video frames using the MTCNN model.
+    - **Feature Embeddings**: Extracts facial feature embeddings using the InceptionResnetV1 model.
+    - **Body Posture Analysis**: Evaluates body postures using MediaPipe Pose.
+    - **Voice Analysis**: Extracts and segments speaker embeddings from audio using PyAnnote.
+    - **Anomaly Detection**: Uses a Variational Autoencoder (VAE) to detect anomalies in facial expressions, body postures, and voice features over time.
+    - **Visualization**: Represents changes in facial expressions, body postures, and vocal tone over time, marking anomaly key points.
+
+    ## Limitations
+
+    - **Evaluation Challenges**: Since this is an unsupervised method, there is no labeled data to compare against.
+    - **Subjectivity**: The concept of what constitutes an "anomaly" can be subjective and context-dependent.
+    - **Lighting and Resolution**: Variability in lighting conditions and camera resolution can affect the quality of detected features.
+    - **Audio Quality**: Background noise and poor audio quality can affect the accuracy of voice analysis.
+    - **Generalization**: The model may not generalize well to all types of videos and contexts.
+    - **Computationally Intensive**: Processing high-resolution video frames can be computationally demanding.
+
+    ## Conclusion
+    This tool offers solutions for detecting behavioral anomalies in video content. However, users should be aware of its limitations and interpret results with caution.
+    """, visible=True)
+
+    with gr.Tabs(visible=False) as results:
+        with gr.TabItem("Facial Features"):
+            video_display_facial = gr.Video(label="Input Video")
+            results_text = gr.TextArea(label="Faces Breakdown", lines=5)
+            mse_features_plot = gr.Plot(label="MSE: Facial Features")
+            mse_features_hist = gr.Plot(label="MSE Distribution: Facial Features")
+            mse_features_heatmap = gr.Plot(label="MSE Heatmap: Facial Features")
+            anomaly_frames_features = gr.Gallery(label="Anomaly Frames (Facial Features)", columns=6, rows=2, height="auto")
+            face_samples_most_frequent = gr.Gallery(label="Most Frequent Person Samples", columns=10, rows=2, height="auto")
+
+        with gr.TabItem("Body Posture"):
+            video_display_body = gr.Video(label="Input Video")
+            mse_posture_plot = gr.Plot(label="MSE: Body Posture")
+            mse_posture_hist = gr.Plot(label="MSE Distribution: Body Posture")
+            mse_posture_heatmap = gr.Plot(label="MSE Heatmap: Body Posture")
+            anomaly_frames_posture = gr.Gallery(label="Anomaly Frames (Body Posture)", columns=6, rows=2, height="auto")
+
+        with gr.TabItem("Voice"):
+            video_display_voice = gr.Video(label="Input Video")
+            mse_voice_plot = gr.Plot(label="MSE: Voice")
+            mse_voice_hist = gr.Plot(label="MSE Distribution: Voice")
+            mse_voice_heatmap = gr.Plot(label="MSE Heatmap: Voice")
+
+        with gr.TabItem("Combined"):
+            heatmap_video = gr.Video(label="Video with Anomaly Heatmap")
+            combined_mse_plot = gr.Plot(label="Combined MSE Plot")
+            correlation_heatmap_plot = gr.Plot(label="Correlation Heatmap")
 
     process_btn.click(
         fn=on_button_click,
         inputs=[video_input, anomaly_threshold, fps_slider],
         outputs=[
-            execution_time, results_tab, tabs,
+            execution_time, description, results,
             results_text, mse_features_plot, mse_posture_plot, mse_voice_plot,
             mse_features_hist, mse_posture_hist, mse_voice_hist,
             mse_features_heatmap, mse_posture_heatmap, mse_voice_heatmap,
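
On the feature side, the Description panel says faces are found with MTCNN and embedded with InceptionResnetV1; both ship in the `facenet-pytorch` package. A minimal sketch of that pairing (the frame path is hypothetical, and whether the app batches frames exactly this way is an assumption):

```python
import torch
from PIL import Image
from facenet_pytorch import MTCNN, InceptionResnetV1

device = "cuda" if torch.cuda.is_available() else "cpu"
mtcnn = MTCNN(keep_all=True, device=device)                 # face detector/cropper
resnet = InceptionResnetV1(pretrained="vggface2").eval().to(device)

frame = Image.open("frame_0001.jpg")                        # hypothetical extracted frame
faces = mtcnn(frame)                                        # (n_faces, 3, 160, 160) or None
if faces is not None:
    with torch.no_grad():
        embeddings = resnet(faces.to(device))               # one 512-d vector per face
    print(embeddings.shape)
```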
 
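The posture bullet names MediaPipe Pose. A sketch of turning one frame's 33 pose landmarks into a flat feature vector of the sort a downstream autoencoder could consume (frame path again hypothetical):

```python
import cv2
import mediapipe as mp

pose = mp.solutions.pose.Pose(static_image_mode=True)
frame = cv2.imread("frame_0001.jpg")                        # hypothetical frame on disk
result = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
if result.pose_landmarks:
    # 33 landmarks -> flat (x, y, z, visibility) vector of length 132
    features = [v for lm in result.pose_landmarks.landmark
                for v in (lm.x, lm.y, lm.z, lm.visibility)]
    print(len(features))
pose.close()
```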
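For the voice bullet, PyAnnote's embedding models produce speaker vectors; one plausible reading of "extracts and segments speaker embeddings" is sliding-window inference. A sketch assuming the gated `pyannote/embedding` checkpoint, which needs a Hugging Face access token (the token string and `audio.wav` are placeholders):

```python
from pyannote.audio import Model, Inference

# Gated checkpoint: replace the placeholder with a real HF access token.
model = Model.from_pretrained("pyannote/embedding", use_auth_token="hf_...")
inference = Inference(model, window="sliding", duration=3.0, step=1.0)

embeddings = inference("audio.wav")     # SlidingWindowFeature: one vector per window
print(embeddings.data.shape)            # (num_windows, embedding_dim)
```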
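The anomaly-detection bullet is the core of the app: a VAE trained on the per-frame feature vectors, with high reconstruction error flagging anomalies. A minimal sketch of that general technique; the dimensions, KL weighting, and mean-plus-k-sigma cutoff are illustrative stand-ins for whatever app.py actually uses:

```python
import torch
import torch.nn as nn

class VAE(nn.Module):
    def __init__(self, dim=512, latent=32):
        super().__init__()
        self.enc = nn.Sequential(nn.Linear(dim, 128), nn.ReLU())
        self.mu = nn.Linear(128, latent)
        self.logvar = nn.Linear(128, latent)
        self.dec = nn.Sequential(nn.Linear(latent, 128), nn.ReLU(), nn.Linear(128, dim))

    def forward(self, x):
        h = self.enc(x)
        mu, logvar = self.mu(h), self.logvar(h)
        z = mu + torch.randn_like(mu) * torch.exp(0.5 * logvar)  # reparameterization
        return self.dec(z), mu, logvar

features = torch.randn(1000, 512)       # stand-in for real per-frame embeddings
vae = VAE()
opt = torch.optim.Adam(vae.parameters(), lr=1e-3)
for _ in range(50):                     # short illustrative training loop
    recon, mu, logvar = vae(features)
    mse = ((recon - features) ** 2).mean()
    kld = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())
    opt.zero_grad()
    (mse + 1e-3 * kld).backward()
    opt.step()

# Per-frame reconstruction error; frames above mean + k*std count as anomalies.
with torch.no_grad():
    recon, _, _ = vae(features)
    frame_mse = ((recon - features) ** 2).mean(dim=1)
k = 3.0                                 # plays the role of the app's anomaly_threshold
anomalies = (frame_mse > frame_mse.mean() + k * frame_mse.std()).nonzero().squeeze(-1)
print(f"{len(anomalies)} anomalous frames")
```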
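Finally, the `gr.Plot` components display MSE-over-time figures. A sketch of that visualization with synthetic data, marking frames above the anomaly threshold:

```python
import numpy as np
import matplotlib.pyplot as plt

fps = 10
mse = np.abs(np.random.randn(600))          # stand-in for real per-frame MSE
t = np.arange(len(mse)) / fps               # video timeline in seconds
limit = mse.mean() + 3.0 * mse.std()        # illustrative threshold

fig, ax = plt.subplots(figsize=(10, 3))
ax.plot(t, mse, lw=0.8, label="MSE")
ax.axhline(limit, color="red", ls="--", label="threshold")
ax.scatter(t[mse > limit], mse[mse > limit], color="red", zorder=3, label="anomalies")
ax.set(xlabel="Time (s)", ylabel="MSE", title="MSE: Facial Features")
ax.legend()
plt.tight_layout()
plt.show()
```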