wuhp committed
Commit a32514b · verified · 1 Parent(s): 5454bc6

Update app.py

Files changed (1)
  1. app.py +88 -41
app.py CHANGED
@@ -2,8 +2,10 @@ import gradio as gr
 from ultralytics import YOLO
 import cv2
 import tempfile
+import time
+import numpy as np
 
-# Load a custom YOLO model from the uploaded file.
+# Load the custom YOLO model from the uploaded file.
 def load_model(model_file):
     try:
         model = YOLO(model_file.name)
@@ -11,33 +13,55 @@ def load_model(model_file):
     except Exception as e:
         return f"Error loading model: {e}"
 
-# Run inference on an image and write the output to a PNG file.
-def predict_image(model, image):
+# Run inference on an image, apply the confidence threshold, and save the result.
+def predict_image(model, image, conf):
     try:
-        results = model(image)
-        annotated_frame = results[0].plot()  # Works for detection, segmentation, and OBB models.
-        # Write annotated image to a temporary file.
+        start_time = time.time()
+        # Pass the confidence threshold to the model (Ultralytics models accept this as a keyword argument).
+        results = model(image, conf=conf)
+        process_time = time.time() - start_time
+
+        # Use the model's built-in plot() method to overlay detections.
+        annotated_frame = results[0].plot()
+        # Count detections if available (assumes results[0] contains a 'boxes' attribute).
+        num_detections = len(results[0].boxes) if hasattr(results[0], "boxes") else "N/A"
+
+        # Write the annotated image to a temporary PNG file.
         tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
         cv2.imwrite(tmp.name, annotated_frame)
-        return tmp.name
+        return tmp.name, process_time, num_detections
     except Exception as e:
-        return f"Error during image inference: {e}"
+        return f"Error during image inference: {e}", None, None
 
-# Run inference on a video by processing frame-by-frame,
-# and write the annotated video to an MP4 file.
-def predict_video(model, video_file):
+# Run inference on a video by processing frames with a given frame step and saving the output.
+def predict_video(model, video_file, conf, frame_step):
     try:
         cap = cv2.VideoCapture(video_file.name)
         frames = []
-        success, frame = cap.read()
-        while success:
-            results = model(frame)
-            annotated_frame = results[0].plot()
-            frames.append(annotated_frame)
+        frame_count = 0
+        start_time = time.time()
+
+        while True:
             success, frame = cap.read()
+            if not success:
+                break
+
+            # Process only every nth frame (frame_step controls this).
+            if frame_count % frame_step == 0:
+                results = model(frame, conf=conf)
+                annotated_frame = results[0].plot()
+                frames.append(annotated_frame)
+            else:
+                # If skipping, add the original frame (or you could choose not to add anything).
+                frames.append(frame)
+            frame_count += 1
+
+        process_time = time.time() - start_time
         cap.release()
+
         if not frames:
-            return f"Error: No frames processed"
+            return f"Error: No frames processed", None, None
+
         height, width, _ = frames[0].shape
         fourcc = cv2.VideoWriter_fourcc(*"mp4v")
         tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
@@ -45,46 +69,69 @@ def predict_video(model, video_file):
         for frame in frames:
             out.write(frame)
         out.release()
-        return tmp.name
+
+        # For the detection summary, aggregate the number of detections from the processed frames.
+        # (Note: For simplicity, this uses the detections from the first processed frame if available.)
+        num_detections = "See individual frames"  # More elaborate aggregation logic can be added.
+        return tmp.name, process_time, num_detections
     except Exception as e:
-        return f"Error during video inference: {e}"
+        return f"Error during video inference: {e}", None, None
 
-# Main inference function: loads the custom model and processes the input media.
-# Returns a tuple: (annotated_image, annotated_video).
-# One element will be a file path and the other None, based on the media type.
-def inference(model_file, input_media, media_type):
+# Main inference function.
+# It now accepts additional parameters: confidence threshold and frame step (for videos).
+# Returns a tuple with an output file path and a JSON-like dictionary with metadata.
+def inference(model_file, input_media, media_type, conf, frame_step):
     model = load_model(model_file)
-    if isinstance(model, str):
-        # An error occurred during model loading.
-        return (model, None)
-
+    if isinstance(model, str):  # An error occurred during model loading.
+        return model, {"processing_time": None, "detections": None}
+
+    # Process according to media type.
     if media_type == "Image":
-        out_image = predict_image(model, input_media)
-        return (out_image, None)
+        out_file, process_time, detections = predict_image(model, input_media, conf)
+        # For API users, return both the output file path and a dictionary with metadata.
+        metadata = {"processing_time": process_time, "detections": detections}
+        return out_file, metadata
+
     elif media_type == "Video":
-        out_video = predict_video(model, input_media)
-        return (None, out_video)
+        out_file, process_time, detections = predict_video(model, input_media, conf, frame_step)
+        metadata = {"processing_time": process_time, "detections": detections}
+        return out_file, metadata
     else:
-        return ("Unsupported media type", None)
+        return "Unsupported media type", {"processing_time": None, "detections": None}
 
 # Define Gradio interface components.
+# File upload for the custom YOLO model (a .pt file).
 model_file_input = gr.File(label="Upload Custom YOLO Model (.pt file)")
+
+# File upload for the image or video.
 media_file_input = gr.File(label="Upload Image/Video File")
+
+# Radio button for selecting media type.
 media_type_dropdown = gr.Radio(choices=["Image", "Video"], label="Select Media Type", value="Image")
 
-# Define two outputs: one for images and one for videos.
-output_image = gr.Image(label="Annotated Image")
-output_video = gr.Video(label="Annotated Video")
+# Confidence slider (minimum detection confidence).
+confidence_slider = gr.Slider(minimum=0.1, maximum=1.0, step=0.05, value=0.5, label="Detection Confidence Threshold")
+
+# Frame step slider for video (how many frames to skip between processing).
+frame_step_slider = gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Frame Step (for Video Processing)")
+
+# We define two outputs:
+# 1. A File output that will show the annotated image or video.
+# 2. A JSON/Text output that reports processing time and detections.
+output_file = gr.File(label="Processed Output")
+output_metadata = gr.JSON(label="Metadata")
 
-# Create a Gradio interface that returns a tuple: (image, video).
+# Create the Gradio interface.
+# Note: For API clients, the JSON output (metadata) gives additional info on processing.
 iface = gr.Interface(
     fn=inference,
-    inputs=[model_file_input, media_file_input, media_type_dropdown],
-    outputs=[output_image, output_video],
-    title="Custom YOLO Model Inference",
+    inputs=[model_file_input, media_file_input, media_type_dropdown, confidence_slider, frame_step_slider],
+    outputs=[output_file, output_metadata],
+    title="Enhanced Custom YOLO Model Inference",
     description=(
-        "Upload your custom YOLO model (detection, segmentation, or OBB) along with an image or video file "
-        "to run inference. The system loads your model dynamically, processes the media, and displays the output."
+        "Upload your custom YOLO model (supports detection, segmentation, or OBB), along with an image or video file. "
+        "Use the sliders to adjust the detection confidence and (for videos) the frame step for real-time performance. "
+        "The app returns an annotated output file and metadata (processing time and detection summary) for API use."
    )
 )
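For reference, a minimal sketch of exercising the updated inference() signature directly, outside Gradio. The file paths and the SimpleNamespace stand-in for the gr.File upload are hypothetical, and it assumes importing app.py does not auto-launch the interface; Ultralytics models accept a plain image path, so one is passed here.

from types import SimpleNamespace

from app import inference  # assumes app.py does not call iface.launch() at import time

# gr.File hands the function an object with a .name attribute; mimic that here.
model_file = SimpleNamespace(name="models/custom_yolo.pt")  # hypothetical path
image_path = "samples/street.jpg"                           # hypothetical path

# conf and frame_step mirror the new sliders; frame_step is ignored for images.
out_path, metadata = inference(model_file, image_path, "Image", conf=0.5, frame_step=1)
print(out_path)   # path to the annotated PNG written by predict_image
print(metadata)   # {"processing_time": <seconds>, "detections": <count or "N/A">}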
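Since the new metadata output is aimed at API clients, here is a sketch of calling the running app with gradio_client. It assumes the app is served locally on Gradio's default port, a recent gradio_client that provides handle_file, and the default /predict endpoint that gr.Interface exposes; the file paths are hypothetical.

from gradio_client import Client, handle_file

client = Client("http://127.0.0.1:7860/")  # hypothetical local URL
out_path, metadata = client.predict(
    handle_file("models/custom_yolo.pt"),  # model .pt upload (hypothetical path)
    handle_file("samples/street.jpg"),     # image/video upload (hypothetical path)
    "Image",                               # media type radio
    0.5,                                   # detection confidence slider
    1,                                     # frame step slider (ignored for images)
    api_name="/predict",
)
print(out_path, metadata)

Note that inference() reports errors as strings through the same file output, so a client should check that the first element is a real file path before using it.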