# Alessio Grancini
# Update app.py
# 8dda9f5 verified
# raw
# history blame
# 8.93 kB
from ultralytics import YOLO
import cv2
import gradio as gr
import numpy as np
import os
import torch
import utils
import plotly.graph_objects as go
import spaces
from image_segmenter import ImageSegmenter
from monocular_depth_estimator import MonocularDepthEstimator
from point_cloud_generator import display_pcd
# params
# Cancellation flag for long-running processing; cancel() sets it to True.
CANCEL_PROCESSING = False
# Initialize models (but actual loading happens in decorated functions)
# NOTE(review): whether weights are loaded here or lazily depends on the
# ImageSegmenter / MonocularDepthEstimator constructors, which are not visible
# in this file — confirm.
# These defaults are the same pair model_selector() uses for its "Small"
# preset; model_selector() rebinds both globals when another size is chosen.
img_seg = ImageSegmenter(model_type="yolov8s-seg")
depth_estimator = MonocularDepthEstimator(model_type="midas_v21_small_256")
@spaces.GPU(duration=30)  # Adjust duration based on your needs
def process_image(image):
    """Produce all four image-tab outputs for a single input image.

    The image is resized once via ``utils.resize`` and the resized copy is
    fed to both the segmenter and the depth estimator, so every output
    refers to the same frame.

    Args:
        image: The input image as delivered by the ``gr.Image`` component
            (or by the cached examples).

    Returns:
        A 4-tuple ``(segmentation image, depth colormap, distance-annotated
        image, point-cloud figure)``, in the order the UI outputs are wired.
    """
    resized = utils.resize(image)

    # Two independent model passes over the same resized frame.
    segmented, detections = img_seg.predict(resized)
    depth, depth_vis = depth_estimator.make_prediction(resized)

    # Both derived outputs combine the depth map with the detected objects.
    annotated = utils.draw_depth_info(resized, depth, detections)
    object_clouds = utils.generate_obj_pcd(depth, detections)
    figure = display_pcd(object_clouds)

    return segmented, depth_vis, annotated, figure
@spaces.GPU(duration=30)
def test_process_img(image):
    """Run only the two model passes and expose their raw results.

    Unlike ``process_image`` this skips the distance overlay and the point
    cloud, and additionally returns the detected-object data and the raw
    depth map.

    Args:
        image: The input image; it is resized with ``utils.resize`` first.

    Returns:
        A 4-tuple ``(segmentation image, objects data, depth map,
        depth colormap)``.
    """
    resized = utils.resize(image)
    segmented, detections = img_seg.predict(resized)
    depth, depth_vis = depth_estimator.make_prediction(resized)
    return segmented, detections, depth, depth_vis
@spaces.GPU(duration=60)  # Longer duration for video processing
def process_video(vid_path=None):
    """Stream per-frame segmentation, depth and distance images for a video.

    This is a generator: each decoded frame is resized, run through the
    segmenter and the depth estimator, and one result tuple is yielded per
    frame so the UI can update while the video is still being processed.

    Args:
        vid_path: Path of the video file to read. When it is empty or
            ``None`` the generator finishes immediately without yielding.

    Yields:
        A 3-tuple ``(segmentation image, depth colormap, distance-annotated
        image)``. The segmentation and distance images are converted from
        BGR to RGB before being yielded; the depth colormap is passed
        through as returned by the depth estimator.
    """
    global CANCEL_PROCESSING

    if not vid_path:
        # No video was supplied; there is nothing to open or stream.
        return

    # Drop any stale cancel request left over from a previous run, otherwise
    # one click on "Cancel" would also abort every later video.
    # NOTE(review): if the GPU decorator executes this function in a separate
    # process, a flag set by cancel() in the main process will not be visible
    # here — confirm how cancellation should be delivered in that setup.
    CANCEL_PROCESSING = False

    vid_cap = cv2.VideoCapture(vid_path)
    try:
        while vid_cap.isOpened() and not CANCEL_PROCESSING:
            ret, frame = vid_cap.read()
            if not ret:
                # End of stream or a failed read. A failed read does not close
                # the capture, so without this break the loop would spin
                # forever once the last frame has been consumed.
                break

            print("making predictions ....")
            frame = utils.resize(frame)
            image_segmentation, objects_data = img_seg.predict(frame)
            depthmap, depth_colormap = depth_estimator.make_prediction(frame)
            dist_image = utils.draw_depth_info(frame, depthmap, objects_data)
            yield (
                cv2.cvtColor(image_segmentation, cv2.COLOR_BGR2RGB),
                depth_colormap,
                cv2.cvtColor(dist_image, cv2.COLOR_BGR2RGB),
            )
    finally:
        # Runs on normal completion, on errors, and when the consumer closes
        # the generator early, so the capture handle is never leaked.
        vid_cap.release()
def update_segmentation_options(options):
    """Sync the segmenter's overlay switches with the ticked option labels.

    Each switch on the global ``img_seg`` becomes ``True`` when its label is
    present in ``options`` and ``False`` otherwise.

    Args:
        options: The labels currently selected in an "Options" checkbox
            group.
    """
    # A membership test already evaluates to the bool each switch needs.
    img_seg.is_show_bounding_boxes = 'Show Boundary Box' in options
    img_seg.is_show_segmentation = 'Show Segmentation Region' in options
    img_seg.is_show_segmentation_boundary = 'Show Segmentation Boundary' in options
def update_confidence_threshold(thres_val):
    """Store the slider value on the global segmenter as a fraction.

    Args:
        thres_val: Value of a "Confidence Threshold" slider, which ranges
            from 1 to 100 in this app.
    """
    fraction = thres_val / 100
    img_seg.confidence_threshold = fraction
@spaces.GPU(duration=10)  # Short duration for model loading
def model_selector(model_type):
    """Swap the global segmenter and depth estimator for the chosen preset.

    The user's current overlay options and confidence threshold are copied
    from the outgoing segmenter onto the new one, so switching the model size
    does not silently reset settings that the UI widgets still display.

    Args:
        model_type: Label selected in a "Model Type" dropdown. Any value that
            is not one of the three known labels falls back to the "Small"
            preset.
    """
    global img_seg, depth_estimator

    # label -> (depth-estimator model, segmentation model)
    small_preset = ("midas_v21_small_256", "yolov8s-seg")
    presets = {
        "Small - Better performance and less accuracy": small_preset,
        "Medium - Balanced performance and accuracy": ("dpt_hybrid_384", "yolov8m-seg"),
        "Large - Slow performance and high accuracy": ("dpt_large_384", "yolov8l-seg"),
    }
    midas_model, yolo_model = presets.get(model_type, small_preset)

    # Build both replacements before rebinding either global, so a failure
    # while constructing the second one cannot leave a mismatched pair behind.
    new_segmenter = ImageSegmenter(model_type=yolo_model)
    new_depth_estimator = MonocularDepthEstimator(model_type=midas_model)

    # Carry over the settings that the option/threshold callbacks write onto
    # the segmenter. hasattr() guards against an attribute that the outgoing
    # instance never had.
    carried_settings = (
        "is_show_bounding_boxes",
        "is_show_segmentation",
        "is_show_segmentation_boundary",
        "confidence_threshold",
    )
    for setting in carried_settings:
        if hasattr(img_seg, setting):
            setattr(new_segmenter, setting, getattr(img_seg, setting))

    img_seg = new_segmenter
    depth_estimator = new_depth_estimator
def cancel():
    """Raise the module-level cancellation flag.

    Bound to the "Cancel" button of the video tab; it only sets
    ``CANCEL_PROCESSING`` to ``True`` and returns nothing.
    """
    global CANCEL_PROCESSING
    CANCEL_PROCESSING = True
if __name__ == "__main__":
    # --- values shared by the Image and Video tabs -------------------------
    # Kept in one place so the two tabs cannot drift apart.
    model_choices = (
        "Small - Better performance and less accuracy",
        "Medium - Balanced performance and accuracy",
        "Large - Slow performance and high accuracy",
    )
    overlay_options = (
        "Show Boundary Box",
        "Show Segmentation Region",
        "Show Segmentation Boundary",
    )
    app_dir = os.path.dirname(__file__)

    def asset_paths(*relative_paths):
        """Return the given app-relative paths joined onto the app directory."""
        return [os.path.join(app_dir, rel) for rel in relative_paths]

    def model_dropdown():
        """Create a "Model Type" dropdown in the current layout context."""
        return gr.Dropdown(
            list(model_choices),
            label="Model Type", value=model_choices[0],
            info="Select the inference model before running predictions!")

    def options_checkboxes():
        """Create an "Options" checkbox group in the current layout context."""
        return gr.CheckboxGroup(list(overlay_options), label="Options")

    def confidence_slider():
        """Create a "Confidence Threshold" slider in the current layout context."""
        return gr.Slider(
            1, 100, value=60, label="Confidence Threshold",
            info="Choose the threshold above which objects should be detected")

    # gradio gui app
    with gr.Blocks() as my_app:
        # title
        gr.Markdown("<h1><center>Simultaneous Segmentation and Depth Estimation</center></h1>")
        gr.Markdown("<h3><center>Created by Vaishanth</center></h3>")
        gr.Markdown("<h3><center>This model estimates the depth of segmented objects.</center></h3>")

        # tabs
        with gr.Tab("Image"):
            with gr.Row():
                # Left column: input and controls.
                with gr.Column(scale=1):
                    img_input = gr.Image()
                    model_type_img = model_dropdown()
                    options_checkbox_img = options_checkboxes()
                    conf_thres_img = confidence_slider()
                    submit_btn_img = gr.Button(value="Predict")

                # Right column: 2x2 grid of results.
                with gr.Column(scale=2):
                    with gr.Row():
                        segmentation_img_output = gr.Image(height=300, label="Segmentation")
                        depth_img_output = gr.Image(height=300, label="Depth Estimation")
                    with gr.Row():
                        dist_img_output = gr.Image(height=300, label="Distance")
                        pcd_img_output = gr.Plot(label="Point Cloud")

            gr.Markdown("## Sample Images")
            gr.Examples(
                examples=asset_paths(
                    "assets/images/baggage_claim.jpg",
                    "assets/images/kitchen_2.png",
                    "assets/images/soccer.jpg",
                    "assets/images/room_2.png",
                    "assets/images/living_room.jpg",
                ),
                inputs=img_input,
                outputs=[segmentation_img_output, depth_img_output, dist_img_output, pcd_img_output],
                fn=process_image,
                cache_examples=True,
            )

        with gr.Tab("Video"):
            with gr.Row():
                # Left column: input and controls.
                with gr.Column(scale=1):
                    vid_input = gr.Video()
                    model_type_vid = model_dropdown()
                    options_checkbox_vid = options_checkboxes()
                    conf_thres_vid = confidence_slider()
                    with gr.Row():
                        cancel_btn = gr.Button(value="Cancel")
                        submit_btn_vid = gr.Button(value="Predict")

                # Right column: streamed per-frame results.
                with gr.Column(scale=2):
                    with gr.Row():
                        segmentation_vid_output = gr.Image(height=300, label="Segmentation")
                        depth_vid_output = gr.Image(height=300, label="Depth Estimation")
                    with gr.Row():
                        dist_vid_output = gr.Image(height=300, label="Distance")

            gr.Markdown("## Sample Videos")
            gr.Examples(
                examples=asset_paths(
                    "assets/videos/input_video.mp4",
                    "assets/videos/driving.mp4",
                    "assets/videos/overpass.mp4",
                    "assets/videos/walking.mp4",
                ),
                inputs=vid_input,
            )

        # image tab logic
        submit_btn_img.click(
            process_image, inputs=img_input,
            outputs=[segmentation_img_output, depth_img_output, dist_img_output, pcd_img_output])
        options_checkbox_img.change(update_segmentation_options, options_checkbox_img, [])
        conf_thres_img.change(update_confidence_threshold, conf_thres_img, [])
        model_type_img.change(model_selector, model_type_img, [])

        # video tab logic
        video_event = submit_btn_vid.click(
            process_video, inputs=vid_input,
            outputs=[segmentation_vid_output, depth_vid_output, dist_vid_output])
        model_type_vid.change(model_selector, model_type_vid, [])
        # Besides raising the cancel flag, ask Gradio itself to cancel the
        # running video event, so "Cancel" stops the stream even when the
        # flag alone does not reach the worker that is producing frames.
        cancel_btn.click(cancel, inputs=[], outputs=[], cancels=[video_event])
        options_checkbox_vid.change(update_segmentation_options, options_checkbox_vid, [])
        conf_thres_vid.change(update_confidence_threshold, conf_thres_vid, [])

    # Launch with appropriate queue settings for ZeroGPU.
    # ``concurrency_count`` is the Gradio 3.x spelling; Gradio 4 raises a
    # DeprecationWarning for it and later releases drop the parameter (a
    # TypeError). Fall back to the newer per-event limit so the app starts
    # on either generation.
    try:
        my_app.queue(concurrency_count=1, max_size=10)
    except (TypeError, DeprecationWarning):
        my_app.queue(default_concurrency_limit=1, max_size=10)
    my_app.launch()