import cv2
import numpy as np
from ultralytics import YOLO
import random
import torch
import spaces

class ImageSegmenter:
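    """Runs YOLOv8 instance segmentation on an image and draws bounding boxes,
    filled masks and segmentation boundaries for the detected objects."""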
    def __init__(self, model_type="yolov8s-seg") -> None:
        self.model_type = model_type
        self.device = 'cuda'  # ZeroGPU will always use CUDA
        self.is_show_bounding_boxes = True
        self.is_show_segmentation_boundary = False
        self.is_show_segmentation = False
        self.confidence_threshold = 0.5
        self.cls_clr = {}

        # params
        self.bb_thickness = 2
        self.bb_clr = (255, 0, 0)

        # variables
        self.masks = {}
        
        # Model will be loaded in predict to work with ZeroGPU
        self.model = None

    def get_cls_clr(self, cls_id):
        if cls_id in self.cls_clr:
            return self.cls_clr[cls_id]
        
        # gen rand color
        r = random.randint(50, 200)
        g = random.randint(50, 200)
        b = random.randint(50, 200)
        self.cls_clr[cls_id] = (r, g, b)
        return (r, g, b)

    @spaces.GPU(duration=30)  # Adjust duration based on your needs
    def predict(self, image):            
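        """Run segmentation on `image` and return the annotated image together with
        per-object data: [class id, class name, box center, mask, color]."""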
        # Load model if not loaded (will happen on first prediction)
        if self.model is None:
            self.model = YOLO('models/' + self.model_type + '.pt')
            self.model.to(self.device)

        # params
        objects_data = [] 
        image = image.copy()
        
        # Run prediction
        predictions = self.model.predict(image)

        cls_ids = predictions[0].boxes.cls.cpu().numpy()
        bounding_boxes = predictions[0].boxes.xyxy.int().cpu().numpy()        
        cls_conf = predictions[0].boxes.conf.cpu().numpy()
        
        # segmentation
        if predictions[0].masks:
            seg_mask_boundary = predictions[0].masks.xy
            seg_mask = predictions[0].masks.data.cpu().numpy()  
        else:
            seg_mask_boundary, seg_mask = [], np.array([])    
        
        for id, cls in enumerate(cls_ids):
            cls_clr = self.get_cls_clr(cls)

            # draw filled segmentation region
            if seg_mask.any() and cls_conf[id] > self.confidence_threshold:
                self.masks[id] = seg_mask[id]
                
                if self.is_show_segmentation:
                    alpha = 0.8                

                    # convert the single-channel mask to a 3-channel mask
                    colored_mask = np.expand_dims(seg_mask[id], 0).repeat(3, axis=0)
                    colored_mask = np.moveaxis(colored_mask, 0, -1)

                    # resize the mask to match the image size, if necessary
                    if image.shape[:2] != seg_mask[id].shape[:2]:
                        colored_mask = cv2.resize(colored_mask, (image.shape[1], image.shape[0]))

                    # fill the masked pixels with the class color, then blend the
                    # overlay with the original image at the given alpha
                    image_overlay = np.where(colored_mask > 0.5, np.array(cls_clr, dtype=image.dtype), image)
                    image = cv2.addWeighted(image, 1 - alpha, image_overlay, alpha, 0)

                # draw bounding box with class name and score
                if self.is_show_bounding_boxes and cls_conf[id] > self.confidence_threshold:
                    (x1, y1, x2, y2) = bounding_boxes[id]
                    cls_name = self.model.names[int(cls)]
                    cls_confidence = cls_conf[id]
                    disp_str = f"{cls_name} {cls_confidence:.2f}"
                    cv2.rectangle(image, (x1, y1), (x2, y2), cls_clr, self.bb_thickness)
                    cv2.rectangle(image, (x1, y1), (x1+(len(disp_str)*9), y1+15), cls_clr, -1)
                    cv2.putText(image, disp_str, (x1+5, y1+10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
                
                # draw segmentation boundary
                if len(seg_mask_boundary) and self.is_show_segmentation_boundary and cls_conf[id] > self.confidence_threshold:            
                    cv2.polylines(image, [np.array(seg_mask_boundary[id], dtype=np.int32)], isClosed=True, color=cls_clr, thickness=2)

                # object variables
                (x1, y1, x2, y2) = bounding_boxes[id]
                center = x1+(x2-x1)//2, y1+(y2-y1)//2
                objects_data.append([int(cls), self.model.names[int(cls)], center, self.masks[id], cls_clr])

        return image, objects_data
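

# Minimal usage sketch: assumes a CUDA-capable environment (the class hard-codes
# device='cuda'), weights available at 'models/yolov8s-seg.pt', and a hypothetical
# input image 'sample.jpg'; it also assumes the @spaces.GPU decorator passes the
# call through when run outside a ZeroGPU Space.
if __name__ == "__main__":
    segmenter = ImageSegmenter(model_type="yolov8s-seg")
    segmenter.is_show_segmentation = True          # also draw the filled masks

    img = cv2.imread("sample.jpg")                 # hypothetical input path
    annotated, objects = segmenter.predict(img)

    for cls_id, cls_name, center, mask, color in objects:
        print(f"{cls_name} at {center}")

    cv2.imwrite("annotated.jpg", annotated)        # hypothetical output path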