File size: 10,763 Bytes
5ac1897
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
import cv2
import imageio
import numpy as np

from typing import Union, Tuple, List
from pathlib import Path


def flex_resize_img(
    img    : np.ndarray,
    tgt_wh : Union[Tuple[int, int], None] = None,
    ratio  : Union[float, None] = None,
    kp_mod : int = 1,
):
    '''
    Resize one image by wrapping it as a single-frame video and delegating to `flex_resize_video`.
    Give either `tgt_wh` or `ratio`; when both are given, `tgt_wh` takes precedence.

    ### Args
    - img: np.ndarray, (H, W, 3)
    - tgt_wh: Tuple[int, int], default=None
        - Target (width, height). Set one of the two to -1 to derive it from the aspect ratio.
    - ratio: float, default=None
        - Scale factor, only consulted when `tgt_wh` is not given.
    - kp_mod: int, default 1
        - The output width and height are kept as multiples of `kp_mod`.
        - For example, `kp_mod=16` rounds both sides to the nearest multiple of 16.

    ### Returns
    - np.ndarray, (H', W', 3)
        - The resized image.
    '''
    assert img.ndim == 3, 'img must have 3 dimensions.'
    # Add a leading frame axis so the video routine can do the work, then strip it again.
    single_frame_clip = img[np.newaxis]
    resized_clip = flex_resize_video(single_frame_clip, tgt_wh, ratio, kp_mod)
    return resized_clip[0]


def flex_resize_video(
    frames : np.ndarray,
    tgt_wh : Union[Tuple[int, int], None] = None,
    ratio  : Union[float, None] = None,
    kp_mod : int = 1,
):
    '''
    Resize the frames to the target width and height. Set one of width and height to -1 to keep the aspect ratio.
    Only one of `tgt_wh` and `ratio` needs to be set; if both are set, `tgt_wh` will be used.

    ### Args
    - frames: np.ndarray, (L, H, W, 3)
    - tgt_wh: Tuple[int, int], default=None
        - The target width and height, set one of them to -1 to keep the aspect ratio.
        - Every element must be either positive or -1, and at least one must be positive.
    - ratio: float, default=None
        - The ratio to resize the frames. It will be used if `tgt_wh` is not set.
    - kp_mod: int, default 1
        - Keep the width and height as multiples of `kp_mod`.
        - For example, if `kp_mod=16`, the width and height will be rounded to the nearest multiple of 16.
        - A side that would round down to 0 is raised to `kp_mod`, so the output is never empty in H or W.

    ### Returns
    - np.ndarray, (L, H', W', 3)
        - The resized frames. An empty input (L == 0) yields an empty array with the target H' and W'.

    ### Raises
    - AssertionError: if neither `tgt_wh` nor `ratio` is given, or any argument fails the checks above.
    '''
    assert tgt_wh is not None or ratio is not None, 'At least one of tgt_wh and ratio must be set.'
    if tgt_wh is not None:
        assert len(tgt_wh) == 2, 'tgt_wh must be a tuple of 2 elements.'
        assert tgt_wh[0] > 0 or tgt_wh[1] > 0, 'At least one of width and height must be positive.'
        # Reject values such as 0 or -2 early; otherwise they would be silently clamped below.
        assert all(v > 0 or v == -1 for v in tgt_wh), 'Width and height must be positive, or -1 to keep the aspect ratio.'
    if ratio is not None:
        assert ratio > 0, 'ratio must be positive.'
    assert len(frames.shape) == 4, 'frames must have 4 dimensions.'

    def align_size(val:float):
        ''' Round the value to the nearest multiple of `kp_mod`, but never below one multiple. '''
        return max(kp_mod, int(round(val / kp_mod) * kp_mod))

    # Calculate the target width and height.
    orig_h, orig_w = frames.shape[1], frames.shape[2]
    if tgt_wh is None:
        # Derive the size from the ratio when no explicit size was requested.
        tgt_wh = (int(orig_w * ratio), int(orig_h * ratio))  # type: ignore
    tgt_w, tgt_h = tgt_wh
    tgt_w = align_size(orig_w * tgt_h / orig_h) if tgt_w == -1 else align_size(tgt_w)
    tgt_h = align_size(orig_h * tgt_w / orig_w) if tgt_h == -1 else align_size(tgt_h)

    # `np.stack` cannot handle an empty list, so answer the zero-frame case directly.
    if len(frames) == 0:
        return np.zeros((0, tgt_h, tgt_w) + tuple(frames.shape[3:]), dtype=frames.dtype)

    # Resize the frames one by one; cv2 expects the size as (width, height).
    resized_frames = np.stack([cv2.resize(frame, (tgt_w, tgt_h)) for frame in frames])

    return resized_frames


def splice_img(
    img_grids : Union[List[np.ndarray], np.ndarray],
    grid_ids  : Union[List[int], np.ndarray],
):
    '''
    Tile equally sized images into one canvas following a grid of indices.
    For example, with 3 images [i1, i2, i3] and the `grid_ids` matrix:
    [[ 0,  1],                          |i1|i2|
     [ 2, -1],  , the result will be    |i3|ib|  , where ib is a black place holder.
     [-1, -1]]                          |ib|ib|

    The images are wrapped as one-frame videos and handed to `splice_video`.

    ### Args
    - img_grids: List[np.ndarray] or np.ndarray, (K, H, W, 3)
        - The source images. A list is stacked, so all images must share the same size.
    - grid_ids: List[int] or np.ndarray, (Y, X)
        - A 2D integer matrix; each entry is an index into `img_grids` (0 to K-1).
        - Use -1 for a cell that should stay a black place holder.

    ### Returns
    - np.ndarray, (H*Y, W*X, 3)
        - The spliced image.
    '''
    images = np.stack(img_grids) if isinstance(img_grids, list) else img_grids
    layout = np.array(grid_ids) if isinstance(grid_ids, list) else grid_ids

    assert images.ndim == 4, 'img_grids must be in shape (K, H, W, 3).'
    # Insert a length-1 frame axis after K: (K, H, W, 3) -> (K, 1, H, W, 3).
    one_frame_videos = images[:, np.newaxis]
    spliced = splice_video(one_frame_videos, layout)
    return spliced[0]


def splice_video(
    video_grids : Union[List[np.ndarray], np.ndarray],
    grid_ids    : Union[List[int], np.ndarray],
):
    '''
    Splice the videos with the same size, according to the grid index.
    For example, you have 3 videos [v1, v2, v3], and a `grid_ids` matrix:
    [[ 0,  1],                              |v1|v2|
     [ 2, -1],  , then the results will be  |v3|vb|  , where vb means a black place holder.
     [-1, -1]]                              |vb|vb|

    ### Args
    - video_grids: List[np.ndarray] or np.ndarray, (K, L, H, W, C)
        - The source videos to splice. A list is stacked, so all the videos must have the same size.
    - grid_ids: List[int] or np.ndarray, (Y, X)
        - The grid index of each video. It should be a 2D matrix with integers as the type of elements.
        - The value in this matrix indexes the video in the `video_grids`, so it ranges from 0 to K-1.
        - Specially, set the grid index to -1 to use a black (all-zero) place holder.

    ### Returns
    - np.ndarray, (L, H*Y, W*X, C)
        - The spliced video, with the same dtype as `video_grids`.

    ### Raises
    - AssertionError: if the shapes or the integer type of `grid_ids` are invalid.
    - IndexError: if `grid_ids` contains a value below -1 or above K-1.
    '''
    if isinstance(video_grids, list):
        video_grids = np.stack(video_grids)
    if isinstance(grid_ids, list):
        grid_ids = np.array(grid_ids)

    assert len(video_grids.shape) == 5, 'video_grids must be in shape (K, L, H, W, C).'
    assert len(grid_ids.shape) == 2, 'grid_ids must be a 2D matrix.'
    assert isinstance(grid_ids[0, 0].item(), int), f'grid_ids must be an integer matrix, but got {grid_ids.dtype}.'

    K, L, H, W, C = video_grids.shape
    Y, X = grid_ids.shape

    # Values below -1 would otherwise wrap around through negative indexing and
    # silently pick the wrong video, so reject every out-of-range index up front.
    if grid_ids.min() < -1 or grid_ids.max() >= K:
        raise IndexError(f'grid_ids must only contain -1 or indices in [0, {K - 1}].')

    # Initialize the spliced video with zeros (black); keep the input dtype so
    # that non-uint8 videos are not truncated on assignment.
    spliced_video = np.zeros((L, H*Y, W*X, C), dtype=video_grids.dtype)
    for y in range(Y):
        for x in range(X):
            grid_id = grid_ids[y, x]
            if grid_id == -1:
                continue  # leave the place holder black
            spliced_video[:, y*H:(y+1)*H, x*W:(x+1)*W, :] = video_grids[grid_id]

    return spliced_video


def crop_img(
    img  : np.ndarray,
    lurb : Union[np.ndarray, List],
):
    '''
    Crop the image with the given bounding box.
    The image is wrapped as a single-frame video and cropped by `crop_video`.

    ### Args
    - img: np.ndarray, (H, W, C)
    - lurb: np.ndarray or list, (4,)
        - The bounding box in the format of left, up, right, bottom.

    ### Returns
    - np.ndarray, (H', W', C)
        - The cropped image.

    ### Raises
    - AssertionError: if `img` does not have exactly 3 dimensions.
    '''
    # Check the rank here so the error names the image rather than the
    # intermediate 4-D frame stack that the video routine validates.
    assert len(img.shape) == 3, 'img must have 3 dimensions.'
    return crop_video(img[None], lurb)[0]


def crop_video(
    frames : np.ndarray,
    lurb   : Union[np.ndarray, List],
):
    '''
    Crop the video with the given bounding box.
    Any part of the bounding box that lies outside the video is filled with zeros;
    a box that does not overlap the video at all yields an all-zero result.

    ### Args
    - frames: np.ndarray, (L, H, W, C)
    - lurb: np.ndarray or list, (4,)
        - The bounding box in the format of left, up, right, bottom.
        - Values are converted to integers; right and bottom are exclusive.

    ### Returns
    - np.ndarray, (L, H', W', C)
        - The cropped video, where H' = bottom - up and W' = right - left.
        - The dtype and channel count follow `frames`.

    ### Raises
    - AssertionError: if `frames` does not have exactly 4 dimensions.
    '''
    assert len(frames.shape) == 4, 'frames must have 4 dimensions.'

    l, u, r, b = (int(v) for v in np.asarray(lurb).astype(int))
    L, H, W, C = frames.shape

    # The output always has the size of the requested box; zeros act as padding.
    cropped_frames = np.zeros((L, b - u, r - l, C), dtype=frames.dtype)

    # Clamp every edge into the frame so that negative values can never be
    # misread as "count from the end" by NumPy slicing.
    l_, r_ = min(max(l, 0), W), min(max(r, 0), W)
    u_, b_ = min(max(u, 0), H), min(max(b, 0), H)

    # Copy only when the box and the frame actually overlap.
    if l_ < r_ and u_ < b_:
        cropped_frames[:, u_-u:b_-u, l_-l:r_-l] = frames[:, u_:b_, l_:r_]

    return cropped_frames

def pad_img(
    img     : np.ndarray,
    tgt_wh  : Tuple[int, int],
    pad_val : int = 0,
    align   : str = 'c-c',
):
    '''
    Pad one image up to the target width and height by delegating to `pad_video`.

    ### Args
    - img: np.ndarray, (H, W, 3)
    - tgt_wh: Tuple[int, int]
        - Target (width, height). Use -1 to keep the original extent on that axis.
    - pad_val: int, default 0
        - The value written into the padded area.
    - align: str, default 'c-c'
        - Alignment string in the form 'h-v' (horizontal-vertical).
        - It is forwarded unchanged to `pad_video`, which validates it.

    ### Returns
    - np.ndarray, (H', W', 3)
        - The padded image.
    '''
    assert img.ndim == 3, 'img must have 3 dimensions.'
    # Treat the image as a one-frame video, pad it, and drop the frame axis again.
    single_frame_clip = img[np.newaxis]
    padded_clip = pad_video(single_frame_clip, tgt_wh, pad_val, align)
    return padded_clip[0]

def pad_video(
    frames  : np.ndarray,
    tgt_wh  : Tuple[int, int],
    pad_val : int = 0,
    align   : str = 'c-c',
):
    '''
    Pad the video to the target width and height.

    ### Args
    - frames: np.ndarray, (L, H, W, 3)
    - tgt_wh: Tuple[int, int]
        - The target width and height. Use -1 to indicate the original scale.
        - Neither side may be smaller than the original size.
    - pad_val: int, default 0
        - The value to pad the frames, in the range of [0, 255].
    - align: str, default 'c-c'
        - Where the original content sits inside the padded frame, in the format of 'h-v'.
        - 'h' is one of 'l', 'c', 'r' (left, center, right).
        - 'v' is one of 't', 'c', 'b' (top, center, bottom); 'l' and 'r' are also
          accepted as legacy aliases of 't' and 'b'.
        - With 'c', an odd amount of padding puts the extra pixel on the right / bottom side.

    ### Returns
    - np.ndarray, (L, H', W', 3)
        - The padded frames.

    ### Raises
    - AssertionError: if any argument fails the checks described above.
    '''
    # Check data validity.
    assert len(frames.shape) == 4, 'frames must have 4 dimensions.'
    assert len(tgt_wh) == 2, 'tgt_wh must be a tuple of 2 elements.'
    H, W = frames.shape[1], frames.shape[2]
    tgt_w = W if tgt_wh[0] == -1 else tgt_wh[0]
    tgt_h = H if tgt_wh[1] == -1 else tgt_wh[1]
    assert tgt_w >= W and tgt_h >= H, 'The target size must be larger than the original size.'
    assert pad_val >= 0 and pad_val <= 255, 'The pad value must be in the range of [0, 255].'

    # Check align pattern.
    align_parts = align.split('-')
    assert len(align_parts) == 2, 'align must be in the format of "h-v".'
    h_align, v_align = align_parts
    assert h_align in ('l', 'c', 'r') and v_align in ('t', 'c', 'b', 'l', 'r'), \
        'align must be "h-v" with h in ["l", "c", "r"] and v in ["t", "c", "b"].'
    # Map the vertical names onto the shared start/center/end vocabulary.
    v_align = {'t': 'l', 'b': 'r'}.get(v_align, v_align)

    def leading_pad(total:int, mode:str):
        ''' Number of pixels padded before the content (left or top side). '''
        if mode == 'c':
            return total // 2
        if mode == 'r':
            return total  # content pushed to the far side, all padding comes first
        return 0

    pad_w, pad_h = tgt_w - W, tgt_h - H  # how many pixels to add along each axis
    pad_l = leading_pad(pad_w, h_align)
    pad_u = leading_pad(pad_h, v_align)
    pad_r, pad_b = pad_w - pad_l, pad_h - pad_u

    # Only the height and width axes are padded; frame and channel axes are untouched.
    padded_frames = np.pad(frames, ((0, 0), (pad_u, pad_b), (pad_l, pad_r), (0, 0)), 'constant', constant_values=pad_val)

    return padded_frames