Spaces:

anh1811
/

yolov3_cl

Runtime error

App Files Files Community

anh1811 committed on Aug 14, 2023

Commit

8d4bf19

1 Parent(s): 9f2557d

Upload 16 files

Browse files

Files changed (15) hide show

2007_base_10_10_mAP_10_10.pth (1).tar +3 -0
2007_base_15_5_mAP_15_5.pth (1).tar +3 -0
2007_base_19_1_mAP_19_1.pth (1).tar +3 -0
2007_finetune_10_10_mAP_10_10.pth.tar +3 -0
2007_finetune_15_5_mAP_15_5.pth.tar +3 -0
2007_finetune_19_1_mAP_19_1.pth (1).tar +3 -0
2007_finetune_19_1_mAP_19_1.pth.tar +3 -0
2007_image_store_base_16_5.pth +3 -0
2007_task2_10_10_mAP_10_10.pth (2) (1).tar +3 -0
2007_task2_15_5_mAP_15_5.pth (3) (1).tar +3 -0
2007_task2_19_1_mAP_19_1.pth (3) (1).tar +3 -0
app.py +408 -0
image_store_base_15_5.pth +3 -0
model.py +239 -0
requirements.txt +8 -0

2007_base_10_10_mAP_10_10.pth (1).tar ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f2cf61c8a0685a211ed58722fadcdc86355a6222ee542bbd585d06ac7d2d4bc
+size 739451088

2007_base_15_5_mAP_15_5.pth (1).tar ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:034c0239cf38e567291becb37e70a7771d13d461a38aeddffe75b7ea008df37a
+size 739772750

2007_base_19_1_mAP_19_1.pth (1).tar ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:43ceed829c69943cb1aab99dc5303b321ba1ffc864a11d9fcb802f8baa21e668
+size 740030798

2007_finetune_10_10_mAP_10_10.pth.tar ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0863385a77abf7d1c68c1092bf5b5a8c93c8a09830da13decc006934b5736304
+size 739675028

2007_finetune_15_5_mAP_15_5.pth.tar ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d7aa374db868c2163fd857831ed83d92e975c820e47c40682c43eb7663a6d407
+size 739888146

2007_finetune_19_1_mAP_19_1.pth (1).tar ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:84e42744ffd5b084cba987618bfbf44d139f1ab1a8c32ab936bfa57a9e7e809a
+size 740060242

2007_finetune_19_1_mAP_19_1.pth.tar ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:91c85c59ddbdf40fe36ae6c59ee02a0751a03f7cce22bf426c64c6c8042573fa
+size 740060242

2007_image_store_base_16_5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5eda9ef842bb084d651296c5b7345060ecf166bcab17e39ead080204a43f15ef
+size 21679

2007_task2_10_10_mAP_10_10.pth (2) (1).tar ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e49103fe59a4101cb9a43b56d7c1a8263ef947665ed470b5f88e534d79759d6d
+size 739671601

2007_task2_15_5_mAP_15_5.pth (3) (1).tar ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5f890ee29b5e4367474b66079572a5f0558587db5e77d81d80d0deb182c72d7c
+size 739884783

2007_task2_19_1_mAP_19_1.pth (3) (1).tar ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f4a92093f22e10382a2da0fc38f94e5200d4d6af2ea0feac6dc2593a478e7b31
+size 740056815

app.py ADDED Viewed

	@@ -0,0 +1,408 @@

+import streamlit as st
+# import config
+import albumentations as A
+from albumentations.pytorch import ToTensorV2
+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+import math
+from PIL import Image
+# import wandb
+from model import YOLOv3
+import cv2
+# Side length, in pixels, of the square image fed to the network.
+IMAGE_SIZE = 416
+# Three (width, height) anchor pairs for each of the three prediction scales,
+# listed in the same order as S below. They are multiplied by the grid size
+# before use (see scaled_anchors); presumably they are fractions of the image
+# size -- TODO confirm against the training configuration.
+ANCHORS = [
+    [(0.28, 0.22), (0.38, 0.48), (0.9, 0.78)],
+    [(0.07, 0.15), (0.15, 0.11), (0.14, 0.29)],
+    [(0.02, 0.03), (0.04, 0.07), (0.08, 0.06)],
+]
+# Grid sizes of the three prediction scales (image size divided by 32, 16 and 8).
+S = [IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8]
+# Inference preprocessing: shrink/grow the longest side to IMAGE_SIZE, pad to a
+# square with a constant border, divide pixel values by 255 (mean 0, std 1),
+# and convert the array to a torch tensor.
+infer_transforms = A.Compose(
+    [
+        A.LongestMaxSize(max_size=IMAGE_SIZE),
+        A.PadIfNeeded(
+            min_height=IMAGE_SIZE, min_width=IMAGE_SIZE, border_mode=cv2.BORDER_CONSTANT
+        ),
+        A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255,),
+        ToTensorV2(),
+    ]
+)
+def cells_to_bboxes(predictions, anchors, S, is_preds=True):
+    """
+    Scales the predictions coming from the model to
+    be relative to the entire image such that they for example later
+    can be plotted or.
+    INPUT:
+    predictions: tensor of size (N, 3, S, S, num_classes+5)
+    anchors: the anchors used for the predictions
+    S: the number of cells the image is divided in on the width (and height)
+    is_preds: whether the input is predictions or the true bounding boxes
+    OUTPUT:
+    converted_bboxes: the converted boxes of sizes (N, num_anchors, S, S, 1+5) with class index,
+                      object score, bounding box coordinates
+    """
+    BATCH_SIZE = predictions.shape[0]
+    num_anchors = len(anchors)
+    box_predictions = predictions[..., 1:5]
+    if is_preds:
+        anchors = anchors.reshape(1, len(anchors), 1, 1, 2)
+        box_predictions[..., 0:2] = torch.sigmoid(box_predictions[..., 0:2])
+        box_predictions[..., 2:] = torch.exp(box_predictions[..., 2:]) * anchors
+        scores = torch.sigmoid(predictions[..., 0:1])
+        best_class = torch.argmax(predictions[..., 5:], dim=-1).unsqueeze(-1)
+    else:
+        scores = predictions[..., 0:1]
+        best_class = predictions[..., 5:6]
+    cell_indices = (
+        torch.arange(S)
+        .repeat(predictions.shape[0], 3, S, 1)
+        .unsqueeze(-1)
+        .to(predictions.device)
+    )
+    x = 1 / S * (box_predictions[..., 0:1] + cell_indices)
+    y = 1 / S * (box_predictions[..., 1:2] + cell_indices.permute(0, 1, 3, 2, 4))
+    w_h = 1 / S * box_predictions[..., 2:4]
+    converted_bboxes = torch.cat((best_class, scores, x, y, w_h), dim=-1).reshape(BATCH_SIZE, num_anchors * S * S, 6)
+    return converted_bboxes.tolist()
+def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners"):
+    """
+    Video explanation of this function:
+    https://youtu.be/YDkjWEN8jNA
+    Does Non Max Suppression given bboxes
+    Parameters:
+        bboxes (list): list of lists containing all bboxes with each bboxes
+        specified as [class_pred, prob_score, x1, y1, x2, y2]
+        iou_threshold (float): threshold where predicted bboxes is correct
+        threshold (float): threshold to remove predicted bboxes (independent of IoU)
+        box_format (str): "midpoint" or "corners" used to specify bboxes
+    Returns:
+        list: bboxes after performing NMS given a specific IoU threshold
+    """
+    assert type(bboxes) == list
+    bboxes = [box for box in bboxes if box[1] > threshold]
+    bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
+    bboxes_after_nms = []
+    while bboxes:
+        chosen_box = bboxes.pop(0)
+        bboxes = [
+            box
+            for box in bboxes
+            if box[0] != chosen_box[0]
+            or intersection_over_union(
+                torch.tensor(chosen_box[2:]),
+                torch.tensor(box[2:]),
+                box_format=box_format,
+            )
+            < iou_threshold
+        ]
+        bboxes_after_nms.append(chosen_box)
+    return bboxes_after_nms
+def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint", GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
+    """
+    Video explanation of this function:
+    https://youtu.be/XXYG5ZWtjj0
+    This function calculates intersection over union (iou) given pred boxes
+    and target boxes.
+    Parameters:
+        boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
+        boxes_labels (tensor): Correct labels of Bounding Boxes (BATCH_SIZE, 4)
+        box_format (str): midpoint/corners, if boxes (x,y,w,h) or (x1,y1,x2,y2)
+    Returns:
+        tensor: Intersection over union for all examples
+    """
+    if box_format == "midpoint":
+        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
+        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
+        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
+        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
+        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
+        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
+        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
+        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2
+        w1 = boxes_preds[..., 2:3]
+        h1 = boxes_preds[..., 3:4]
+        w2 = boxes_labels[..., 2:3]
+        h2 = boxes_labels[..., 3:4]
+    if box_format == "corners":
+        box1_x1 = boxes_preds[..., 0:1]
+        box1_y1 = boxes_preds[..., 1:2]
+        box1_x2 = boxes_preds[..., 2:3]
+        box1_y2 = boxes_preds[..., 3:4]
+        box2_x1 = boxes_labels[..., 0:1]
+        box2_y1 = boxes_labels[..., 1:2]
+        box2_x2 = boxes_labels[..., 2:3]
+        box2_y2 = boxes_labels[..., 3:4]
+    x1 = torch.max(box1_x1, box2_x1)
+    y1 = torch.max(box1_y1, box2_y1)
+    x2 = torch.min(box1_x2, box2_x2)
+    y2 = torch.min(box1_y2, box2_y2)
+    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
+    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
+    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))
+    iou = intersection / (box1_area + box2_area - intersection)
+    if CIoU or DIoU or GIoU:
+        cw = box1_x2.maximum(box2_x2) - box1_x1.minimum(box2_x1)  # convex (smallest enclosing box) width
+        ch = box1_y2.maximum(box2_y2) - box1_y1.minimum(box2_y1)  # convex height
+        if CIoU or DIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
+            c2 = cw ** 2 + ch ** 2 + eps  # convex diagonal squared
+            rho2 = ((box2_x1 + box2_x2 - box1_x1 - box1_x2) ** 2 + (box2_y1 + box2_y2 - box1_y1 - box1_y2) ** 2) / 4  # center dist ** 2
+            if CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
+                v = (4 / math.pi ** 2) * (torch.atan(w2 / h2) - torch.atan(w1 / h1)).pow(2)
+                with torch.no_grad():
+                    alpha = v / (v - iou + (1 + eps))
+                return iou - (rho2 / c2 + v * alpha)  # CIoU
+            return iou - rho2 / c2  # DIoU
+        c_area = cw * ch + eps  # convex area
+        return iou - (c_area - intersection) / c_area  # GIoU https://arxiv.org/pdf/1902.09630.pdf
+    return intersection / (box1_area + box2_area - intersection + 1e-6)
+def resize_box(box, origin_dims, in_dims):
+    # amount of padding
+    h_ori, w_ori = origin_dims[0], origin_dims[1]
+    print(h_ori, w_ori)
+    padding_height = max(w_ori - h_ori, 0) * in_dims/w_ori
+    padding_width =  max(h_ori - w_ori, 0) * in_dims/h_ori
+    #picture size after remove pad
+    h_new = in_dims - padding_height
+    w_new = in_dims - padding_width
+    # resize box
+    box[0] = (box[0] - padding_width//2)* w_ori/w_new
+    box[1] = (box[1] - padding_height//2)* h_ori/h_new
+    box[2] = (box[2] - padding_width//2)* w_ori/w_new
+    box[3] = (box[3] - padding_height//2)* h_ori/h_new
+    return box
+def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
+    # Rescale boxes (xyxy) from img1_shape to img0_shape
+    if ratio_pad is None:  # calculate from img0_shape
+        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
+        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
+    else:
+        gain = ratio_pad[0][0]
+        pad = ratio_pad[1]
+    boxes[..., [0, 2]] -= pad[0]  # x padding
+    boxes[..., [1, 3]] -= pad[1]  # y padding
+    boxes[..., :4] /= gain
+    clip_boxes(boxes, img0_shape)
+    return boxes
+def clip_boxes(boxes, shape):
+    # Clip boxes (xyxy) to image shape (height, width)
+    if isinstance(boxes, torch.Tensor):  # faster individually
+        boxes[..., 0].clamp_(0, shape[1])  # x1
+        boxes[..., 1].clamp_(0, shape[0])  # y1
+        boxes[..., 2].clamp_(0, shape[1])  # x2
+        boxes[..., 3].clamp_(0, shape[0])  # y2
+    else:  # np.array (faster grouped)
+        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2
+        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2
+def plot_image(image, boxes, image_ori=None):
+    import pickle as pkl
+    """Plots predicted bounding boxes on the image"""
+    # cmap = plt.get_cmap("tab20b")
+    class_labels = [
+    "aeroplane",
+    "bicycle",
+    "bird",
+    "boat",
+    "bottle",
+    "bus",
+    "car",
+    "cat",
+    "chair",
+    "cow",
+    "diningtable",
+    "dog",
+    "horse",
+    "motorbike",
+    "person",
+    "pottedplant",
+    "sheep",
+    "sofa",
+    "train",
+    "tvmonitor"
+    ]
+    colors = pkl.load(open("pallete", "rb"))
+    im = np.array(image)
+    height, width, _ = im.shape
+    # Draw bounding boxes on the image
+    for box in boxes:
+        assert len(box) == 6, "box should contain class pred, confidence, x, y, width, height"
+        class_pred = box[0]
+        conf = box[1]
+        box = box[2:]
+        box_clone = box.copy()
+        box[0] = max(box_clone[0] - box_clone[2] / 2, 0.) * width
+        box[1] = max(box_clone[1] - box_clone[3] / 2, 0.) * height
+        box[2] = min(box_clone[0] + box_clone[2] / 2, 1.) * width
+        box[3] = min(box_clone[1] + box_clone[3] / 2, 1.) * height
+        box = scale_boxes((height, width), torch.tensor(box), image_ori.shape[:2])
+        h_o, w_o, _ = image_ori.shape
+        color = colors[int(class_pred)]
+        # print(color)
+        # Draw rectangle
+        cv2.rectangle(image_ori, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), color, 2)
+        label = class_labels[int(class_pred)]
+        text = f"{label}: {conf:.2f}"
+        cv2.putText(image_ori, text, (int(box[0]), int(box[1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
+    return image_ori
+    # cv2.imwrite("test.png", image_ori)
+def infer(model, img, thresh, iou_thresh, anchors):
+    model.eval()
+    image = np.array(img)
+    image_copy = image.copy()
+    # image = image[np.newaxis, :]
+    augmentations = infer_transforms(image=image)
+    x = augmentations["image"]
+    # x = x.to("cuda")
+    x = torch.reshape(x, [1,3,416,416])
+    # print(x.shape)
+    with torch.no_grad():
+        out = model(x)
+        bboxes = [[] for _ in range(x.shape[0])]
+        for i in range(3):
+            batch_size, A, S, _, _ = out[i].shape
+            anchor = anchors[i]
+            boxes_scale_i = cells_to_bboxes(
+                out[i], anchor, S=S, is_preds=True
+            )
+            for idx, (box) in enumerate(boxes_scale_i):
+                bboxes[idx] += box
+    for i in range(batch_size):
+        nms_boxes = non_max_suppression(
+            bboxes[i], iou_threshold=iou_thresh, threshold=thresh, box_format="midpoint",
+        )
+        img = plot_image(x[i].permute(1,2,0).detach().cpu(), nms_boxes, image_copy)
+    return img
+scene = st.radio(
+    "Chọn bối cảnh",
+    ('19->20', '15->20', '10->20'))
+# scene = '19->20'
+# task = st.radio(
+#     "Chọn nhiệm vụ",
+#     ('task1', 'task2', 'finetune'))
+all = 20
+if scene == '19->20':
+    base = 19
+    new = all - base
+elif scene == '15->20':
+    base = 15
+    new = all - base
+else:
+    base = 10
+    new = all - base
+# if task == '1.Nhiệm vụ 1':
+#     cls = base
+#     task = 'task1'
+# elif task == '2. Nhiệm vụ 2 (trước tinh chỉnh)':
+#     cls = all
+#     tune = False
+# else:
+#     cls = all
+#     tune = True
+device = "cuda"
+if not torch.cuda.is_available():
+    device = "cpu"
+scaled_anchors = (
+        torch.tensor(ANCHORS)
+        * torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
+    ).to(device)
+uploaded_file = st.file_uploader("Chọn hình ảnh...", type=["jpg", "jpeg", "png"])
+# uploaded_file = '/home/ngocanh/Documents/final_thesis/code/dataset/10_10/base/images/test/000011.jpg'
+image = Image.open(uploaded_file)
+print("Thuc hien bien doi")
+#task 1
+file_path = f"2007_base_{base}_{new}_mAP_{base}_{new}.pth.tar"
+model = YOLOv3(num_classes=base).to(device)
+checkpoint = torch.load(file_path, map_location=device)
+model.load_state_dict(checkpoint["state_dict"])
+model.eval()
+image_1 = infer(model, image, 0.5, 0.5, scaled_anchors)
+#task 2
+file_path = f"2007_task2_{base}_{new}_mAP_{base}_{new}.pth.tar"
+model = YOLOv3(num_classes=all).to(device)
+checkpoint = torch.load(file_path, map_location=device)
+model.load_state_dict(checkpoint["state_dict"])
+model.eval()
+image_2 = infer(model, image, 0.5, 0.5, scaled_anchors)
+#ft
+file_path = f"2007_finetune_{base}_{new}_mAP_{base}_{new}.pth.tar"
+checkpoint = torch.load(file_path, map_location=device)
+model.load_state_dict(checkpoint["state_dict"])
+model.eval()
+image_3 = infer(model, image, 0.5, 0.5, scaled_anchors)
+# Streamlit App
+# Widget tải lên file ảnh
+# note = Image.open("note.png")
+# st.image(note, width=150)
+col1, col2, col3, col4 = st.columns(4)
+with col1:
+    st.image(image, caption="Ảnh đầu vào", use_column_width=True)
+with col2:
+    st.image(image_1, caption="Kết quả task 1", channels="BGR", use_column_width=True)
+with col3:
+    st.image(image_1, caption="Kết quả task 2 (no finetune)", channels="BGR", use_column_width=True)
+with col4:
+    st.image(image_1, caption="Kết quả task 2 (finetune)", channels="BGR", use_column_width=True)
+# import cv2
+# image_1 = cv2.cvtColor(image_1, cv2.COLOR_BGR2RGB)
+# cv2.imwrite('test.jpg',image_1)
+    # Hiển thị ảnh gốc
+    # TODO: Đưa ảnh qua mô hình để xử lý (đoán, biến đổi, ...)
+    # Hiển thị kết quả (ảnh sau khi qua mô hình), nếu có
+    # Ví dụ: Nếu bạn đã có kết quả từ mô hình (processed_img) là một PIL Image
+    # st.image(processed_img, caption="Processed Image", use_column_width=True)

image_store_base_15_5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:33771fc0994f203637a205e5efb5fbb76300fd5f7cf4844d7dbe71acca1dec24
+size 13231

model.py ADDED Viewed

	@@ -0,0 +1,239 @@

+"""
+Implementation of YOLOv3 architecture
+"""
+import torch
+import torch.nn as nn
+import config as cfg
+"""
+Information about architecture config:
+Tuple is structured by (filters, kernel_size, stride)
+Every conv is a same convolution.
+List is structured by "B" indicating a residual block followed by the number of repeats
+"S" is for scale prediction block and computing the yolo loss
+"U" is for upsampling the feature map and concatenating with a previous layer
+"""
+# Layer-by-layer description of the network, consumed by
+# YOLOv3._create_conv_layers: tuples are (filters, kernel_size, stride)
+# convolutions, ["B", n] is a residual block repeated n times, "S" is a
+# scale-prediction branch and "U" an upsampling step.
+config = [
+    (32, 3, 1),
+    (64, 3, 2),
+    ["B", 1],
+    (128, 3, 2),
+    ["B", 2],
+    (256, 3, 2),
+    ["B", 8],
+    (512, 3, 2),
+    ["B", 8],
+    (1024, 3, 2),
+    ["B", 4],  # To this point is Darknet-53
+    (512, 1, 1),
+    (1024, 3, 1),
+    "S",
+    (256, 1, 1),
+    "U",
+    (256, 1, 1),
+    (512, 3, 1),
+    "S",
+    (128, 1, 1),
+    "U",
+    (128, 1, 1),
+    (256, 3, 1),
+    "S",
+]
+class CNNBlock(nn.Module):
+    def __init__(self, in_channels, out_channels, bn_act=True, **kwargs):
+        super().__init__()
+        self.conv = nn.Conv2d(in_channels, out_channels, bias=not bn_act, **kwargs)
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.leaky = nn.LeakyReLU(0.1)
+        self.use_bn_act = bn_act
+    def forward(self, x):
+        if self.use_bn_act:
+            return self.leaky(self.bn(self.conv(x)))
+        else:
+            return self.conv(x)
+class ResidualBlock(nn.Module):
+    def __init__(self, channels, use_residual=True, num_repeats=1):
+        super().__init__()
+        self.layers = nn.ModuleList()
+        for repeat in range(num_repeats):
+            self.layers += [
+                nn.Sequential(
+                    CNNBlock(channels, channels // 2, kernel_size=1),
+                    CNNBlock(channels // 2, channels, kernel_size=3, padding=1),
+                )
+            ]
+        self.use_residual = use_residual
+        self.num_repeats = num_repeats
+    def forward(self, x):
+        for layer in self.layers:
+            if self.use_residual:
+                x = x + layer(x)
+            else:
+                x = layer(x)
+        return x
+class ScalePrediction(nn.Module):
+    def __init__(self, in_channels, num_classes):
+        super().__init__()
+        self.pred = nn.Sequential(
+            CNNBlock(in_channels, 2 * in_channels, kernel_size=3, padding=1),
+            CNNBlock(
+                2 * in_channels, (num_classes + 5) * 3, bn_act=False, kernel_size=1
+            ),
+        )
+        self.num_classes = num_classes
+    def forward(self, x):
+        return (
+            self.pred(x)
+            .reshape(x.shape[0], 3, -1 , x.shape[2], x.shape[3])
+            .permute(0, 1, 3, 4, 2)
+        )
+class YOLOv3(nn.Module):
+    def __init__(self, in_channels=3, num_classes=80):
+        super().__init__()
+        self.num_classes = num_classes
+        self.in_channels = in_channels
+        self.layers = self._create_conv_layers()
+        self.base_model = None
+        self.distill_feature = cfg.DISTILL
+        self.warp = cfg.WARP
+        self.feature_store = None
+        self.enable_warp_train = False
+    def get_features(self):
+        return self.features
+    def adaptation(self, layer_id, num_class, in_feature, old_class):
+        with torch.no_grad():
+            old_weight = self.layers[layer_id].pred[1].conv.weight
+            old_bias = self.layers[layer_id].pred[1].conv.bias
+            # print(model.layers[22].pred[1])
+            # print(model.layers[29].pred[1])
+            # out_dims = cfg.BASE_CLASS + cfg.NEW_CLASS + 5
+            self.layers[layer_id].pred[1] = CNNBlock(in_feature, (5 + num_class) * 3, bn_act=False, kernel_size=1)
+            # self.layers[layer_id].pred[1].conv.weight[:(5 + old_class) * 3] = old_weight
+            num_fea_old = 5 + old_class
+            self.layers[layer_id].pred[1].conv.weight[:num_fea_old] = old_weight[:num_fea_old]
+            self.layers[layer_id].pred[1].conv.weight[num_fea_old + (num_class - old_class): 2*num_fea_old + (num_class - old_class)] = old_weight[num_fea_old: 2* num_fea_old]
+            self.layers[layer_id].pred[1].conv.weight[2* num_fea_old + 2 * (num_class - old_class): 3*num_fea_old + 2 * (num_class - old_class)] = old_weight[2* num_fea_old:]
+            self.layers[layer_id].pred[1].conv.bias[:num_fea_old] = old_bias[:num_fea_old]
+            self.layers[layer_id].pred[1].conv.bias[num_fea_old + (num_class - old_class): 2*num_fea_old + (num_class - old_class)] = old_bias[num_fea_old: 2* num_fea_old]
+            self.layers[layer_id].pred[1].conv.bias[2* num_fea_old + 2 * (num_class - old_class): 3*num_fea_old + 2 * (num_class - old_class)] = old_bias[2* num_fea_old:]
+    def forward(self, x):
+        outputs = []  # for each scale
+        route_connections = []
+        self.features = []
+        for layer in self.layers:
+            if isinstance(layer, ScalePrediction):
+                # print(x.shape)
+                # print(layer.pred[1].conv.weight.shape)
+                outputs.append(layer(x))
+                continue
+            x = layer(x)
+            if isinstance(layer, ResidualBlock) and layer.num_repeats == 8:
+                self.features.append(x)
+                route_connections.append(x)
+            elif isinstance(layer, ResidualBlock) and layer.num_repeats == 4:
+                self.features.append(x)
+            elif isinstance(layer, nn.Upsample):
+                x = torch.cat([x, route_connections[-1]], dim=1)
+                route_connections.pop()
+        return outputs
+    def _create_conv_layers(self):
+        layers = nn.ModuleList()
+        in_channels = self.in_channels
+        for module in config:
+            if isinstance(module, tuple):
+                out_channels, kernel_size, stride = module
+                layers.append(
+                    CNNBlock(
+                        in_channels,
+                        out_channels,
+                        kernel_size=kernel_size,
+                        stride=stride,
+                        padding=1 if kernel_size == 3 else 0,
+                    )
+                )
+                in_channels = out_channels
+            elif isinstance(module, list):
+                num_repeats = module[1]
+                layers.append(ResidualBlock(in_channels, num_repeats=num_repeats,))
+            elif isinstance(module, str):
+                if module == "S":
+                    layers += [
+                        ResidualBlock(in_channels, use_residual=False, num_repeats=1),
+                        CNNBlock(in_channels, in_channels // 2, kernel_size=1),
+                        ScalePrediction(in_channels // 2, num_classes=self.num_classes),
+                    ]
+                    in_channels = in_channels // 2
+                elif module == "U":
+                    layers.append(nn.Upsample(scale_factor=2),)
+                    in_channels = in_channels * 3
+        return layers
+# Manual smoke test: load a 19-class checkpoint, widen the three prediction
+# heads to 20 classes and run one forward pass on random input.
+if __name__ == "__main__":
+    num_classes = 19
+    IMAGE_SIZE = 416
+    model = YOLOv3(num_classes=num_classes)
+    # print(model)
+    # Shapes of the first head's final conv before adaptation.
+    print(model.layers[15].pred[1].conv.weight.shape)
+    print(model.layers[15].pred[1].conv.bias.shape)
+    import torch.optim as optim
+    optimizer = optim.Adam(
+        model.parameters(), lr=cfg.LEARNING_RATE, weight_decay=cfg.WEIGHT_DECAY
+    )
+    # load_checkpoint comes from the project's utils module (not shown here).
+    from utils import load_checkpoint
+    load_checkpoint(
+            cfg.BASE_CHECK_POINT, model, optimizer, cfg.LEARNING_RATE
+    )
+    # Layers 15, 22 and 29 are the three ScalePrediction heads; in_feature is
+    # the input width of each head's final 1x1 conv.
+    model.adaptation(layer_id = 15, num_class = 20, in_feature = 1024, old_class = num_classes)
+    model.adaptation(layer_id = 22, num_class = 20, in_feature = 512, old_class = num_classes)
+    model.adaptation(layer_id = 29, num_class = 20, in_feature = 256, old_class = num_classes)
+    # Shape of the same conv after adaptation.
+    print(model.layers[15].pred[1].conv.weight.shape)
+    x = torch.randn((2, 3, IMAGE_SIZE, IMAGE_SIZE))
+    out = model(x)
+    # The output shapes are not asserted here; expected per scale:
+    # (2, 3, IMAGE_SIZE // stride, IMAGE_SIZE // stride, classes + 5).
+    print("Success!")

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+numpy>=1.19.2
+matplotlib>=3.3.4
+torch>=1.7.1
+tqdm>=4.56.0
+torchvision>=0.8.2
+albumentations>=0.5.2
+pandas>=1.2.1
+Pillow>=8.1.0