Thibaud Cheruy committed
Commit 92d45d2 · 1 Parent(s): 56a1b0b

New: Add SRGAN Space

.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.pth.tar filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
+ venv/
+ .idea/
README.md CHANGED
@@ -8,6 +8,7 @@ sdk_version: 3.16.1
  app_file: app.py
  pinned: false
  license: apache-2.0
+ python_version: 3.10.3
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__pycache__/imgproc.cpython-310.pyc ADDED
Binary file (13.5 kB).
 
__pycache__/inference.cpython-310.pyc ADDED
Binary file (2.47 kB).
 
__pycache__/model.cpython-310.pyc ADDED
Binary file (6.9 kB).
 
__pycache__/utils.cpython-310.pyc ADDED
Binary file (5.46 kB).
 
app.py ADDED
@@ -0,0 +1,111 @@
+ import gradio as gr
+ import torch
+ import cv2
+
+ import imgproc
+ from imgproc import image_to_tensor
+ from inference import choice_device, build_model
+ from utils import load_state_dict
+
+ model = "srresnet_x4"
+
+ device = choice_device("cpu")
+
+ # Initialize the model
+ sr_model = build_model(model, device)
+ print(f"Built `{model}` model successfully.")
+
+ # Load model weights
+ sr_model = load_state_dict(sr_model, "weights/SRGAN_x4-ImageNet-8c4a7569.pth.tar")
+ print(f"Loaded `{model}` model weights successfully.")
+
+ # Put the model in evaluation (inference) mode
+ sr_model.eval()
+
+ def downscale(image):
+     (height, width, colors) = image.shape
+
+     new_height = int(60 * height / width)
+
+     return cv2.resize(image, (60, new_height), interpolation=cv2.INTER_AREA)
+
+
+ def preprocess(image):
+     image = image / 255.0
+
+     # Convert the image to a PyTorch tensor (NCHW)
+     tensor = image_to_tensor(image, False, False).unsqueeze_(0)
+
+     # Move the tensor to the target device in channels-last memory format
+     tensor = tensor.to(device=device, memory_format=torch.channels_last, non_blocking=True)
+
+     return tensor
+
+ def processHighRes(image):
+     if image is None:
+         raise gr.Error("Please provide an image")
+     downscaled = downscale(image)
+     lr_tensor = preprocess(downscaled)
+
+     # Use the model to generate a super-resolved image
+     with torch.no_grad():
+         sr_tensor = sr_model(lr_tensor)
+
+     # Convert the output tensor back to an image
+     sr_image = imgproc.tensor_to_image(sr_tensor, False, False)
+
+     return [downscaled, sr_image]
+
+ def processLowRes(image):
+     if image is None:
+         raise gr.Error("Please provide an image")
+
+     (height, width, colors) = image.shape
+
+     if height > 150 or width > 150:
+         raise gr.Error("Image is too big (must be at most 150x150 pixels)")
+
+     lr_tensor = preprocess(image)
+
+     # Use the model to generate a super-resolved image
+     with torch.no_grad():
+         sr_tensor = sr_model(lr_tensor)
+
+     # Convert the output tensor back to an image
+     sr_image = imgproc.tensor_to_image(sr_tensor, False, False)
+
+     return [sr_image]
+
+ description = """<p style='text-align: center'> <a href='https://arxiv.org/abs/1609.04802' target='_blank'>Paper</a> | <a href='https://github.com/Lornatang/SRGAN-PyTorch' target='_blank'>GitHub</a></p>"""
+
+ with gr.Blocks() as demo:
+     gr.Markdown("# **<p align='center'>SRGAN: Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network</p>**")
+     gr.Markdown(description)
+
+     with gr.Tab("From high res"):
+         high_res_input = gr.Image(label="High-res source image", show_label=True)
+         with gr.Row():
+             low_res_output = gr.Image(label="Low-res image")
+             srgan_output = gr.Image(label="SRGAN Output")
+         high_res_button = gr.Button("Process")
+
+     with gr.Tab("From low res"):
+         low_res_input = gr.Image(label="Low-res source image", show_label=True)
+         srgan_upscale = gr.Image(label="SRGAN Output")
+         low_res_button = gr.Button("Process")
+
+     gr.Examples(
+         examples=["examples/bird.png", "examples/butterfly.png", "examples/comic.png", "examples/gray.png",
+                   "examples/man.png"],
+         inputs=[high_res_input],
+         outputs=[low_res_output, srgan_output],
+         fn=processHighRes
+     )
+
+     high_res_button.click(processHighRes, inputs=[high_res_input], outputs=[low_res_output, srgan_output])
+     low_res_button.click(processLowRes, inputs=[low_res_input], outputs=[srgan_upscale])
+
+     gr.Markdown("<p style='text-align: center'>Made for the 2022-2023 Grenoble-INP Phelma Image Analysis course by Thibaud CHERUY, Clément DEBUY & Yassine El Khanoussi.</p>")
+
+
+ demo.launch()
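A minimal smoke test of this pipeline outside the Gradio UI, as a sketch only: it is not part of the committed app.py, assumes it is placed just above demo.launch() so the functions defined above are in scope, and uses one of the bundled example images.

    # Hypothetical local smoke test -- not part of the Space code.
    test_image = cv2.cvtColor(cv2.imread("examples/bird.png"), cv2.COLOR_BGR2RGB)  # Gradio hands the callbacks RGB arrays
    low_res, upscaled = processHighRes(test_image)
    print(low_res.shape, upscaled.shape)  # the SRGAN output is 4x the low-res size in each spatial dimension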
examples/bird.png ADDED
examples/butterfly.png ADDED
examples/comic.png ADDED
examples/gray.png ADDED
examples/man.png ADDED
imgproc.py ADDED
@@ -0,0 +1,575 @@
1
+ # Copyright 2022 Dakewe Biotech Corporation. All Rights Reserved.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ==============================================================================
14
+ import math
15
+ import random
16
+ from typing import Any
17
+
18
+ import cv2
19
+ import numpy as np
20
+ import torch
21
+ from numpy import ndarray
22
+ from torch import Tensor
23
+
24
+ __all__ = [
25
+ "image_to_tensor", "tensor_to_image",
26
+ "image_resize", "preprocess_one_image",
27
+ "expand_y", "rgb_to_ycbcr", "bgr_to_ycbcr", "ycbcr_to_bgr", "ycbcr_to_rgb",
28
+ "rgb_to_ycbcr_torch", "bgr_to_ycbcr_torch",
29
+ "center_crop", "random_crop", "random_rotate", "random_vertically_flip", "random_horizontally_flip",
30
+ ]
31
+
32
+
33
+ # Code reference `https://github.com/xinntao/BasicSR/blob/master/basicsr/utils/matlab_functions.py`
34
+ def _cubic(x: Any) -> Any:
35
+ """Implementation of `cubic` function in Matlab under Python language.
36
+
37
+ Args:
38
+ x: Element vector.
39
+
40
+ Returns:
41
+ Bicubic interpolation
42
+
43
+ """
44
+ absx = torch.abs(x)
45
+ absx2 = absx ** 2
46
+ absx3 = absx ** 3
47
+ return (1.5 * absx3 - 2.5 * absx2 + 1) * ((absx <= 1).type_as(absx)) + (
48
+ -0.5 * absx3 + 2.5 * absx2 - 4 * absx + 2) * (
49
+ ((absx > 1) * (absx <= 2)).type_as(absx))
50
+
51
+
52
+ # Code reference `https://github.com/xinntao/BasicSR/blob/master/basicsr/utils/matlab_functions.py`
53
+ def _calculate_weights_indices(in_length: int,
54
+ out_length: int,
55
+ scale: float,
56
+ kernel_width: int,
57
+ antialiasing: bool) -> [np.ndarray, np.ndarray, int, int]:
58
+ """Implementation of `calculate_weights_indices` function in Matlab under Python language.
59
+
60
+ Args:
61
+ in_length (int): Input length.
62
+ out_length (int): Output length.
63
+ scale (float): Scale factor.
64
+ kernel_width (int): Kernel width.
65
+ antialiasing (bool): Whether to apply antialiasing when down-sampling operations.
66
+ Caution: Bicubic down-sampling in PIL uses antialiasing by default.
67
+
68
+ Returns:
69
+ weights, indices, sym_len_s, sym_len_e
70
+
71
+ """
72
+ if (scale < 1) and antialiasing:
73
+ # Use a modified kernel (larger kernel width) to simultaneously
74
+ # interpolate and antialiasing
75
+ kernel_width = kernel_width / scale
76
+
77
+ # Output-space coordinates
78
+ x = torch.linspace(1, out_length, out_length)
79
+
80
+ # Input-space coordinates. Calculate the inverse mapping such that 0.5
81
+ # in output space maps to 0.5 in input space, and 0.5 + scale in output
82
+ # space maps to 1.5 in input space.
83
+ u = x / scale + 0.5 * (1 - 1 / scale)
84
+
85
+ # What is the left-most pixel that can be involved in the computation?
86
+ left = torch.floor(u - kernel_width / 2)
87
+
88
+ # What is the maximum number of pixels that can be involved in the
89
+ # computation? Note: it's OK to use an extra pixel here; if the
90
+ # corresponding weights are all zero, it will be eliminated at the end
91
+ # of this function.
92
+ p = math.ceil(kernel_width) + 2
93
+
94
+ # The indices of the input pixels involved in computing the k-th output
95
+ # pixel are in row k of the indices matrix.
96
+ indices = left.view(out_length, 1).expand(out_length, p) + torch.linspace(0, p - 1, p).view(1, p).expand(
97
+ out_length, p)
98
+
99
+ # The weights used to compute the k-th output pixel are in row k of the
100
+ # weights matrix.
101
+ distance_to_center = u.view(out_length, 1).expand(out_length, p) - indices
102
+
103
+ # apply cubic kernel
104
+ if (scale < 1) and antialiasing:
105
+ weights = scale * _cubic(distance_to_center * scale)
106
+ else:
107
+ weights = _cubic(distance_to_center)
108
+
109
+ # Normalize the weights matrix so that each row sums to 1.
110
+ weights_sum = torch.sum(weights, 1).view(out_length, 1)
111
+ weights = weights / weights_sum.expand(out_length, p)
112
+
113
+ # If a column in weights is all zero, get rid of it. only consider the
114
+ # first and last column.
115
+ weights_zero_tmp = torch.sum((weights == 0), 0)
116
+ if not math.isclose(weights_zero_tmp[0], 0, rel_tol=1e-6):
117
+ indices = indices.narrow(1, 1, p - 2)
118
+ weights = weights.narrow(1, 1, p - 2)
119
+ if not math.isclose(weights_zero_tmp[-1], 0, rel_tol=1e-6):
120
+ indices = indices.narrow(1, 0, p - 2)
121
+ weights = weights.narrow(1, 0, p - 2)
122
+ weights = weights.contiguous()
123
+ indices = indices.contiguous()
124
+ sym_len_s = -indices.min() + 1
125
+ sym_len_e = indices.max() - in_length
126
+ indices = indices + sym_len_s - 1
127
+ return weights, indices, int(sym_len_s), int(sym_len_e)
128
+
129
+
130
+ def image_to_tensor(image: ndarray, range_norm: bool, half: bool) -> Tensor:
131
+ """Convert the image data type to the Tensor (NCWH) data type supported by PyTorch
132
+
133
+ Args:
134
+ image (np.ndarray): The image data read by ``OpenCV.imread``, the data range is [0,255] or [0, 1]
135
+ range_norm (bool): Scale [0, 1] data to between [-1, 1]
136
+ half (bool): Whether to convert torch.float32 similarly to torch.half type
137
+
138
+ Returns:
139
+ tensor (Tensor): Data types supported by PyTorch
140
+
141
+ Examples:
142
+ >>> example_image = cv2.imread("lr_image.bmp")
143
+ >>> example_tensor = image_to_tensor(example_image, range_norm=True, half=False)
144
+
145
+ """
146
+ # Convert image data type to Tensor data type
147
+ tensor = torch.from_numpy(np.ascontiguousarray(image)).permute(2, 0, 1).float()
148
+
149
+ # Scale the image data from [0, 1] to [-1, 1]
150
+ if range_norm:
151
+ tensor = tensor.mul(2.0).sub(1.0)
152
+
153
+ # Convert torch.float32 image data type to torch.half image data type
154
+ if half:
155
+ tensor = tensor.half()
156
+
157
+ return tensor
158
+
159
+
160
+ def tensor_to_image(tensor: Tensor, range_norm: bool, half: bool) -> Any:
161
+ """Convert the Tensor(NCWH) data type supported by PyTorch to the np.ndarray(WHC) image data type
162
+
163
+ Args:
164
+ tensor (Tensor): Data types supported by PyTorch (NCHW), the data range is [0, 1]
165
+ range_norm (bool): Scale [-1, 1] data to between [0, 1]
166
+ half (bool): Whether to convert torch.float32 similarly to torch.half type.
167
+
168
+ Returns:
169
+ image (np.ndarray): Data types supported by PIL or OpenCV
170
+
171
+ Examples:
172
+ >>> example_tensor = torch.randn(1, 3, 64, 64)
+ >>> example_image = tensor_to_image(example_tensor, range_norm=False, half=False)
174
+
175
+ """
176
+ if range_norm:
177
+ tensor = tensor.add(1.0).div(2.0)
178
+ if half:
179
+ tensor = tensor.half()
180
+
181
+ image = tensor.squeeze(0).permute(1, 2, 0).mul(255).clamp(0, 255).cpu().numpy().astype("uint8")
182
+
183
+ return image
184
+
185
+
186
+ def preprocess_one_image(image_path: str, device: torch.device) -> Tensor:
187
+ image = cv2.imread(image_path).astype(np.float32) / 255.0
188
+
189
+ # BGR to RGB
190
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
191
+
192
+ # Convert image data to pytorch format data
193
+ tensor = image_to_tensor(image, False, False).unsqueeze_(0)
194
+
195
+ # Transfer tensor channel image format data to CUDA device
196
+ tensor = tensor.to(device=device, memory_format=torch.channels_last, non_blocking=True)
197
+
198
+ return tensor
199
+
200
+
201
+ # Code reference `https://github.com/xinntao/BasicSR/blob/master/basicsr/utils/matlab_functions.py`
202
+ def image_resize(image: Any, scale_factor: float, antialiasing: bool = True) -> Any:
203
+ """Implementation of `imresize` function in Matlab under Python language.
204
+
205
+ Args:
206
+ image: The input image.
207
+ scale_factor (float): Scale factor. The same scale applies for both height and width.
208
+ antialiasing (bool): Whether to apply antialiasing when down-sampling operations.
209
+ Caution: Bicubic down-sampling in `PIL` uses antialiasing by default. Default: ``True``.
210
+
211
+ Returns:
212
+ out_2 (np.ndarray): Output image with shape (c, h, w), [0, 1] range, w/o round
213
+
214
+ """
215
+ squeeze_flag = False
216
+ if type(image).__module__ == np.__name__: # numpy type
217
+ numpy_type = True
218
+ if image.ndim == 2:
219
+ image = image[:, :, None]
220
+ squeeze_flag = True
221
+ image = torch.from_numpy(image.transpose(2, 0, 1)).float()
222
+ else:
223
+ numpy_type = False
224
+ if image.ndim == 2:
225
+ image = image.unsqueeze(0)
226
+ squeeze_flag = True
227
+
228
+ in_c, in_h, in_w = image.size()
229
+ out_h, out_w = math.ceil(in_h * scale_factor), math.ceil(in_w * scale_factor)
230
+ kernel_width = 4
231
+
232
+ # get weights and indices
233
+ weights_h, indices_h, sym_len_hs, sym_len_he = _calculate_weights_indices(in_h, out_h, scale_factor, kernel_width,
234
+ antialiasing)
235
+ weights_w, indices_w, sym_len_ws, sym_len_we = _calculate_weights_indices(in_w, out_w, scale_factor, kernel_width,
236
+ antialiasing)
237
+ # process H dimension
238
+ # symmetric copying
239
+ img_aug = torch.FloatTensor(in_c, in_h + sym_len_hs + sym_len_he, in_w)
240
+ img_aug.narrow(1, sym_len_hs, in_h).copy_(image)
241
+
242
+ sym_patch = image[:, :sym_len_hs, :]
243
+ inv_idx = torch.arange(sym_patch.size(1) - 1, -1, -1).long()
244
+ sym_patch_inv = sym_patch.index_select(1, inv_idx)
245
+ img_aug.narrow(1, 0, sym_len_hs).copy_(sym_patch_inv)
246
+
247
+ sym_patch = image[:, -sym_len_he:, :]
248
+ inv_idx = torch.arange(sym_patch.size(1) - 1, -1, -1).long()
249
+ sym_patch_inv = sym_patch.index_select(1, inv_idx)
250
+ img_aug.narrow(1, sym_len_hs + in_h, sym_len_he).copy_(sym_patch_inv)
251
+
252
+ out_1 = torch.FloatTensor(in_c, out_h, in_w)
253
+ kernel_width = weights_h.size(1)
254
+ for i in range(out_h):
255
+ idx = int(indices_h[i][0])
256
+ for j in range(in_c):
257
+ out_1[j, i, :] = img_aug[j, idx:idx + kernel_width, :].transpose(0, 1).mv(weights_h[i])
258
+
259
+ # process W dimension
260
+ # symmetric copying
261
+ out_1_aug = torch.FloatTensor(in_c, out_h, in_w + sym_len_ws + sym_len_we)
262
+ out_1_aug.narrow(2, sym_len_ws, in_w).copy_(out_1)
263
+
264
+ sym_patch = out_1[:, :, :sym_len_ws]
265
+ inv_idx = torch.arange(sym_patch.size(2) - 1, -1, -1).long()
266
+ sym_patch_inv = sym_patch.index_select(2, inv_idx)
267
+ out_1_aug.narrow(2, 0, sym_len_ws).copy_(sym_patch_inv)
268
+
269
+ sym_patch = out_1[:, :, -sym_len_we:]
270
+ inv_idx = torch.arange(sym_patch.size(2) - 1, -1, -1).long()
271
+ sym_patch_inv = sym_patch.index_select(2, inv_idx)
272
+ out_1_aug.narrow(2, sym_len_ws + in_w, sym_len_we).copy_(sym_patch_inv)
273
+
274
+ out_2 = torch.FloatTensor(in_c, out_h, out_w)
275
+ kernel_width = weights_w.size(1)
276
+ for i in range(out_w):
277
+ idx = int(indices_w[i][0])
278
+ for j in range(in_c):
279
+ out_2[j, :, i] = out_1_aug[j, :, idx:idx + kernel_width].mv(weights_w[i])
280
+
281
+ if squeeze_flag:
282
+ out_2 = out_2.squeeze(0)
283
+ if numpy_type:
284
+ out_2 = out_2.numpy()
285
+ if not squeeze_flag:
286
+ out_2 = out_2.transpose(1, 2, 0)
287
+
288
+ return out_2
289
+
290
+
291
+ def expand_y(image: np.ndarray) -> np.ndarray:
292
+ """Convert BGR channel to YCbCr format,
293
+ and expand Y channel data in YCbCr, from HW to HWC
294
+
295
+ Args:
296
+ image (np.ndarray): BGR image data read by ``OpenCV.imread``
297
+
298
+ Returns:
299
+ y_image (np.ndarray): Y-channel image data in HWC form
300
+
301
+ """
302
+ # Normalize image data to [0, 1]
303
+ image = image.astype(np.float32) / 255.
304
+
305
+ # Convert BGR to YCbCr, and extract only Y channel
306
+ y_image = bgr_to_ycbcr(image, only_use_y_channel=True)
307
+
308
+ # Expand Y channel
309
+ y_image = y_image[..., None]
310
+
311
+ # Normalize the image data to [0, 255]
312
+ y_image = y_image.astype(np.float64) * 255.0
313
+
314
+ return y_image
315
+
316
+
317
+ def rgb_to_ycbcr(image: np.ndarray, only_use_y_channel: bool) -> np.ndarray:
318
+ """Implementation of rgb2ycbcr function in Matlab under Python language
319
+
320
+ Args:
321
+ image (np.ndarray): Image input in RGB format.
322
+ only_use_y_channel (bool): Extract Y channel separately
323
+
324
+ Returns:
325
+ image (np.ndarray): YCbCr image array data
326
+
327
+ """
328
+ if only_use_y_channel:
329
+ image = np.dot(image, [65.481, 128.553, 24.966]) + 16.0
330
+ else:
331
+ image = np.matmul(image, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786], [24.966, 112.0, -18.214]]) + [
332
+ 16, 128, 128]
333
+
334
+ image /= 255.
335
+ image = image.astype(np.float32)
336
+
337
+ return image
338
+
339
+
340
+ def bgr_to_ycbcr(image: np.ndarray, only_use_y_channel: bool) -> np.ndarray:
341
+ """Implementation of bgr2ycbcr function in Matlab under Python language.
342
+
343
+ Args:
344
+ image (np.ndarray): Image input in BGR format
345
+ only_use_y_channel (bool): Extract Y channel separately
346
+
347
+ Returns:
348
+ image (np.ndarray): YCbCr image array data
349
+
350
+ """
351
+ if only_use_y_channel:
352
+ image = np.dot(image, [24.966, 128.553, 65.481]) + 16.0
353
+ else:
354
+ image = np.matmul(image, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786], [65.481, -37.797, 112.0]]) + [
355
+ 16, 128, 128]
356
+
357
+ image /= 255.
358
+ image = image.astype(np.float32)
359
+
360
+ return image
361
+
362
+
363
+ def ycbcr_to_rgb(image: np.ndarray) -> np.ndarray:
364
+ """Implementation of ycbcr2rgb function in Matlab under Python language.
365
+
366
+ Args:
367
+ image (np.ndarray): Image input in YCbCr format.
368
+
369
+ Returns:
370
+ image (np.ndarray): RGB image array data
371
+
372
+ """
373
+ image_dtype = image.dtype
374
+ image *= 255.
375
+
376
+ image = np.matmul(image, [[0.00456621, 0.00456621, 0.00456621],
377
+ [0, -0.00153632, 0.00791071],
378
+ [0.00625893, -0.00318811, 0]]) * 255.0 + [-222.921, 135.576, -276.836]
379
+
380
+ image /= 255.
381
+ image = image.astype(image_dtype)
382
+
383
+ return image
384
+
385
+
386
+ def ycbcr_to_bgr(image: np.ndarray) -> np.ndarray:
387
+ """Implementation of ycbcr2bgr function in Matlab under Python language.
388
+
389
+ Args:
390
+ image (np.ndarray): Image input in YCbCr format.
391
+
392
+ Returns:
393
+ image (np.ndarray): BGR image array data
394
+
395
+ """
396
+ image_dtype = image.dtype
397
+ image *= 255.
398
+
399
+ image = np.matmul(image, [[0.00456621, 0.00456621, 0.00456621],
400
+ [0.00791071, -0.00153632, 0],
401
+ [0, -0.00318811, 0.00625893]]) * 255.0 + [-276.836, 135.576, -222.921]
402
+
403
+ image /= 255.
404
+ image = image.astype(image_dtype)
405
+
406
+ return image
407
+
408
+
409
+ def rgb_to_ycbcr_torch(tensor: Tensor, only_use_y_channel: bool) -> Tensor:
410
+ """Implementation of rgb2ycbcr function in Matlab under PyTorch
411
+
412
+ References from:`https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion`
413
+
414
+ Args:
415
+ tensor (Tensor): Image data in PyTorch format
416
+ only_use_y_channel (bool): Extract only Y channel
417
+
418
+ Returns:
419
+ tensor (Tensor): YCbCr image data in PyTorch format
420
+
421
+ """
422
+ if only_use_y_channel:
423
+ weight = Tensor([[65.481], [128.553], [24.966]]).to(tensor)
424
+ tensor = torch.matmul(tensor.permute(0, 2, 3, 1), weight).permute(0, 3, 1, 2) + 16.0
425
+ else:
426
+ weight = Tensor([[65.481, -37.797, 112.0],
427
+ [128.553, -74.203, -93.786],
428
+ [24.966, 112.0, -18.214]]).to(tensor)
429
+ bias = Tensor([16, 128, 128]).view(1, 3, 1, 1).to(tensor)
430
+ tensor = torch.matmul(tensor.permute(0, 2, 3, 1), weight).permute(0, 3, 1, 2) + bias
431
+
432
+ tensor /= 255.
433
+
434
+ return tensor
435
+
436
+
437
+ def bgr_to_ycbcr_torch(tensor: Tensor, only_use_y_channel: bool) -> Tensor:
438
+ """Implementation of bgr2ycbcr function in Matlab under PyTorch
439
+
440
+ References from:`https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion`
441
+
442
+ Args:
443
+ tensor (Tensor): Image data in PyTorch format
444
+ only_use_y_channel (bool): Extract only Y channel
445
+
446
+ Returns:
447
+ tensor (Tensor): YCbCr image data in PyTorch format
448
+
449
+ """
450
+ if only_use_y_channel:
451
+ weight = Tensor([[24.966], [128.553], [65.481]]).to(tensor)
452
+ tensor = torch.matmul(tensor.permute(0, 2, 3, 1), weight).permute(0, 3, 1, 2) + 16.0
453
+ else:
454
+ weight = Tensor([[24.966, 112.0, -18.214],
455
+ [128.553, -74.203, -93.786],
456
+ [65.481, -37.797, 112.0]]).to(tensor)
457
+ bias = Tensor([16, 128, 128]).view(1, 3, 1, 1).to(tensor)
458
+ tensor = torch.matmul(tensor.permute(0, 2, 3, 1), weight).permute(0, 3, 1, 2) + bias
459
+
460
+ tensor /= 255.
461
+
462
+ return tensor
463
+
464
+
465
+ def center_crop(image: np.ndarray, image_size: int) -> np.ndarray:
466
+ """Crop small image patches from one image center area.
467
+
468
+ Args:
469
+ image (np.ndarray): The input image for `OpenCV.imread`.
470
+ image_size (int): The size of the captured image area.
471
+
472
+ Returns:
473
+ patch_image (np.ndarray): Small patch image
474
+
475
+ """
476
+ image_height, image_width = image.shape[:2]
477
+
478
+ # Just need to find the top and left coordinates of the image
479
+ top = (image_height - image_size) // 2
480
+ left = (image_width - image_size) // 2
481
+
482
+ # Crop image patch
483
+ patch_image = image[top:top + image_size, left:left + image_size, ...]
484
+
485
+ return patch_image
486
+
487
+
488
+ def random_crop(image: np.ndarray, image_size: int) -> np.ndarray:
489
+ """Crop small image patches from one image.
490
+
491
+ Args:
492
+ image (np.ndarray): The input image for `OpenCV.imread`.
493
+ image_size (int): The size of the captured image area.
494
+
495
+ Returns:
496
+ patch_image (np.ndarray): Small patch image
497
+
498
+ """
499
+ image_height, image_width = image.shape[:2]
500
+
501
+ # Just need to find the top and left coordinates of the image
502
+ top = random.randint(0, image_height - image_size)
503
+ left = random.randint(0, image_width - image_size)
504
+
505
+ # Crop image patch
506
+ patch_image = image[top:top + image_size, left:left + image_size, ...]
507
+
508
+ return patch_image
509
+
510
+
511
+ def random_rotate(image,
512
+ angles: list,
513
+ center: tuple[int, int] = None,
514
+ scale_factor: float = 1.0) -> np.ndarray:
515
+ """Rotate an image by a random angle
516
+
517
+ Args:
518
+ image (np.ndarray): Image read with OpenCV
519
+ angles (list): Rotation angle range
520
+ center (optional, tuple[int, int]): High resolution image selection center point. Default: ``None``
521
+ scale_factor (optional, float): scaling factor. Default: 1.0
522
+
523
+ Returns:
524
+ rotated_image (np.ndarray): image after rotation
525
+
526
+ """
527
+ image_height, image_width = image.shape[:2]
528
+
529
+ if center is None:
530
+ center = (image_width // 2, image_height // 2)
531
+
532
+ # Random select specific angle
533
+ angle = random.choice(angles)
534
+ matrix = cv2.getRotationMatrix2D(center, angle, scale_factor)
535
+ rotated_image = cv2.warpAffine(image, matrix, (image_width, image_height))
536
+
537
+ return rotated_image
538
+
539
+
540
+ def random_horizontally_flip(image: np.ndarray, p: float = 0.5) -> np.ndarray:
541
+ """Flip the image upside down randomly
542
+
543
+ Args:
544
+ image (np.ndarray): Image read with OpenCV
545
+ p (optional, float): Horizontally flip probability. Default: 0.5
546
+
547
+ Returns:
548
+ horizontally_flip_image (np.ndarray): image after horizontally flip
549
+
550
+ """
551
+ if random.random() < p:
552
+ horizontally_flip_image = cv2.flip(image, 1)
553
+ else:
554
+ horizontally_flip_image = image
555
+
556
+ return horizontally_flip_image
557
+
558
+
559
+ def random_vertically_flip(image: np.ndarray, p: float = 0.5) -> np.ndarray:
560
+ """Flip an image horizontally randomly
561
+
562
+ Args:
563
+ image (np.ndarray): Image read with OpenCV
564
+ p (optional, float): Vertically flip probability. Default: 0.5
565
+
566
+ Returns:
567
+ vertically_flip_image (np.ndarray): image after vertically flip
568
+
569
+ """
570
+ if random.random() < p:
571
+ vertically_flip_image = cv2.flip(image, 0)
572
+ else:
573
+ vertically_flip_image = image
574
+
575
+ return vertically_flip_image
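As a usage illustration for the helpers above, the following sketch (not part of the commit; the file path and the 0.25 scale factor are arbitrary choices) round-trips an image through image_to_tensor / tensor_to_image and applies the MATLAB-style bicubic resize:

    import cv2
    import numpy as np

    import imgproc

    # Read one of the bundled example images as an RGB float array in [0, 1]
    rgb = cv2.cvtColor(cv2.imread("examples/comic.png"), cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

    tensor = imgproc.image_to_tensor(rgb, range_norm=False, half=False).unsqueeze_(0)  # 1 x 3 x H x W float tensor
    restored = imgproc.tensor_to_image(tensor, range_norm=False, half=False)           # back to HWC uint8 in [0, 255]

    small = imgproc.image_resize(rgb, scale_factor=0.25)  # antialiased bicubic downscale, as in MATLAB's imresize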
inference.py ADDED
@@ -0,0 +1,101 @@
+ # Copyright 2022 Dakewe Biotech Corporation. All Rights Reserved.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+ import argparse
+ import os
+
+ import cv2
+ import torch
+ from torch import nn
+
+ import imgproc
+ import model
+ from utils import load_state_dict
+
+ model_names = sorted(
+     name for name in model.__dict__ if
+     name.islower() and not name.startswith("__") and callable(model.__dict__[name]))
+
+
+ def choice_device(device_type: str) -> torch.device:
+     # Select the device the model will run on
+     if device_type == "cuda":
+         device = torch.device("cuda", 0)
+     else:
+         device = torch.device("cpu")
+     return device
+
+
+ def build_model(model_arch_name: str, device: torch.device) -> nn.Module:
+     # Initialize the super-resolution model
+     sr_model = model.__dict__[model_arch_name](in_channels=3,
+                                                out_channels=3,
+                                                channels=64,
+                                                num_rcb=16)
+     sr_model = sr_model.to(device=device)
+
+     return sr_model
+
+
+ def main(args):
+     device = choice_device(args.device_type)
+
+     # Initialize the model
+     sr_model = build_model(args.model_arch_name, device)
+     print(f"Built `{args.model_arch_name}` model successfully.")
+
+     # Load model weights
+     sr_model = load_state_dict(sr_model, args.model_weights_path)
+     print(f"Loaded `{args.model_arch_name}` model weights `{os.path.abspath(args.model_weights_path)}` successfully.")
+
+     # Put the model in evaluation (inference) mode
+     sr_model.eval()
+
+     lr_tensor = imgproc.preprocess_one_image(args.inputs_path, device)
+
+     # Use the model to generate a super-resolved image
+     with torch.no_grad():
+         sr_tensor = sr_model(lr_tensor)
+
+     # Save image
+     sr_image = imgproc.tensor_to_image(sr_tensor, False, False)
+     sr_image = cv2.cvtColor(sr_image, cv2.COLOR_RGB2BGR)
+     cv2.imwrite(args.output_path, sr_image)
+
+     print(f"SR image saved to `{args.output_path}`")
+
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description="Use the model to generate super-resolution images.")
+     parser.add_argument("--model_arch_name",
+                         type=str,
+                         default="srresnet_x4")
+     parser.add_argument("--inputs_path",
+                         type=str,
+                         default="./figure/comic_lr.png",
+                         help="Low-resolution image path.")
+     parser.add_argument("--output_path",
+                         type=str,
+                         default="./figure/comic_sr.png",
+                         help="Super-resolution image path.")
+     parser.add_argument("--model_weights_path",
+                         type=str,
+                         default="./results/pretrained_models/SRGAN_x4-ImageNet-8c4a7569.pth.tar",
+                         help="Model weights file path.")
+     parser.add_argument("--device_type",
+                         type=str,
+                         default="cpu",
+                         choices=["cpu", "cuda"])
+     args = parser.parse_args()
+
+     main(args)
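A hedged sketch of how this script could be driven programmatically inside the Space repository (equivalent to invoking it with the corresponding command-line flags; the output filename is an arbitrary choice, the other paths are files added in this commit):

    # Hypothetical programmatic use of inference.py -- not part of the commit.
    from argparse import Namespace
    from inference import main

    main(Namespace(model_arch_name="srresnet_x4",
                   inputs_path="examples/comic.png",
                   output_path="comic_sr.png",
                   model_weights_path="weights/SRGAN_x4-ImageNet-8c4a7569.pth.tar",
                   device_type="cpu"))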
model.py ADDED
@@ -0,0 +1,251 @@
1
+ # Copyright 2022 Dakewe Biotech Corporation. All Rights Reserved.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ==============================================================================
14
+ import math
15
+ from typing import Any
16
+
17
+ import torch
18
+ from torch import Tensor
19
+ from torch import nn
20
+ from torch.nn import functional as F_torch
21
+ from torchvision import models
22
+ from torchvision import transforms
23
+ from torchvision.models.feature_extraction import create_feature_extractor
24
+
25
+ __all__ = [
26
+ "SRResNet", "Discriminator",
27
+ "srresnet_x4", "discriminator", "content_loss",
28
+ ]
29
+
30
+
31
+ class SRResNet(nn.Module):
32
+ def __init__(
33
+ self,
34
+ in_channels: int,
35
+ out_channels: int,
36
+ channels: int,
37
+ num_rcb: int,
38
+ upscale_factor: int
39
+ ) -> None:
40
+ super(SRResNet, self).__init__()
41
+ # Low frequency information extraction layer
42
+ self.conv1 = nn.Sequential(
43
+ nn.Conv2d(in_channels, channels, (9, 9), (1, 1), (4, 4)),
44
+ nn.PReLU(),
45
+ )
46
+
47
+ # High frequency information extraction block
48
+ trunk = []
49
+ for _ in range(num_rcb):
50
+ trunk.append(_ResidualConvBlock(channels))
51
+ self.trunk = nn.Sequential(*trunk)
52
+
53
+ # High-frequency information linear fusion layer
54
+ self.conv2 = nn.Sequential(
55
+ nn.Conv2d(channels, channels, (3, 3), (1, 1), (1, 1), bias=False),
56
+ nn.BatchNorm2d(channels),
57
+ )
58
+
59
+ # zoom block
60
+ upsampling = []
61
+ if upscale_factor == 2 or upscale_factor == 4 or upscale_factor == 8:
62
+ for _ in range(int(math.log(upscale_factor, 2))):
63
+ upsampling.append(_UpsampleBlock(channels, 2))
64
+ elif upscale_factor == 3:
65
+ upsampling.append(_UpsampleBlock(channels, 3))
66
+ self.upsampling = nn.Sequential(*upsampling)
67
+
68
+ # reconstruction block
69
+ self.conv3 = nn.Conv2d(channels, out_channels, (9, 9), (1, 1), (4, 4))
70
+
71
+ # Initialize neural network weights
72
+ self._initialize_weights()
73
+
74
+ def forward(self, x: Tensor) -> Tensor:
75
+ return self._forward_impl(x)
76
+
77
+ # Support torch.script function
78
+ def _forward_impl(self, x: Tensor) -> Tensor:
79
+ out1 = self.conv1(x)
80
+ out = self.trunk(out1)
81
+ out2 = self.conv2(out)
82
+ out = torch.add(out1, out2)
83
+ out = self.upsampling(out)
84
+ out = self.conv3(out)
85
+
86
+ out = torch.clamp_(out, 0.0, 1.0)
87
+
88
+ return out
89
+
90
+ def _initialize_weights(self) -> None:
91
+ for module in self.modules():
92
+ if isinstance(module, nn.Conv2d):
93
+ nn.init.kaiming_normal_(module.weight)
94
+ if module.bias is not None:
95
+ nn.init.constant_(module.bias, 0)
96
+ elif isinstance(module, nn.BatchNorm2d):
97
+ nn.init.constant_(module.weight, 1)
98
+
99
+
100
+ class Discriminator(nn.Module):
101
+ def __init__(self) -> None:
102
+ super(Discriminator, self).__init__()
103
+ self.features = nn.Sequential(
104
+ # input size. (3) x 96 x 96
105
+ nn.Conv2d(3, 64, (3, 3), (1, 1), (1, 1), bias=True),
106
+ nn.LeakyReLU(0.2, True),
107
+ # state size. (64) x 48 x 48
108
+ nn.Conv2d(64, 64, (3, 3), (2, 2), (1, 1), bias=False),
109
+ nn.BatchNorm2d(64),
110
+ nn.LeakyReLU(0.2, True),
111
+ nn.Conv2d(64, 128, (3, 3), (1, 1), (1, 1), bias=False),
112
+ nn.BatchNorm2d(128),
113
+ nn.LeakyReLU(0.2, True),
114
+ # state size. (128) x 24 x 24
115
+ nn.Conv2d(128, 128, (3, 3), (2, 2), (1, 1), bias=False),
116
+ nn.BatchNorm2d(128),
117
+ nn.LeakyReLU(0.2, True),
118
+ nn.Conv2d(128, 256, (3, 3), (1, 1), (1, 1), bias=False),
119
+ nn.BatchNorm2d(256),
120
+ nn.LeakyReLU(0.2, True),
121
+ # state size. (256) x 12 x 12
122
+ nn.Conv2d(256, 256, (3, 3), (2, 2), (1, 1), bias=False),
123
+ nn.BatchNorm2d(256),
124
+ nn.LeakyReLU(0.2, True),
125
+ nn.Conv2d(256, 512, (3, 3), (1, 1), (1, 1), bias=False),
126
+ nn.BatchNorm2d(512),
127
+ nn.LeakyReLU(0.2, True),
128
+ # state size. (512) x 6 x 6
129
+ nn.Conv2d(512, 512, (3, 3), (2, 2), (1, 1), bias=False),
130
+ nn.BatchNorm2d(512),
131
+ nn.LeakyReLU(0.2, True),
132
+ )
133
+
134
+ self.classifier = nn.Sequential(
135
+ nn.Linear(512 * 6 * 6, 1024),
136
+ nn.LeakyReLU(0.2, True),
137
+ nn.Linear(1024, 1),
138
+ )
139
+
140
+ def forward(self, x: Tensor) -> Tensor:
141
+ # Input image size must equal 96
142
+ assert x.shape[2] == 96 and x.shape[3] == 96, "Image shape must equal 96x96"
143
+
144
+ out = self.features(x)
145
+ out = torch.flatten(out, 1)
146
+ out = self.classifier(out)
147
+
148
+ return out
149
+
150
+
151
+ class _ResidualConvBlock(nn.Module):
152
+ def __init__(self, channels: int) -> None:
153
+ super(_ResidualConvBlock, self).__init__()
154
+ self.rcb = nn.Sequential(
155
+ nn.Conv2d(channels, channels, (3, 3), (1, 1), (1, 1), bias=False),
156
+ nn.BatchNorm2d(channels),
157
+ nn.PReLU(),
158
+ nn.Conv2d(channels, channels, (3, 3), (1, 1), (1, 1), bias=False),
159
+ nn.BatchNorm2d(channels),
160
+ )
161
+
162
+ def forward(self, x: Tensor) -> Tensor:
163
+ identity = x
164
+
165
+ out = self.rcb(x)
166
+
167
+ out = torch.add(out, identity)
168
+
169
+ return out
170
+
171
+
172
+ class _UpsampleBlock(nn.Module):
173
+ def __init__(self, channels: int, upscale_factor: int) -> None:
174
+ super(_UpsampleBlock, self).__init__()
175
+ self.upsample_block = nn.Sequential(
176
+ nn.Conv2d(channels, channels * upscale_factor * upscale_factor, (3, 3), (1, 1), (1, 1)),
177
+ nn.PixelShuffle(upscale_factor),
178
+ nn.PReLU(),
179
+ )
180
+
181
+ def forward(self, x: Tensor) -> Tensor:
182
+ out = self.upsample_block(x)
183
+
184
+ return out
185
+
186
+
187
+ class _ContentLoss(nn.Module):
188
+ """Constructs a content loss function based on the VGG19 network.
189
+ Using high-level feature mapping layers from the latter layers will focus more on the texture content of the image.
190
+
191
+ Paper reference list:
192
+ -`Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network <https://arxiv.org/pdf/1609.04802.pdf>` paper.
193
+ -`ESRGAN: Enhanced Super-Resolution Generative Adversarial Networks <https://arxiv.org/pdf/1809.00219.pdf>` paper.
194
+ -`Perceptual Extreme Super Resolution Network with Receptive Field Block <https://arxiv.org/pdf/2005.12597.pdf>` paper.
195
+
196
+ """
197
+
198
+ def __init__(
199
+ self,
200
+ feature_model_extractor_node: str,
201
+ feature_model_normalize_mean: list,
202
+ feature_model_normalize_std: list
203
+ ) -> None:
204
+ super(_ContentLoss, self).__init__()
205
+ # Get the name of the specified feature extraction node
206
+ self.feature_model_extractor_node = feature_model_extractor_node
207
+ # Load the VGG19 model trained on the ImageNet dataset.
208
+ model = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1)
209
+ # Extract the thirty-sixth layer output in the VGG19 model as the content loss.
210
+ self.feature_extractor = create_feature_extractor(model, [feature_model_extractor_node])
211
+ # set to validation mode
212
+ self.feature_extractor.eval()
213
+
214
+ # The preprocessing method of the input data.
215
+ # This is the VGG model preprocessing method of the ImageNet dataset.
216
+ self.normalize = transforms.Normalize(feature_model_normalize_mean, feature_model_normalize_std)
217
+
218
+ # Freeze model parameters.
219
+ for model_parameters in self.feature_extractor.parameters():
220
+ model_parameters.requires_grad = False
221
+
222
+ def forward(self, sr_tensor: Tensor, gt_tensor: Tensor) -> Tensor:
223
+ # Standardized operations
224
+ sr_tensor = self.normalize(sr_tensor)
225
+ gt_tensor = self.normalize(gt_tensor)
226
+
227
+ sr_feature = self.feature_extractor(sr_tensor)[self.feature_model_extractor_node]
228
+ gt_feature = self.feature_extractor(gt_tensor)[self.feature_model_extractor_node]
229
+
230
+ # Find the feature map difference between the two images
231
+ loss = F_torch.mse_loss(sr_feature, gt_feature)
232
+
233
+ return loss
234
+
235
+
236
+ def srresnet_x4(**kwargs: Any) -> SRResNet:
237
+ model = SRResNet(upscale_factor=4, **kwargs)
238
+
239
+ return model
240
+
241
+
242
+ def discriminator() -> Discriminator:
243
+ model = Discriminator()
244
+
245
+ return model
246
+
247
+
248
+ def content_loss(**kwargs: Any) -> _ContentLoss:
249
+ content_loss = _ContentLoss(**kwargs)
250
+
251
+ return content_loss
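A small shape check can confirm the x4 upsampling path wires up as described above. This is a hypothetical snippet, not part of the commit; the 24x24 input size is arbitrary and the constructor arguments mirror build_model in inference.py.

    import torch

    from model import srresnet_x4

    net = srresnet_x4(in_channels=3, out_channels=3, channels=64, num_rcb=16)
    net.eval()
    with torch.no_grad():
        out = net(torch.rand(1, 3, 24, 24))
    print(out.shape)  # expected: torch.Size([1, 3, 96, 96]) -- 4x larger, clamped to [0, 1]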
utils.py ADDED
@@ -0,0 +1,168 @@
1
+ # Copyright 2022 Dakewe Biotech Corporation. All Rights Reserved.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ==============================================================================
14
+ import os
15
+ import shutil
16
+ from enum import Enum
17
+ from typing import Any
18
+
19
+ import torch
20
+ from torch import nn
21
+ from torch.nn import Module
22
+ from torch.optim import Optimizer
23
+
24
+ __all__ = [
25
+ "load_state_dict", "make_directory", "save_checkpoint",
26
+ "Summary", "AverageMeter", "ProgressMeter"
27
+ ]
28
+
29
+
30
+ def load_state_dict(
31
+ model: nn.Module,
32
+ model_weights_path: str,
33
+ ema_model: nn.Module = None,
34
+ optimizer: torch.optim.Optimizer = None,
35
+ scheduler: torch.optim.lr_scheduler = None,
36
+ load_mode: str = None,
37
+ ) -> tuple[Module, Module, Any, Any, Any, Optimizer | None, Any] | tuple[Module, Any, Any, Any, Optimizer | None, Any] | Module:
38
+ # Load model weights
39
+ checkpoint = torch.load(model_weights_path, map_location=lambda storage, loc: storage)
40
+
41
+ if load_mode == "resume":
42
+ # Restore the parameters in the training node to this point
43
+ start_epoch = checkpoint["epoch"]
44
+ best_psnr = checkpoint["best_psnr"]
45
+ best_ssim = checkpoint["best_ssim"]
46
+ # Load model state dict. Extract the fitted model weights
47
+ model_state_dict = model.state_dict()
48
+ state_dict = {k: v for k, v in checkpoint["state_dict"].items() if k in model_state_dict.keys()}
49
+ # Overwrite the model weights to the current model (base model)
50
+ model_state_dict.update(state_dict)
51
+ model.load_state_dict(model_state_dict)
52
+ # Load the optimizer model
53
+ optimizer.load_state_dict(checkpoint["optimizer"])
54
+
55
+ if scheduler is not None:
56
+ # Load the scheduler model
57
+ scheduler.load_state_dict(checkpoint["scheduler"])
58
+
59
+ if ema_model is not None:
60
+ # Load ema model state dict. Extract the fitted model weights
61
+ ema_model_state_dict = ema_model.state_dict()
62
+ ema_state_dict = {k: v for k, v in checkpoint["ema_state_dict"].items() if k in ema_model_state_dict.keys()}
63
+ # Overwrite the model weights to the current model (ema model)
64
+ ema_model_state_dict.update(ema_state_dict)
65
+ ema_model.load_state_dict(ema_model_state_dict)
66
+
67
+ return model, ema_model, start_epoch, best_psnr, best_ssim, optimizer, scheduler
68
+ else:
69
+ # Load model state dict. Extract the fitted model weights
70
+ model_state_dict = model.state_dict()
71
+ state_dict = {k: v for k, v in checkpoint["state_dict"].items() if
72
+ k in model_state_dict.keys() and v.size() == model_state_dict[k].size()}
73
+ # Overwrite the model weights to the current model
74
+ model_state_dict.update(state_dict)
75
+ model.load_state_dict(model_state_dict)
76
+
77
+ return model
78
+
79
+
80
+ def make_directory(dir_path: str) -> None:
81
+ if not os.path.exists(dir_path):
82
+ os.makedirs(dir_path)
83
+
84
+
85
+ def save_checkpoint(
86
+ state_dict: dict,
87
+ file_name: str,
88
+ samples_dir: str,
89
+ results_dir: str,
90
+ best_file_name: str,
91
+ last_file_name: str,
92
+ is_best: bool = False,
93
+ is_last: bool = False,
94
+ ) -> None:
95
+ checkpoint_path = os.path.join(samples_dir, file_name)
96
+ torch.save(state_dict, checkpoint_path)
97
+
98
+ if is_best:
99
+ shutil.copyfile(checkpoint_path, os.path.join(results_dir, best_file_name))
100
+ if is_last:
101
+ shutil.copyfile(checkpoint_path, os.path.join(results_dir, last_file_name))
102
+
103
+
104
+ class Summary(Enum):
105
+ NONE = 0
106
+ AVERAGE = 1
107
+ SUM = 2
108
+ COUNT = 3
109
+
110
+
111
+ class AverageMeter(object):
112
+ def __init__(self, name, fmt=":f", summary_type=Summary.AVERAGE):
113
+ self.name = name
114
+ self.fmt = fmt
115
+ self.summary_type = summary_type
116
+ self.reset()
117
+
118
+ def reset(self):
119
+ self.val = 0
120
+ self.avg = 0
121
+ self.sum = 0
122
+ self.count = 0
123
+
124
+ def update(self, val, n=1):
125
+ self.val = val
126
+ self.sum += val * n
127
+ self.count += n
128
+ self.avg = self.sum / self.count
129
+
130
+ def __str__(self):
131
+ fmtstr = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})"
132
+ return fmtstr.format(**self.__dict__)
133
+
134
+ def summary(self):
135
+ if self.summary_type is Summary.NONE:
136
+ fmtstr = ""
137
+ elif self.summary_type is Summary.AVERAGE:
138
+ fmtstr = "{name} {avg:.2f}"
139
+ elif self.summary_type is Summary.SUM:
140
+ fmtstr = "{name} {sum:.2f}"
141
+ elif self.summary_type is Summary.COUNT:
142
+ fmtstr = "{name} {count:.2f}"
143
+ else:
144
+ raise ValueError(f"Invalid summary type {self.summary_type}")
145
+
146
+ return fmtstr.format(**self.__dict__)
147
+
148
+
149
+ class ProgressMeter(object):
150
+ def __init__(self, num_batches, meters, prefix=""):
151
+ self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
152
+ self.meters = meters
153
+ self.prefix = prefix
154
+
155
+ def display(self, batch):
156
+ entries = [self.prefix + self.batch_fmtstr.format(batch)]
157
+ entries += [str(meter) for meter in self.meters]
158
+ print("\t".join(entries))
159
+
160
+ def display_summary(self):
161
+ entries = [" *"]
162
+ entries += [meter.summary() for meter in self.meters]
163
+ print(" ".join(entries))
164
+
165
+ def _get_batch_fmtstr(self, num_batches):
166
+ num_digits = len(str(num_batches // 1))
167
+ fmt = "{:" + str(num_digits) + "d}"
168
+ return "[" + fmt + "/" + fmt.format(num_batches) + "]"
weights/SRGAN_x4-ImageNet-8c4a7569.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7c5431b5921e1509190aed6aca02c7d5838f4805e8ae6f9fa08c140260b6a2a3
+ size 6285796