import cv2
import torch
import torch.nn.functional as F
import numpy as np
from PIL import Image
from modules import devices, masking
from modules.shared import opts


class DepthAnythingDetector:
    """https://github.com/LiheYoung/Depth-Anything"""

    def __init__(self, model):
        from torchvision.transforms import Compose
        from modules.control.proc.depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
        self.model = model
        # Preprocessing: resize to a multiple of 14 (DINOv2 ViT patch size),
        # normalize with ImageNet statistics, then prepare the array for the network.
        self.transform = Compose([
            Resize(
                width=518,
                height=518,
                resize_target=False,
                keep_aspect_ratio=True,
                ensure_multiple_of=14,
                resize_method="lower_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ])

    @classmethod
    def from_pretrained(cls, pretrained_model_or_path: str, cache_dir: str) -> "DepthAnythingDetector":
        from modules.control.proc.depth_anything.dpt import DPT_DINOv2
        import huggingface_hub as hf
        # Build the ViT-L DPT model, then load the checkpoint downloaded from the hub
        model = (
            DPT_DINOv2(
                encoder="vitl",
                features=256,
                out_channels=[256, 512, 1024, 1024],
                localhub=False,
            )
            .to(devices.device)
            .eval()
        )
        model_path = hf.hf_hub_download(repo_id=pretrained_model_or_path, filename="pytorch_model.bin", cache_dir=cache_dir)
        model_dict = torch.load(model_path)
        model.load_state_dict(model_dict)
        return cls(model)

    def __call__(self, image, color_map: str = "none", output_type: str = 'pil'):
        self.model.to(devices.device)
        if isinstance(image, Image.Image):
            image = np.array(image)
        h, w = image.shape[:2]
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
        image = self.transform({"image": image})["image"]
        image = torch.from_numpy(image).unsqueeze(0).to(devices.device)
        with devices.inference_context():
            depth = self.model(image)
            if opts.control_move_processor:
                self.model.to('cpu')  # free VRAM as soon as the forward pass is done
            # resize the prediction back to the input resolution and rescale to 0..255
            depth = F.interpolate(depth[None], (h, w), mode="bilinear", align_corners=False)[0, 0]
            depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
            depth = depth.cpu().numpy().astype(np.uint8)
        if color_map != 'none':
            depth = cv2.applyColorMap(depth, masking.COLORMAP.index(color_map))[:, :, ::-1]
        if output_type == "pil":
            depth = Image.fromarray(depth)
        return depth
    # def unload_model(self):
    #     self.model.to("cpu")
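

# Usage sketch (illustrative only; the repo id, cache path, and image file below are
# assumptions, and the SD.Next runtime providing `devices` and `opts` must already be
# initialized before the detector is constructed):
#
#   detector = DepthAnythingDetector.from_pretrained("LiheYoung/depth_anything_vitl14", cache_dir="models/depth")
#   depth = detector(Image.open("input.png"), output_type="pil")
#   depth.save("depth.png")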