File size: 2,743 Bytes
cabf51c
a109861
ca99229
ef08f4e
01ddf42
 
91ef096
 
ef08f4e
75780cb
 
41f26de
01ddf42
 
 
 
 
 
8096d9e
68aee5f
01ddf42
bc2dc1a
 
 
 
6148b9b
1265529
cabf51c
1265529
 
8096d9e
02a0351
 
cabf51c
 
 
bc2dc1a
 
34cbd22
 
bc2dc1a
 
687617e
bc2dc1a
 
 
 
 
 
 
1265529
6ab8d85
1265529
bc2dc1a
 
dfa7b0a
1265529
 
 
 
 
 
dfa7b0a
f79cf87
1265529
 
bc2dc1a
8096d9e
4fea159
8096d9e
4fea159
 
 
 
1265529
bc2dc1a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import spaces
import os
import gradio as gr
import shutil
import sys 
import subprocess
import shlex
import torch

# ---------------------------------------------------------------------------
# Runtime bootstrap: install detectron2 and the bundled detrex wheel, fetch the
# vCLR sources, drop the demo config into the checkout, and make the checkout
# importable. This has to run before the `demo.*` / `detectron2.*` imports
# further down the file.
# ---------------------------------------------------------------------------


def _run_step(cmd):
    """Run one setup command without aborting start-up on failure.

    Args:
        cmd: Argument list handed to ``subprocess.run`` (no shell involved).

    Returns:
        True when the command started and exited with status 0, else False.
        Failures are reported on stderr so they are visible in the logs.
    """
    try:
        completed = subprocess.run(cmd)
    except OSError as err:
        # The executable could not be started (e.g. it is not installed).
        print(f"[setup] could not start {cmd[0]!r}: {err}", file=sys.stderr)
        return False
    if completed.returncode != 0:
        print(
            f"[setup] {shlex.join(cmd)} exited with status {completed.returncode}",
            file=sys.stderr,
        )
        return False
    return True


# Go through the running interpreter so packages land in the environment that
# executes this script, whichever `pip` happens to be first on PATH.
_PIP_INSTALL = [sys.executable, "-m", "pip", "install"]

_run_step(_PIP_INSTALL + ["git+https://github.com/facebookresearch/detectron2.git"])

# Clone only when the checkout is missing: `git clone` fails on an existing
# directory, which previously also skipped the config copy on a re-run.
if not os.path.isdir("vCLR"):
    _run_step(["git", "clone", "https://github.com/Visual-AI/vCLR.git"])

if os.path.isdir("vCLR"):
    try:
        # Same effect as `rm -f`: a missing file is not an error.
        try:
            os.remove(os.path.join("vCLR", "requirements.txt"))
        except FileNotFoundError:
            pass
        shutil.copy(
            "deformable_train_voc_eval_nonvoc.py",
            "vCLR/projects/vCLR_deformable_mask/configs/dino-resnet/",
        )
    except OSError as err:
        print(f"[setup] could not prepare the vCLR checkout: {err}", file=sys.stderr)

_run_step(_PIP_INSTALL + ["detrex-0.3.0-cp310-cp310-linux_x86_64.whl"])

# The `demo` package imported below lives inside the vCLR checkout.
sys.path.append("vCLR/")


from demo.predictors import VisualizationDemo
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import LazyConfig, instantiate
import numpy as np
from PIL import Image


if __name__ == "__main__":
    # Entry point: build the model once, then serve it through a Gradio UI.
    gr.close_all()

    # Config file that the setup section of this script copies into the vCLR
    # checkout; both the model and the train section are pointed at the GPU.
    cfg = LazyConfig.load("vCLR/projects/vCLR_deformable_mask/configs/dino-resnet/deformable_train_voc_eval_nonvoc.py")
    cfg["model"].device = "cuda"
    cfg["train"].device = "cuda"

    model = instantiate(cfg.model)
    checkpointer = DetectionCheckpointer(model)
    # COCO-trained weights are the ones in use. An earlier revision pointed at
    # .../resolve/main/vCLR_deformable_train_on_voc.pth in the same repository.
    checkpointer.load("https://huggingface.co/allencbzhang/vCLR/resolve/main/vCLR_deformable_train_on_coco.pth")

    model.eval()
    model.cuda()
    vis_demo = VisualizationDemo(
        model=model,
        min_size_test=800,
        max_size_test=1333,
        img_format="RGB",
        metadata_dataset="coco_2017_val",
    )

    @spaces.GPU
    def _segment(img, confidence):
        """Run the predictor on *img* and return the rendered result as a PIL image."""
        img = np.array(img)
        _, results = vis_demo.run_on_image(img, confidence)
        # The channel axis of the visualiser output is reversed before it is
        # handed to PIL, exactly as the original code did.
        results = Image.fromarray(results.get_image()[:, :, ::-1])
        return results

    def inference(img, confidence):
        """Gradio callback: validate the request, then segment the image.

        Args:
            img: PIL image from the image input, or None when nothing was
                uploaded.
            confidence: Value of the "Confidence" slider, forwarded to
                ``vis_demo.run_on_image``.

        Returns:
            PIL image containing the visualised predictions.

        Raises:
            gr.Error: If no image was provided.
        """
        # Reject empty requests before entering the @spaces.GPU function, so the
        # user gets a readable message instead of a failure inside the predictor.
        if img is None:
            raise gr.Error("Please upload an image before running inference.")
        return _segment(img, confidence)

    demo = gr.Interface(
        fn=inference,
        inputs=[
            gr.Image(type="pil", image_mode="RGB"),
            gr.Slider(minimum=0.0, maximum=1.0, value=0.5, step=0.05, label="Confidence")
        ],
        outputs="image",
        examples=[
            
        ],
        title="[CVPR 2025 highlight] v-CLR: View-Consistent Learning for Open-World Instance Segmentation",
        # NOTE(review): the Papers-with-Code badge below uses the slug
        # "mr-detr-instructive-multi-route-training-for", which does not match
        # the v-CLR title above; confirm that it and the arXiv id are the
        # intended links for this project.
        description='''
            [![Paper](https://img.shields.io/badge/arXiv-2412.10028-red)](https://arxiv.org/abs/2412.10028)
            [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/mr-detr-instructive-multi-route-training-for/object-detection-on-coco-2017-val)](https://paperswithcode.com/sota/object-detection-on-coco-2017-val?p=mr-detr-instructive-multi-route-training-for)
        '''
    )
    demo.launch()