Spaces:

stojnvla
/

LPOSS

Running on Zero

App Files Files Community

LPOSS / app.py

stojnvla

svg icons

fd15d48 about 2 months ago

raw

history blame

3.77 kB

	import gradio as gr
	import PIL
	import numpy as np
	from models.maskclip import MaskClip
	from models.dino import DINO
	import torchvision.transforms as T
	import torch.nn.functional as F
	from lposs import lposs, lposs_plus
	import torch
	import spaces

	# Select the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
	cuda_available = torch.cuda.is_available()
	if cuda_available:
	    print("Using GPU")
	device = "cuda" if cuda_available else "cpu"
	# An MPS branch exists only as commented-out code:
	# elif torch.backends.mps.is_available():
	#     device = "mps"

	print(f"Using device: {device}")

	# Instantiate both models once at module import and move them to `device`.
	maskclip = MaskClip().to(device)
	dino = DINO().to(device)

	# Preprocessing applied to every input image: resize (size=448, with the
	# result capped by max_size=2048), then convert the PIL image to a tensor.
	to_torch_tensor = T.Compose(
	    [
	        T.Resize(size=448, max_size=2048),
	        T.ToTensor(),
	    ]
	)

	@spaces.GPU
	def segment_image(
	    img: np.ndarray | None,
	    classnames: str,
	    use_lposs_plus: bool | None,
	) -> tuple[np.ndarray | PIL.Image.Image | str, list[tuple[np.ndarray | tuple[int, int, int, int], str]]]:
	    """Segment *img* into the comma-separated classes listed in *classnames*.

	    Args:
	        img: Input image as a NumPy array (it is passed to
	            ``PIL.Image.fromarray`` and its ``shape`` is read), or ``None``
	            when no image was supplied.
	        classnames: Class names separated by commas. Surrounding whitespace
	            is stripped and empty entries are ignored.
	        use_lposs_plus: When truthy, the predictions are additionally passed
	            through ``lposs_plus``.

	    Returns:
	        A ``(image, annotations)`` pair: the unmodified input image and one
	        ``(mask, class_name)`` tuple per class, where ``mask`` is that
	        class's softmax score map resized to the input's height and width.

	    Raises:
	        gr.Error: If no image was supplied or no non-empty class name remains
	            after parsing.
	    """
	    if img is None:
	        raise gr.Error("Please upload an image.")

	    # Drop blank entries (empty textbox, trailing comma, "a,,b") so that no
	    # empty string is handed to the model as a class name.
	    stripped_names = (name.strip() for name in (classnames or "").split(","))
	    classnames = [name for name in stripped_names if name]
	    if not classnames:
	        raise gr.Error("Please provide at least one class name.")

	    img_tensor = to_torch_tensor(PIL.Image.fromarray(img)).unsqueeze(0).to(device)

	    preds = lposs(maskclip, dino, img_tensor, classnames)
	    if use_lposs_plus:
	        preds = lposs_plus(img_tensor, preds)

	    # Resize the predictions back to the original (height, width); the first
	    # two axes of the input array are used regardless of a channel axis.
	    preds = F.interpolate(preds, size=img.shape[:2], mode="bilinear", align_corners=False)
	    # Scaling by 100 before the softmax over the class axis (dim=1) sharpens
	    # the per-pixel distribution.
	    preds = F.softmax(preds * 100, dim=1).cpu().numpy()

	    # The indexing below treats preds as (batch, class, height, width) and
	    # reads the single batch entry.
	    return (img, [(preds[0, i, :, :], name) for i, name in enumerate(classnames)])

	# Static text shown around the interface.
	demo_title = "LPOSS: Label Propagation Over Patches and Pixels for Open-vocabulary Semantic Segmentation"
	demo_description = "Upload an image and specify the objects you want to segment by listing their names separated by commas."
	# Footer HTML with icon links to the arXiv page and the GitHub repository.
	demo_article = """<div align='center'>
	<a href='http://arxiv.org/abs/2503.19777' target='_blank' style='margin-right: 15px;'>
	<span style='display: inline-flex; align-items: center;'>
	<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"></path><polyline points="14 2 14 8 20 8"></polyline><line x1="16" y1="13" x2="8" y2="13"></line><line x1="16" y1="17" x2="8" y2="17"></line><polyline points="10 9 9 9 8 9"></polyline></svg>
	<span style="margin-left: 5px;">arXiv</span>
	</span>
	</a>
	<a href='https://github.com/vladan-stojnic/LPOSS' target='_blank'>
	<span style='display: inline-flex; align-items: center;'>
	<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="currentColor"><path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"/></svg>
	<span style="margin-left: 5px;">GitHub</span>
	</span>
	</a>
	</div>"""

	# Wire the segmentation function to its input widgets (image, class-name
	# textbox, LPOSS+ checkbox) and a single annotated-image output.
	demo = gr.Interface(
	    fn=segment_image,
	    inputs=[
	        gr.Image(label="Input Image"),
	        gr.Textbox(label="Class Names", info="Separate class names with commas"),
	        gr.Checkbox(label="Use LPOSS+", info="Enable pixel-level refinement using LPOSS+"),
	    ],
	    outputs=[
	        gr.AnnotatedImage(label="Segmentation Results"),
	    ],
	    title=demo_title,
	    article=demo_article,
	    description=demo_description,
	)

	demo.launch()