# sculpt/app.py
import gradio as gr
import torch
import numpy as np
from diffusers import StableDiffusionXLImg2ImgPipeline
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
from PIL import Image, ImageEnhance, ImageOps
# Device configuration
device = "cpu"  # or "cuda" if you have a GPU
torch_dtype = torch.float32

print("Loading SDXL Img2Img model...")
pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch_dtype
).to(device)
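# Optional (not in the original code): attention slicing trades speed for lower
# peak memory, which may help on memory-constrained CPU-only hosts.
# pipe.enable_attention_slicing()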
print("Carregando pesos LoRA weights with PEFT...")
pipe.load_lora_weights(
"KappaNeuro/bas-relief",
weight_name="BAS-RELIEF.safetensors",
peft_backend="peft"
)
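# Optional (assumption, not in the original code): fusing the LoRA into the base
# weights removes the per-step adapter overhead, which can speed up CPU inference.
# pipe.fuse_lora()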
print("Carregando modelo de profundidade...")
feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large").to(device)
def processar_profundidade(depth_arr: np.ndarray) -> Image.Image:
    # Normalize the raw depth values to 0-255 (epsilon avoids division by zero)
    d_min, d_max = depth_arr.min(), depth_arr.max()
    depth_stretched = (depth_arr - d_min) / (d_max - d_min + 1e-8)
    depth_stretched = (depth_stretched * 255).astype(np.uint8)

    # Boost contrast and sharpness so relief detail is easier to see
    depth_pil = Image.fromarray(depth_stretched)
    depth_pil = ImageOps.autocontrast(depth_pil)
    enhancer = ImageEnhance.Sharpness(depth_pil)
    depth_pil = enhancer.enhance(2.0)
    return depth_pil
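# Sanity-check sketch (hypothetical values): a linear ramp should come back as a
# smooth 0-255 gradient of the same shape.
# ramp = np.linspace(0.0, 1.0, 64 * 64).reshape(64, 64)
# assert processar_profundidade(ramp).size == (64, 64)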
def processar_imagem(imagem: Image.Image):
    # Preprocessing: force RGB and resize to the generation resolution
    # (reduce if you still get timeouts)
    imagem = imagem.convert("RGB").resize((512, 512))

    # Prompt with the LoRA trigger word (assumed from BAS-RELIEF.safetensors)
    full_prompt = "BAS-RELIEF, bas-relief sculpture, detailed stone carving"

    print("Generating image with LoRA style...")
    result = pipe(
        prompt=full_prompt,
        image=imagem,
        num_inference_steps=15,  # reduce if too slow
        guidance_scale=7.5
        # img2img output size follows the resized input image
    )
    image = result.images[0]

    print("Running DPT depth estimation...")
    inputs = feature_extractor(image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = depth_model(**inputs)
        predicted_depth = outputs.predicted_depth

    # Upsample the low-resolution depth prediction back to the image size
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],  # PIL gives (width, height); interpolate wants (height, width)
        mode="bicubic",
        align_corners=False
    ).squeeze()

    depth_map_pil = processar_profundidade(prediction.cpu().numpy())
    return image, depth_map_pil
# Gradio interface
interface = gr.Interface(
    fn=processar_imagem,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Image(label="Result"), gr.Image(label="Depth Map")],
    title="Bas-Relief Converter",
    description="Turn images into bas-relief renderings with a depth map"
)
if __name__ == "__main__":
    interface.launch()
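# Local debugging sketch (hypothetical file names): bypass the Gradio UI and run
# the pipeline directly, saving both outputs to disk.
#     img = Image.open("example.jpg")
#     relief, depth = processar_imagem(img)
#     relief.save("relief.png")
#     depth.save("depth.png")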