ar08 committed
Commit ee9363c · verified · 1 Parent(s): 0040d2b

Update app.py

Files changed (1)
  1. app.py +41 -137
app.py CHANGED
@@ -1,154 +1,58 @@
  import gradio as gr
  import torch
- import numpy as np
  from diffusers import StableDiffusionImg2ImgPipeline
  from PIL import Image
- from typing import Generator, List
- import gc
- import os
-
- # Configure CPU optimization
- os.environ["OMP_NUM_THREADS"] = "1"
- os.environ["MKL_NUM_THREADS"] = "1"
- torch.set_num_threads(1)

  device = "cuda" if torch.cuda.is_available() else "cpu"
  model_id = "nitrosocke/Ghibli-Diffusion"

- # Memory-optimized pipeline loading
  pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
      model_id,
-     torch_dtype=torch.float32,  # Keep float32 for CPU stability
  )
- pipe = pipe.to(device)
- pipe.enable_attention_slicing(slice_size=4)
- pipe.enable_sequential_cpu_offload() if device == "cuda" else None
-
- def resize_and_crop(image: Image.Image, target_size: int = 512) -> Image.Image:
-     """Optimized image preprocessing with downsampling"""
-     width, height = image.size
-     scale = max(target_size/width, target_size/height)
-     image = image.resize((int(width*scale), int(height*scale)), Image.LANCZOS)
-     width, height = image.size
-     left = (width - target_size) // 2
-     top = (height - target_size) // 2
-     return image.crop((left, top, left+target_size, top+target_size))
-
- def generate_ghibli_style(
-     input_image: Image.Image,
-     steps: int = 25,
-     strength: float = 0.6,
-     guidance_scale: float = 7.5
- ) -> Generator[Image.Image, None, None]:
-     """Memory-optimized generator with aggressive cleanup"""
-     prompt = "ghibli style, detailed anime portrait, studio ghibli, anime artwork"
-     negative_prompt = "blurry, low quality, sketch, cartoon, 3d, deformed, disfigured"
-
-     # Preprocess with garbage collection
-     input_image = resize_and_crop(input_image)
-     init_image = input_image.convert("RGB")
-     del input_image
-     gc.collect()
-
-     # Prepare latent variables with memory mapping
-     init_tensor = pipe.image_processor.preprocess(init_image).to(device=device, dtype=torch.float32)
-     init_latents = pipe.vae.encode(init_tensor).latent_dist.sample()
-     init_latents = pipe.vae.config.scaling_factor * init_latents
-     del init_tensor
-     gc.collect()
-
-     # Configure scheduler
-     pipe.scheduler.set_timesteps(steps, device=device)
-     timesteps = pipe.scheduler.timesteps[int(steps * strength):]
-     noise = torch.randn_like(init_latents, device=device)
-     latents = pipe.scheduler.add_noise(init_latents, noise, timesteps[:1])
-     del init_latents, noise
-     gc.collect()

-     # Memory-efficient text encoding
-     text_inputs = pipe.tokenizer(
-         prompt,
-         padding="max_length",
-         max_length=pipe.tokenizer.model_max_length,
-         return_tensors="pt"
-     )
-     text_embeddings = pipe.text_encoder(text_inputs.input_ids.to(device))[0].to(torch.float32)
-
-     uncond_input = pipe.tokenizer(
-         [negative_prompt],
-         padding="max_length",
-         max_length=text_embeddings.shape[1],
-         return_tensors="pt"
-     )
-     uncond_embeddings = pipe.text_encoder(uncond_input.input_ids.to(device))[0].to(torch.float32)
-
-     text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
-     del uncond_embeddings, uncond_input, text_inputs
-     gc.collect()

-     # Diffusion process with memory cleanup
-     for i, t in enumerate(gr.Progress().tqdm(timesteps, desc="Generating")):
-         # Memory-optimized UNet inference
-         with torch.inference_mode():
-             latent_model_input = torch.cat([latents] * 2)
-             latent_model_input = pipe.scheduler.scale_model_input(latent_model_input, t)
-
-             noise_pred = pipe.unet(
-                 latent_model_input,
-                 t,
-                 encoder_hidden_states=text_embeddings,
-                 return_dict=False,
-             )[0]

-             noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
-             noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
-
-             latents = pipe.scheduler.step(noise_pred, t, latents).prev_sample
-
-         # Memory-efficient decoding
          with torch.no_grad():
-             image = pipe.vae.decode(latents / pipe.vae.config.scaling_factor, return_dict=False)[0]
-             image = pipe.image_processor.postprocess(image, output_type="pil")[0]
-
-         yield image
-
-         # Aggressive memory cleanup
-         del latent_model_input, noise_pred, noise_pred_uncond, noise_pred_text
-         gc.collect()
-
-     # Final cleanup
-     del latents, text_embeddings
-     gc.collect()
-
- # Gradio interface
- with gr.Blocks() as demo:
-     gr.Markdown("# ✨ Studio Ghibli Style Transformer (CPU Optimized) ✨")
-     gr.Markdown("Upload a portrait photo to transform it into a Studio Ghibli-style artwork (max 10GB RAM usage)!")

-     with gr.Row():
-         with gr.Column():
-             input_image = gr.Image(label="Input Image", type="pil")
-             steps_slider = gr.Slider(10, 40, value=25, step=5, label="Number of Steps")
-             strength_slider = gr.Slider(0.4, 0.8, value=0.6, step=0.1, label="Transformation Strength")
-             generate_btn = gr.Button("✨ Transform!", variant="primary")
-
-         with gr.Column():
-             gallery = gr.Gallery(
-                 label="Generation Progress",
-                 show_label=True,
-                 columns=4,
-                 preview=True,
-                 object_fit="contain",
-                 height=600
-             )
-
-     generate_btn.click(
-         fn=generate_ghibli_style,
-         inputs=[input_image, steps_slider, strength_slider],
-         outputs=gallery,
-         concurrency_limit=1
-     )

- if __name__ == "__main__":
-     demo.queue(concurrency_count=1)
-     demo.launch()
 
  import gradio as gr
  import torch
  from diffusers import StableDiffusionImg2ImgPipeline
  from PIL import Image
+ import numpy as np

  device = "cuda" if torch.cuda.is_available() else "cpu"
  model_id = "nitrosocke/Ghibli-Diffusion"

+ # Load the model (keep safety_checker to avoid warning)
  pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
      model_id,
+     torch_dtype=torch.float16 if device == "cuda" else torch.float32,
  )
+ pipe.to(device)
+ pipe.enable_attention_slicing()

+ # Function to convert PIL image to latent-compatible numpy
+ def pil_to_np(image):
+     return np.array(image).astype(np.uint8)

+ # Generator with step-wise callback
+ def generate_ghibli_style(image, steps=25):
+     prompt = "ghibli style portrait"
+     intermediate_images = []

+     def callback(step: int, timestep: int, latents):
          with torch.no_grad():
+             img = pipe.decode_latents(latents)
+             img = pipe.numpy_to_pil(img)[0]
+             intermediate_images.append(img)
+
+     with torch.inference_mode():
+         pipe(
+             prompt=prompt,
+             image=image,
+             strength=0.6,
+             guidance_scale=6.0,
+             num_inference_steps=steps,
+             callback=callback,
+             callback_steps=1,
+         )

+     return intermediate_images
+
+ # Gradio Interface without deprecated style()
+ iface = gr.Interface(
+     fn=generate_ghibli_style,
+     inputs=[
+         gr.Image(type="pil", label="Upload a photo"),
+         gr.Slider(minimum=10, maximum=50, value=25, step=1, label="Inference Steps")
+     ],
+     outputs=gr.Gallery(label="Ghibli-style Generation Progress"),
+     title="✨ Studio Ghibli Portrait Generator ✨",
+     description="Upload a photo and watch it transform into a Ghibli-style portrait step by step!"
+ )

+ iface.launch()
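
As a quick local sanity check of the new step-wise callback, the updated generate_ghibli_style can be called directly, bypassing the Gradio UI. The sketch below is illustrative and not part of this commit: it assumes the lines are pasted into app.py just above iface.launch(), and "portrait.jpg" is a placeholder path.

    # Illustrative local check (not part of this commit); "portrait.jpg" is a placeholder path.
    sample = Image.open("portrait.jpg").convert("RGB").resize((512, 512))
    frames = generate_ghibli_style(sample, steps=15)  # one frame collected per callback invocation
    for i, frame in enumerate(frames):
        frame.save(f"step_{i:02d}.png")  # inspect the denoising progression frame by frame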