Spaces: Runtime error

Update app.py

app.py CHANGED
@@ -4,30 +4,35 @@ import numpy as np
 from diffusers import StableDiffusionImg2ImgPipeline
 from PIL import Image
 from typing import Generator, List
+import gc
+import os
+
+# Configure CPU optimization
+os.environ["OMP_NUM_THREADS"] = "1"
+os.environ["MKL_NUM_THREADS"] = "1"
+torch.set_num_threads(1)
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_id = "nitrosocke/Ghibli-Diffusion"
 
-#
+# Memory-optimized pipeline loading
 pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
     model_id,
-    torch_dtype=torch.
+    torch_dtype=torch.float32,  # Keep float32 for CPU stability
 )
 pipe = pipe.to(device)
-pipe.enable_attention_slicing()
+pipe.enable_attention_slicing(slice_size=4)
+pipe.enable_sequential_cpu_offload() if device == "cuda" else None
 
 def resize_and_crop(image: Image.Image, target_size: int = 512) -> Image.Image:
-    """
+    """Optimized image preprocessing with downsampling"""
+    width, height = image.size
+    scale = max(target_size/width, target_size/height)
+    image = image.resize((int(width*scale), int(height*scale)), Image.LANCZOS)
     width, height = image.size
-
-
-
-        image = image.crop((left, 0, right, height))
-    elif height > width:
-        top = (height - width) // 2
-        bottom = top + width
-        image = image.crop((0, top, width, bottom))
-    return image.resize((target_size, target_size))
+    left = (width - target_size) // 2
+    top = (height - target_size) // 2
+    return image.crop((left, top, left+target_size, top+target_size))
 
 def generate_ghibli_style(
     input_image: Image.Image,
@@ -35,90 +40,103 @@ def generate_ghibli_style(
     strength: float = 0.6,
     guidance_scale: float = 7.5
 ) -> Generator[Image.Image, None, None]:
-    """
+    """Memory-optimized generator with aggressive cleanup"""
     prompt = "ghibli style, detailed anime portrait, studio ghibli, anime artwork"
     negative_prompt = "blurry, low quality, sketch, cartoon, 3d, deformed, disfigured"
 
-    # Preprocess
+    # Preprocess with garbage collection
     input_image = resize_and_crop(input_image)
     init_image = input_image.convert("RGB")
+    del input_image
+    gc.collect()
 
-    # Prepare latent variables
-
-    init_latents = pipe.vae.encode(
+    # Prepare latent variables with memory mapping
+    init_tensor = pipe.image_processor.preprocess(init_image).to(device=device, dtype=torch.float32)
+    init_latents = pipe.vae.encode(init_tensor).latent_dist.sample()
     init_latents = pipe.vae.config.scaling_factor * init_latents
+    del init_tensor
+    gc.collect()
 
-    #
+    # Configure scheduler
     pipe.scheduler.set_timesteps(steps, device=device)
     timesteps = pipe.scheduler.timesteps[int(steps * strength):]
-    noise = torch.randn_like(init_latents)
+    noise = torch.randn_like(init_latents, device=device)
     latents = pipe.scheduler.add_noise(init_latents, noise, timesteps[:1])
+    del init_latents, noise
+    gc.collect()
 
-    #
+    # Memory-efficient text encoding
     text_inputs = pipe.tokenizer(
         prompt,
         padding="max_length",
         max_length=pipe.tokenizer.model_max_length,
         return_tensors="pt"
     )
-    text_embeddings = pipe.text_encoder(text_inputs.input_ids.to(device))[0]
-
-    # Unconditional embedding
+    text_embeddings = pipe.text_encoder(text_inputs.input_ids.to(device))[0].to(torch.float32)
+
     uncond_input = pipe.tokenizer(
-        [negative_prompt]
+        [negative_prompt],
         padding="max_length",
         max_length=text_embeddings.shape[1],
         return_tensors="pt"
     )
-    uncond_embeddings = pipe.text_encoder(uncond_input.input_ids.to(device))[0]
-
-    # Classifier-free guidance
+    uncond_embeddings = pipe.text_encoder(uncond_input.input_ids.to(device))[0].to(torch.float32)
+
     text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
+    del uncond_embeddings, uncond_input, text_inputs
+    gc.collect()
 
-    # Diffusion process
+    # Diffusion process with memory cleanup
     for i, t in enumerate(gr.Progress().tqdm(timesteps, desc="Generating")):
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        # Decode and yield image
+        # Memory-optimized UNet inference
+        with torch.inference_mode():
+            latent_model_input = torch.cat([latents] * 2)
+            latent_model_input = pipe.scheduler.scale_model_input(latent_model_input, t)
+
+            noise_pred = pipe.unet(
+                latent_model_input,
+                t,
+                encoder_hidden_states=text_embeddings,
+                return_dict=False,
+            )[0]
+
+            noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+            noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+            latents = pipe.scheduler.step(noise_pred, t, latents).prev_sample
+
+        # Memory-efficient decoding
         with torch.no_grad():
             image = pipe.vae.decode(latents / pipe.vae.config.scaling_factor, return_dict=False)[0]
             image = pipe.image_processor.postprocess(image, output_type="pil")[0]
 
         yield image
+
+        # Aggressive memory cleanup
+        del latent_model_input, noise_pred, noise_pred_uncond, noise_pred_text
+        gc.collect()
+
+    # Final cleanup
+    del latents, text_embeddings
+    gc.collect()
 
 # Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# ✨ Studio Ghibli Style Transformer ✨")
-    gr.Markdown("Upload a portrait photo to transform it into a Studio Ghibli-style artwork!")
+    gr.Markdown("# ✨ Studio Ghibli Style Transformer (CPU Optimized) ✨")
+    gr.Markdown("Upload a portrait photo to transform it into a Studio Ghibli-style artwork (max 10GB RAM usage)!")
 
     with gr.Row():
         with gr.Column():
             input_image = gr.Image(label="Input Image", type="pil")
-            steps_slider = gr.Slider(10,
-            strength_slider = gr.Slider(0.
+            steps_slider = gr.Slider(10, 40, value=25, step=5, label="Number of Steps")
+            strength_slider = gr.Slider(0.4, 0.8, value=0.6, step=0.1, label="Transformation Strength")
             generate_btn = gr.Button("✨ Transform!", variant="primary")
 
         with gr.Column():
            gallery = gr.Gallery(
                 label="Generation Progress",
                 show_label=True,
-                columns=
+                columns=4,
                 preview=True,
                 object_fit="contain",
                 height=600
@@ -131,8 +149,6 @@ with gr.Blocks() as demo:
         concurrency_limit=1
     )
 
-
-
 if __name__ == "__main__":
-
+    demo.queue(concurrency_count=1)
     demo.launch()
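A note on the "Runtime error" badge: the file now mixes two generations of the Gradio queue API. The per-event concurrency_limit=1 visible in the last hunk is Gradio 4.x vocabulary, while demo.queue(concurrency_count=1) is the Gradio 3.x call; Gradio 4.x dropped concurrency_count from queue() in favour of default_concurrency_limit, so depending on which Gradio version the Space resolves, one of the two calls is likely to raise a TypeError at startup. A minimal, version-tolerant sketch of the launch block, assuming the rest of app.py stays as committed:

import gradio as gr

if __name__ == "__main__":
    # Gradio 3.x: queue(concurrency_count=...); Gradio 4.x: queue(default_concurrency_limit=...)
    if gr.__version__.startswith("3."):
        demo.queue(concurrency_count=1)
    else:
        demo.queue(default_concurrency_limit=1)
    demo.launch()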
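On the img2img schedule itself: the slice timesteps = pipe.scheduler.timesteps[int(steps * strength):] runs fewer denoising steps as the strength slider goes up, which is the inverse of how diffusers' StableDiffusionImg2ImgPipeline interprets strength (it denoises for roughly steps * strength of the final steps). If the intent is to match that convention, a sketch of the usual slicing, reusing the names from the committed code:

# diffusers img2img convention: strength decides how much of the schedule is re-run,
# so strength=0.8 noises the latents more heavily and denoises for ~80% of the steps.
pipe.scheduler.set_timesteps(steps, device=device)
init_timestep = min(int(steps * strength), steps)
t_start = max(steps - init_timestep, 0)
timesteps = pipe.scheduler.timesteps[t_start:]

# Noise the clean latents to the first (noisiest) timestep that will actually be run.
noise = torch.randn_like(init_latents, device=device)
latents = pipe.scheduler.add_noise(init_latents, noise, timesteps[:1])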
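The click wiring itself (new lines 143-148) is collapsed out of the diff, so the following is purely illustrative rather than the committed code. Because generate_ghibli_style yields one PIL frame per denoising step while gr.Gallery expects a list of images per update, a thin wrapper that accumulates frames is the usual way to stream progress into the gallery; this sketch also assumes the hidden second parameter of generate_ghibli_style is the step count:

def stream_to_gallery(image, steps, strength):
    frames = []
    for frame in generate_ghibli_style(image, steps, strength):
        frames.append(frame)
        yield frames  # Gallery re-renders with the full progression each step

generate_btn.click(
    fn=stream_to_gallery,
    inputs=[input_image, steps_slider, strength_slider],
    outputs=gallery,
    concurrency_limit=1,  # matches the kwarg visible in the last hunk
)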