Anurag181011 committed
Commit 60a8fdc · 1 Parent(s): 03ce0df
Files changed (1)
  1. app.py +34 -21
app.py CHANGED
@@ -1,17 +1,20 @@
 import os
 import torch
 import gradio as gr
-from diffusers import StableDiffusionImg2ImgPipeline
+from diffusers import StableDiffusionImg2ImgPipeline, StableDiffusionPipeline
 from PIL import Image
 
 # --- Configuration ---
-SPACE_TITLE = "🎨 Studio Ghibli AI Art Generator"
-SPACE_DESCRIPTION = "Upload a portrait or a photo and transform it into a breathtaking Studio Ghibli-style masterpiece!"
-MODEL_ID = "nitrosocke/Ghibli-Diffusion"
-STRENGTH = 0.65
-GUIDANCE_SCALE = 5.0
-NUM_INFERENCE_STEPS = 25
+SPACE_TITLE = "🎨 Enhanced Studio Ghibli AI Art Generator"
+SPACE_DESCRIPTION = "Upload a portrait or a photo and transform it into a breathtaking Studio Ghibli-style masterpiece! Improved model and prompting for better results."
+MAIN_MODEL_ID = "nitrosocke/Ghibli-Diffusion"
+STYLE_MODEL_ID = "sayakpaul/sd-anime-diff"  # Experiment with other anime style models
+STRENGTH = 0.60  # Adjust for better balance between input and style
+GUIDANCE_SCALE = 7.5  # Increased for better prompt adherence
+NUM_INFERENCE_STEPS = 30  # Increased for potentially higher quality
 INPUT_IMAGE_SIZE = (512, 512)
+PROMPT_PREFIX = "Studio Ghibli anime-style illustration, "
+NEGATIVE_PROMPT = "ugly, deformed, blurry, low quality, bad anatomy, bad proportions, disfigured, poorly drawn face, mutation, mutated, extra limbs, extra fingers, body horror, glitchy, tiling"
 
 # --- Device Setup ---
 # Force CUDA usage, assuming A100 is the first GPU (index 0)
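The device-setup block itself sits between hunks, so the diff shows only its header comment and the error print as context. Below is a minimal sketch of what it plausibly contains, inferred from those context lines (the `device` variable, the A100 comment, and the torch initialization message); this is a hypothetical reconstruction, not code from the commit:

# Hypothetical reconstruction of the elided device-setup block (not shown in the diff).
device = "cuda" if torch.cuda.is_available() else "cpu"
try:
    if device == "cuda":
        torch.cuda.set_device(0)  # assumes the A100 is GPU index 0, per the comment above
        print(f"✅ Using GPU: {torch.cuda.get_device_name(0)}")
    else:
        print("⚠️ CUDA not available, falling back to CPU.")
except Exception as e:
    print(f"⚠️ Torch initialization error: {e}")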
@@ -31,8 +34,18 @@ except Exception as e:
     print(f"⚠️ Torch initialization error: {e}")
 
 # --- Model Loading ---
-pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
-    MODEL_ID,
+# Load the main Ghibli diffusion model for image-to-image
+img2img_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
+    MAIN_MODEL_ID,
+    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+    use_safetensors=True,
+    low_cpu_mem_usage=True
+).to(device)
+
+# Load a separate Stable Diffusion model for generating initial style (optional, but can help)
+# You can comment this out if you only want to rely on the img2img model
+style_pipe = StableDiffusionPipeline.from_pretrained(
+    STYLE_MODEL_ID,
     torch_dtype=torch.float16 if device == "cuda" else torch.float32,
     use_safetensors=True,
     low_cpu_mem_usage=True
@@ -41,22 +54,16 @@ pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
 # --- Optimization (Conditional for CUDA) ---
 if device == "cuda":
     try:
-        pipe.enable_xformers_memory_efficient_attention()
+        img2img_pipe.enable_xformers_memory_efficient_attention()
+        style_pipe.enable_xformers_memory_efficient_attention()
         print("✅ xFormers enabled!")
     except Exception as e:
         print(f"⚠️ xFormers not available: {e}")
-        pipe.enable_model_cpu_offload()  # Keep most of the model on GPU, offload selectively
-    pipe.enable_vae_slicing()
-    pipe.enable_attention_slicing()
+        img2img_pipe.enable_model_cpu_offload()
+        style_pipe.enable_model_cpu_offload()
+    img2img_pipe.enable_vae_slicing()  # renamed from `pipe`, which no longer exists after this commit
+    img2img_pipe.enable_attention_slicing()
 
-# --- Prompt Definition ---
-GHIBLI_PROMPT = (
-    "Studio Ghibli anime-style illustration, magical landscape, soft pastel colors, "
-    "hand-painted textures, cinematic lighting, dreamy atmosphere, vibrant and rich details, "
-    "Miyazaki-inspired fantasy world, watercolor aesthetic, warm sunlight, intricate composition, "
-    "high detail, whimsical and nostalgic beauty."
-)
-
 # --- Image Transformation Function ---
 def transform_image(input_image):
     if input_image is None:
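One interaction in this hunk is worth flagging: `enable_model_cpu_offload()` manages device placement on its own (via accelerate), so calling it on a pipeline that was already moved with `.to(device)` is at best redundant and can negate the memory savings. A hedged sketch of the tidier pattern, assuming a recent diffusers release and the `img2img_pipe` defined above:

# Sketch: choose one placement strategy per pipeline, not both.
if device == "cuda":
    try:
        # Fast path: keep the whole pipeline on the GPU with memory-efficient attention.
        img2img_pipe.enable_xformers_memory_efficient_attention()
    except Exception as e:
        print(f"⚠️ xFormers not available: {e}")
        # Fallback: let accelerate stream submodules to the GPU on demand.
        # In this case, skip the earlier .to(device) call entirely.
        img2img_pipe.enable_model_cpu_offload()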
@@ -65,12 +72,18 @@ def transform_image(input_image):
     try:
         input_image = input_image.resize(INPUT_IMAGE_SIZE)
 
-        output = pipe(
-            prompt=GHIBLI_PROMPT,
+        # Generate an initial stylized image using the style model (optional)
+        # initial_style_prompt = f"{PROMPT_PREFIX} portrait of a person"
+        # initial_style_image = style_pipe(prompt=initial_style_prompt, negative_prompt=NEGATIVE_PROMPT, num_inference_steps=NUM_INFERENCE_STEPS // 2).images[0]
+
+        # Use the input image directly with the img2img pipeline
+        output = img2img_pipe(
+            prompt=PROMPT_PREFIX + "portrait of a person",  # Adjust prompt based on input
             image=input_image,
             strength=STRENGTH,
             guidance_scale=GUIDANCE_SCALE,
             num_inference_steps=NUM_INFERENCE_STEPS,
+            negative_prompt=NEGATIVE_PROMPT,
         )
 
         return output.images[0]
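The commit does not touch the Gradio wiring, but the file imports `gradio as gr`, defines `SPACE_TITLE` and `SPACE_DESCRIPTION`, and exposes `transform_image`, so the interface presumably looks roughly like the following sketch (implied wiring, not code from the diff):

# Hypothetical sketch of the UI wiring implied by the imports and constants.
demo = gr.Interface(
    fn=transform_image,
    inputs=gr.Image(type="pil", label="Input photo"),
    outputs=gr.Image(type="pil", label="Ghibli-style result"),
    title=SPACE_TITLE,
    description=SPACE_DESCRIPTION,
)

if __name__ == "__main__":
    demo.launch()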