Anurag181011 committed
Commit 60a8fdc · 1 Parent(s): 03ce0df
Files changed (1)
  1. app.py +34 -21
app.py CHANGED
@@ -1,17 +1,20 @@
 import os
 import torch
 import gradio as gr
-from diffusers import StableDiffusionImg2ImgPipeline
+from diffusers import StableDiffusionImg2ImgPipeline, StableDiffusionPipeline
 from PIL import Image
 
 # --- Configuration ---
-SPACE_TITLE = "🎨 Studio Ghibli AI Art Generator"
-SPACE_DESCRIPTION = "Upload a portrait or a photo and transform it into a breathtaking Studio Ghibli-style masterpiece!"
-MODEL_ID = "nitrosocke/Ghibli-Diffusion"
-STRENGTH = 0.65
-GUIDANCE_SCALE = 5.0
-NUM_INFERENCE_STEPS = 25
+SPACE_TITLE = "🎨 Enhanced Studio Ghibli AI Art Generator"
+SPACE_DESCRIPTION = "Upload a portrait or a photo and transform it into a breathtaking Studio Ghibli-style masterpiece! Improved model and prompting for better results."
+MAIN_MODEL_ID = "nitrosocke/Ghibli-Diffusion"
+STYLE_MODEL_ID = "sayakpaul/sd-anime-diff"  # Experiment with other anime style models
+STRENGTH = 0.60  # Adjust for better balance between input and style
+GUIDANCE_SCALE = 7.5  # Increased for better prompt adherence
+NUM_INFERENCE_STEPS = 30  # Increased for potentially higher quality
 INPUT_IMAGE_SIZE = (512, 512)
+PROMPT_PREFIX = "Studio Ghibli anime-style illustration, "
+NEGATIVE_PROMPT = "ugly, deformed, blurry, low quality, bad anatomy, bad proportions, disfigured, poorly drawn face, mutation, mutated, extra limbs, extra fingers, body horror, glitchy, tiling"
 
 # --- Device Setup ---
 # Force CUDA usage, assuming A100 is the first GPU (index 0)
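The device-setup block itself sits between hunks, so the diff shows only its header comment and the error print as context. Below is a minimal sketch of what it plausibly contains, inferred from those context lines (the `device` variable, the A100 comment, and the torch initialization message); this is a hypothetical reconstruction, not code from the commit:

# Hypothetical reconstruction of the elided device-setup block (not shown in the diff).
device = "cuda" if torch.cuda.is_available() else "cpu"
try:
    if device == "cuda":
        torch.cuda.set_device(0)  # assumes the A100 is GPU index 0, per the comment above
        print(f"✅ Using GPU: {torch.cuda.get_device_name(0)}")
    else:
        print("⚠️ CUDA not available, falling back to CPU.")
except Exception as e:
    print(f"⚠️ Torch initialization error: {e}")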
@@ -31,8 +34,18 @@ except Exception as e:
     print(f"⚠️ Torch initialization error: {e}")
 
 # --- Model Loading ---
-pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
-    MODEL_ID,
+# Load the main Ghibli diffusion model for image-to-image
+img2img_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
+    MAIN_MODEL_ID,
+    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+    use_safetensors=True,
+    low_cpu_mem_usage=True
+).to(device)
+
+# Load a separate Stable Diffusion model for generating initial style (optional, but can help)
+# You can comment this out if you only want to rely on the img2img model
+style_pipe = StableDiffusionPipeline.from_pretrained(
+    STYLE_MODEL_ID,
     torch_dtype=torch.float16 if device == "cuda" else torch.float32,
     use_safetensors=True,
     low_cpu_mem_usage=True
@@ -41,22 +54,16 @@ pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
 # --- Optimization (Conditional for CUDA) ---
 if device == "cuda":
     try:
-        pipe.enable_xformers_memory_efficient_attention()
+        img2img_pipe.enable_xformers_memory_efficient_attention()
+        style_pipe.enable_xformers_memory_efficient_attention()
         print("✅ xFormers enabled!")
     except Exception as e:
         print(f"⚠️ xFormers not available: {e}")
-        pipe.enable_model_cpu_offload()  # Keep most of the model on GPU, offload selectively
-    pipe.enable_vae_slicing()
-    pipe.enable_attention_slicing()
+        img2img_pipe.enable_model_cpu_offload()
+        style_pipe.enable_model_cpu_offload()
+    img2img_pipe.enable_vae_slicing()  # renamed from `pipe`, which no longer exists after this commit
+    img2img_pipe.enable_attention_slicing()
 
-# --- Prompt Definition ---
-GHIBLI_PROMPT = (
-    "Studio Ghibli anime-style illustration, magical landscape, soft pastel colors, "
-    "hand-painted textures, cinematic lighting, dreamy atmosphere, vibrant and rich details, "
-    "Miyazaki-inspired fantasy world, watercolor aesthetic, warm sunlight, intricate composition, "
-    "high detail, whimsical and nostalgic beauty."
-)
-
 # --- Image Transformation Function ---
 def transform_image(input_image):
     if input_image is None:
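One interaction in this hunk is worth flagging: `enable_model_cpu_offload()` manages device placement on its own (via accelerate), so calling it on a pipeline that was already moved with `.to(device)` is at best redundant and can negate the memory savings. A hedged sketch of the tidier pattern, assuming a recent diffusers release and the `img2img_pipe` defined above:

# Sketch: choose one placement strategy per pipeline, not both.
if device == "cuda":
    try:
        # Fast path: keep the whole pipeline on the GPU with memory-efficient attention.
        img2img_pipe.enable_xformers_memory_efficient_attention()
    except Exception as e:
        print(f"⚠️ xFormers not available: {e}")
        # Fallback: let accelerate stream submodules to the GPU on demand.
        # In this case, skip the earlier .to(device) call entirely.
        img2img_pipe.enable_model_cpu_offload()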
@@ -65,12 +72,18 @@ def transform_image(input_image):
     try:
         input_image = input_image.resize(INPUT_IMAGE_SIZE)
 
-        output = pipe(
-            prompt=GHIBLI_PROMPT,
+        # Generate an initial stylized image using the style model (optional)
+        # initial_style_prompt = f"{PROMPT_PREFIX} portrait of a person"
+        # initial_style_image = style_pipe(prompt=initial_style_prompt, negative_prompt=NEGATIVE_PROMPT, num_inference_steps=NUM_INFERENCE_STEPS // 2).images[0]
+
+        # Use the input image directly with the img2img pipeline
+        output = img2img_pipe(
+            prompt=PROMPT_PREFIX + "portrait of a person",  # Adjust prompt based on input
             image=input_image,
             strength=STRENGTH,
             guidance_scale=GUIDANCE_SCALE,
             num_inference_steps=NUM_INFERENCE_STEPS,
+            negative_prompt=NEGATIVE_PROMPT,
         )
 
         return output.images[0]
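The commit does not touch the Gradio wiring, but the file imports `gradio as gr`, defines `SPACE_TITLE` and `SPACE_DESCRIPTION`, and exposes `transform_image`, so the interface presumably looks roughly like the following sketch (implied wiring, not code from the diff):

# Hypothetical sketch of the UI wiring implied by the imports and constants.
demo = gr.Interface(
    fn=transform_image,
    inputs=gr.Image(type="pil", label="Input photo"),
    outputs=gr.Image(type="pil", label="Ghibli-style result"),
    title=SPACE_TITLE,
    description=SPACE_DESCRIPTION,
)

if __name__ == "__main__":
    demo.launch()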