Anurag Bhardwaj committed on
Commit de1e61c · verified · 1 Parent(s): fbe554c

Update app.py

Files changed (1)
  1. app.py +195 -108
app.py CHANGED
@@ -1,129 +1,216 @@
- import os
  import sys
  import subprocess
  import importlib.util

- # Required packages including huggingface_hub for downloading files.
  required_packages = {
      "gradio": "gradio",
-     "diffusers": "diffusers",
      "torch": "torch",
      "PIL": "pillow",
-     "transformers": "transformers",
-     "safetensors": "safetensors",
-     "huggingface_hub": "huggingface_hub"
  }

  def install_package(package_name):
-     """Install package using pip."""
      subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])

- # Check and install missing packages.
- for module_name, pip_name in required_packages.items():
-     if importlib.util.find_spec(module_name) is None:
-         print(f"Package {module_name} not found. Installing {pip_name}...")
-         install_package(pip_name)

- # Now import the required packages.
  import gradio as gr
  import torch
  from PIL import Image
- from diffusers import StableDiffusionImg2ImgPipeline
- from safetensors.torch import load_file
- from huggingface_hub import hf_hub_download
-
- def monkeypatch_lora(unet, lora_path, alpha=1.0):
-     """
-     Merge LoRA weights into the UNet model.
-     This function loads a LoRA weights file (safetensors format) and applies the deltas
-     to the corresponding base weights of the UNet.
-     """
-     print(f"Loading LoRA weights from: {lora_path}")
-     lora_state = load_file(lora_path)
-     unet_state = unet.state_dict()
-
-     for key, delta in lora_state.items():
-         if "lora_up" in key or "lora_down" in key:
-             base_key = key.replace("lora_up", "weight").replace("lora_down", "weight")
-             if base_key in unet_state:
-                 unet_state[base_key] = unet_state[base_key] + delta.to(unet_state[base_key].device) * alpha
-                 print(f"Applied LoRA delta for {base_key}")
-             else:
-                 print(f"Warning: Base weight {base_key} not found in UNet state dict.")
-         else:
-             print(f"Skipping key {key} as it does not appear to be a LoRA weight.")
-     unet.load_state_dict(unet_state)
-     print("LoRA merging completed.")
-
- def load_model():
-     """
-     Load the base Stable Diffusion model and apply the FLUX.1-dev LoRA weights.
-     If the LoRA weights file is not found locally, it will be downloaded from the Hugging Face Hub.
-     """
-     base_model_id = "runwayml/stable-diffusion-v1-5"
-     hf_token = os.environ.get("HF_TOKEN")
-     if hf_token is None:
-         raise ValueError("HF_TOKEN environment variable is not set. Please set it to access gated repositories.")
-
-     # Load the base model.
-     pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
-         base_model_id,
-         torch_dtype=torch.float16,
-         use_auth_token=hf_token
-     )
-     device = "cuda" if torch.cuda.is_available() else "cpu"
-     pipe = pipe.to(device)
-
-     # Define the expected local path for the LoRA weights.
-     lora_weights_path = "./flux_ghibsky_lora.safetensors"

-     # If the file does not exist locally, attempt to download it.
-     if not os.path.exists(lora_weights_path):
-         print(f"LoRA weights file not found at {lora_weights_path}. Attempting to download from Hugging Face Hub...")
-         # Download the file from the gated repository.
-         lora_weights_path = hf_hub_download(
-             repo_id="black-forest-labs/FLUX.1-dev",
-             filename="flux_ghibsky_lora.safetensors",
-             use_auth_token=hf_token
          )
-         print(f"Downloaded LoRA weights to {lora_weights_path}.")
-
-     # Apply the LoRA weights to the UNet.
-     monkeypatch_lora(pipe.unet, lora_weights_path, alpha=1.0)
-     print("Base model loaded and FLUX.1-dev LoRA weights merged.")
-     return pipe
-
- # Load the model once at startup.
- pipe = load_model()
-
- def transform_image(image: Image.Image, strength: float, steps: int) -> Image.Image:
-     """
-     Transforms the uploaded image into Ghibli-inspired art.
-     The prompt is prefixed with "GHIBSKY style".
-     """
-     prompt = (
-         "GHIBSKY style, a portrait transformed into dreamy, Ghibli-inspired art, "
-         "featuring serene skies, surreal details, and intricate brush strokes"
-     )
-     result = pipe(prompt=prompt, image=image, strength=strength, num_inference_steps=steps)
-     return result.images[0]
-
- # Create a Gradio interface.
- demo = gr.Interface(
-     fn=transform_image,
-     inputs=[
-         gr.Image(type="pil", label="Upload your portrait image"),
-         gr.Slider(0.1, 0.9, value=0.6, label="Transformation Strength"),
-         gr.Slider(20, 100, step=5, value=50, label="Inference Steps")
-     ],
-     outputs=gr.Image(type="pil", label="Ghibli-Inspired Art"),
-     title="GHIBSKY Art Transformer",
-     description=(
-         "Upload your portrait image and see it transformed into enchanting, Ghibli-inspired art. "
-         "This demo uses a base Stable Diffusion model with FLUX.1-dev LoRA weights merged into it "
-         "to achieve the unique GHIBSKY style. Ensure your HF_TOKEN is set to access gated repositories."
      )
- )

- if __name__ == "__main__":
-     demo.launch()

  import sys
  import subprocess
  import importlib.util
+ import os

+ # List of required packages.
  required_packages = {
      "gradio": "gradio",
+     "numpy": "numpy",
      "torch": "torch",
+     "diffusers": "diffusers",
      "PIL": "pillow",
+     "spaces": "spaces"  # If this is a custom package in your environment.
  }

  def install_package(package_name):
      subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])

+ # Auto-install any missing packages.
+ for mod, pkg in required_packages.items():
+     if importlib.util.find_spec(mod) is None:
+         print(f"Module {mod} not found, installing {pkg}...")
+         install_package(pkg)

+ import random
  import gradio as gr
+ import numpy as np
+ import spaces
  import torch
+ from diffusers import DiffusionPipeline
  from PIL import Image
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # Model identifiers.
+ repo_id = "black-forest-labs/FLUX.1-dev"
+ adapter_id = "alvarobartt/ghibli-characters-flux-lora"
+
+ # Retrieve HF token from environment (if required to access gated repositories).
+ hf_token = os.environ.get("HF_TOKEN", None)
+
+ # Load the base model from the repository.
+ pipeline = DiffusionPipeline.from_pretrained(
+     repo_id,
+     torch_dtype=torch.bfloat16,
+     use_auth_token=hf_token  # Only needed if the repo is gated.
+ )
+ pipeline.load_lora_weights(adapter_id)
+ pipeline = pipeline.to(device)
+
+ MAX_SEED = np.iinfo(np.int32).max
+ MAX_IMAGE_SIZE = 1024
+
+ @spaces.GPU(duration=80)
+ def inference(
+     prompt: str,
+     seed: int,
+     randomize_seed: bool,
+     width: int,
+     height: int,
+     guidance_scale: float,
+     num_inference_steps: int,
+     lora_scale: float,
+     progress: gr.Progress = gr.Progress(track_tqdm=True),
+ ):
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
+     generator = torch.Generator(device=device).manual_seed(seed)

+     image = pipeline(
+         prompt=prompt,
+         guidance_scale=guidance_scale,
+         num_inference_steps=num_inference_steps,
+         width=width,
+         height=height,
+         generator=generator,
+         joint_attention_kwargs={"scale": lora_scale},
+     ).images[0]
+
+     return image, seed
+
+ examples = [
+     (
+         "Ghibli style futuristic stormtrooper with glossy white armor and a sleek helmet,"
+         " standing heroically on a lush alien planet, vibrant flowers blooming around, soft"
+         " sunlight illuminating the scene, a gentle breeze rustling the leaves"
+     ),
+ ]
+
+ css = """
+ #col-container {
+     margin: 0 auto;
+     max-width: 640px;
+ }
+ """
+
+ with gr.Blocks(css=css) as demo:
+     with gr.Column(elem_id="col-container"):
+         gr.Markdown("# FLUX.1 Studio Ghibli LoRA")
+         gr.Markdown(
+             "[alvarobartt/ghibli-characters-flux-lora](https://huggingface.co/alvarobartt/ghibli-characters-flux-lora)"
+             " is a LoRA fine-tune of [FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev)"
+             " with [alvarobartt/ghibli-characters](https://huggingface.co/datasets/alvarobartt/ghibli-characters)."
          )
+
+         with gr.Accordion("How to generate nice prompts?", open=False):
+             gr.Markdown(
+                 "What worked best for me to generate high-quality prompts of well-known characters,"
+                 " was to prompt either [Claude 3 Haiku](https://claude.ai), [GPT4-o](https://chatgpt.com/),"
+                 " or [Perplexity](https://www.perplexity.ai/) with:\n\nYou are an"
+                 " expert prompt writer for diffusion text to image models, and you've been provided"
+                 " the following prompt template:\n\n\"Ghibli style [character description] with"
+                 " [distinctive features], [action or pose], [environment or background],"
+                 " [lighting or atmosphere], [additional details].\"\n\nCould you create a prompt"
+                 " to generate [CHARACTER NAME] as a Studio Ghibli character following that template? "
+                 "[MORE DETAILS IF NEEDED]\n"
+             )
+
+         with gr.Row():
+             prompt = gr.Text(
+                 label="Prompt",
+                 show_label=False,
+                 max_lines=1,
+                 placeholder="Enter your prompt",
+                 container=False,
+             )
+
+             run_button = gr.Button("Run", scale=0)
+
+         result = gr.Image(label="Result", show_label=False)
+
+         with gr.Accordion("Advanced Settings", open=False):
+             seed = gr.Slider(
+                 label="Seed",
+                 minimum=0,
+                 maximum=MAX_SEED,
+                 step=1,
+                 value=42,
+             )
+
+             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+
+             with gr.Row():
+                 width = gr.Slider(
+                     label="Width",
+                     minimum=256,
+                     maximum=MAX_IMAGE_SIZE,
+                     step=32,
+                     value=1024,
+                 )
+
+                 height = gr.Slider(
+                     label="Height",
+                     minimum=256,
+                     maximum=MAX_IMAGE_SIZE,
+                     step=32,
+                     value=768,
+                 )
+
+             with gr.Row():
+                 guidance_scale = gr.Slider(
+                     label="Guidance scale",
+                     minimum=0.0,
+                     maximum=10.0,
+                     step=0.1,
+                     value=3.5,
+                 )
+
+                 num_inference_steps = gr.Slider(
+                     label="Number of inference steps",
+                     minimum=1,
+                     maximum=50,
+                     step=1,
+                     value=30,
+                 )
+
+                 lora_scale = gr.Slider(
+                     label="LoRA scale",
+                     minimum=0.0,
+                     maximum=1.0,
+                     step=0.1,
+                     value=1.0,
+                 )
+
+         gr.Examples(
+             examples=examples,
+             fn=lambda x: (Image.open("./example.jpg"), 42),
+             inputs=[prompt],
+             outputs=[result, seed],
+             run_on_click=True,
+         )
+
+         gr.Markdown(
+             "### Disclaimer\n\n"
+             "License is non-commercial for both FLUX.1-dev and the Studio Ghibli dataset; "
+             "but free to use for personal and non-commercial purposes."
+         )
+
+     gr.on(
+         triggers=[run_button.click, prompt.submit],
+         fn=inference,
+         inputs=[
+             prompt,
+             seed,
+             randomize_seed,
+             width,
+             height,
+             guidance_scale,
+             num_inference_steps,
+             lora_scale,
+         ],
+         outputs=[result, seed],
      )

+ demo.queue()
+ demo.launch()
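
For reference, the generation path introduced by this commit can also be exercised outside the Gradio UI and the @spaces.GPU decorator. The following is a minimal sketch, not part of the commit: it assumes a CUDA device with enough memory for FLUX.1-dev in bfloat16 and an HF_TOKEN with access to the gated repository. The model and adapter IDs, the sampler defaults, and the LoRA scale passed via joint_attention_kwargs are taken from the new app.py; the standalone main() wrapper, the shortened prompt, and the output filename ghibli_sample.png are illustrative additions.

import os

import torch
from diffusers import DiffusionPipeline

# Same identifiers as in the updated app.py.
repo_id = "black-forest-labs/FLUX.1-dev"
adapter_id = "alvarobartt/ghibli-characters-flux-lora"


def main() -> None:
    # HF_TOKEN is assumed to be set for the gated FLUX.1-dev repository.
    hf_token = os.environ.get("HF_TOKEN")

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Load the base model and attach the Ghibli LoRA, as the app does at startup.
    pipeline = DiffusionPipeline.from_pretrained(
        repo_id,
        torch_dtype=torch.bfloat16,
        use_auth_token=hf_token,  # Mirrors app.py; newer diffusers releases prefer token=.
    )
    pipeline.load_lora_weights(adapter_id)
    pipeline = pipeline.to(device)

    # Fixed seed instead of the app's optional seed randomization.
    generator = torch.Generator(device=device).manual_seed(42)

    # Same call as the app's inference() function; the LoRA strength is passed
    # through joint_attention_kwargs["scale"], and the defaults match the UI sliders.
    image = pipeline(
        prompt=(
            "Ghibli style futuristic stormtrooper with glossy white armor and a sleek "
            "helmet, standing heroically on a lush alien planet"
        ),
        guidance_scale=3.5,
        num_inference_steps=30,
        width=1024,
        height=768,
        generator=generator,
        joint_attention_kwargs={"scale": 1.0},
    ).images[0]

    image.save("ghibli_sample.png")  # Illustrative output path, not used by the app.


if __name__ == "__main__":
    main()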