Spaces:

amos1088
/

test_gradio

Paused

App Files Files Community

amos1088 committed on Dec 18, 2024

Commit

f40f178

1 Parent(s): 5340aac

uuu

Browse files

Files changed (1) hide show

app.py +17 -40

app.py CHANGED Viewed

@@ -4,15 +4,10 @@ import torch
 import gradio as gr
 import spaces
 from huggingface_hub import login
-# from diffusers.utils import load_image
-#
-# from models.transformer_sd3 import SD3Transformer2DModel
-# from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline
-import torch
-from diffusers import StableDiffusion3ControlNetPipeline, SD3ControlNetModel
 from diffusers.utils import load_image
-from image_gen_aux import DepthPreprocessor
-from diffusers.models import SD3ControlNetModel, T2IAdapter
 # ----------------------------
 # Step 1: Download IP Adapter if not exists
@@ -37,63 +32,45 @@ if not token:
     raise ValueError("Hugging Face token not found. Set the 'HF_TOKEN' environment variable.")
 login(token=token)
-# model_path = 'stabilityai/stable-diffusion-3.5-large'
 ip_adapter_path = './ip-adapter.bin'
 image_encoder_path = "google/siglip-so400m-patch14-384"
-#
-# transformer = SD3Transformer2DModel.from_pretrained(
-#     model_path, subfolder="transformer", torch_dtype=torch.bfloat16
-# )
-#
-# pipe = StableDiffusion3Pipeline.from_pretrained(
-#     model_path, transformer=transformer, torch_dtype=torch.bfloat16
-# ).to("cuda")
-controlnet = SD3ControlNetModel.from_pretrained("stabilityai/stable-diffusion-3.5-large-controlnet-depth", torch_dtype=torch.float16)
-adapter = T2IAdapter.from_pretrained(
-    ip_adapter_path,
     image_encoder_path=image_encoder_path,
     nb_token=64,
-    torch_dtype=torch.float16
 )
-pipe = StableDiffusion3ControlNetPipeline.from_pretrained(
-    "stabilityai/stable-diffusion-3.5-large",
-    controlnet=controlnet,adapter=adapter,
-    torch_dtype=torch.float16,
-).to("cuda")
 # ----------------------------
 # Step 6: Gradio Function
 # ----------------------------
 @spaces.GPU
 def gui_generation(prompt,negative_prompt, ref_img, guidance_scale, ipadapter_scale):
-    ref_img = load_image(ref_img.name).convert('RGB')
-    image = load_image(ref_img.name)
-    depth_preprocessor = DepthPreprocessor.from_pretrained("depth-anything/Depth-Anything-V2-Large-hf").to("cuda")
-    control_image = depth_preprocessor(image, invert=True)[0].convert("RGB")
-    generator = torch.Generator(device="cpu").manual_seed(0)
-    pipe.set_ip_adapter_scale(ipadapter_scale)  # Adjust the scale as needed
     image = pipe(
         width=1024,
         height=1024,
         prompt=prompt,
         negative_prompt=negative_prompt,
-        control_image=control_image,
         guidance_scale=guidance_scale,
         clip_image=ref_img,
-        num_inference_steps=40,
-        generator=generator,
-        max_sequence_length=77,
     ).images[0]
     return image

 import gradio as gr
 import spaces
 from huggingface_hub import login
 from diffusers.utils import load_image
+from models.transformer_sd3 import SD3Transformer2DModel
+from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline
 # ----------------------------
 # Step 1: Download IP Adapter if not exists
     raise ValueError("Hugging Face token not found. Set the 'HF_TOKEN' environment variable.")
 login(token=token)
+model_path = 'stabilityai/stable-diffusion-3.5-large'
 ip_adapter_path = './ip-adapter.bin'
 image_encoder_path = "google/siglip-so400m-patch14-384"
+transformer = SD3Transformer2DModel.from_pretrained(
+    model_path, subfolder="transformer", torch_dtype=torch.bfloat16
+)
+pipe = StableDiffusion3Pipeline.from_pretrained(
+    model_path, transformer=transformer, torch_dtype=torch.bfloat16
+).to("cuda")
+pipe.init_ipadapter(
+    ip_adapter_path=ip_adapter_path,
     image_encoder_path=image_encoder_path,
     nb_token=64,
 )
 # ----------------------------
 # Step 6: Gradio Function
 # ----------------------------
 @spaces.GPU
 def gui_generation(prompt,negative_prompt, ref_img, guidance_scale, ipadapter_scale):
+    ref_img = load_image(ref_img.name).convert('RGB')
+    # please note that SD3.5 Large is sensitive to highres generation like 1536x1536
     image = pipe(
         width=1024,
         height=1024,
         prompt=prompt,
         negative_prompt=negative_prompt,
+        num_inference_steps=24,
         guidance_scale=guidance_scale,
+        generator=torch.Generator("cuda").manual_seed(42),
         clip_image=ref_img,
+        ipadapter_scale=ipadapter_scale,
     ).images[0]
     return image