amos1088 committed on
Commit
f40f178
·
1 Parent(s): 5340aac
Files changed (1) hide show
  1. app.py +17 -40
app.py CHANGED
@@ -4,15 +4,10 @@ import torch
4
  import gradio as gr
5
  import spaces
6
  from huggingface_hub import login
7
- # from diffusers.utils import load_image
8
- #
9
- # from models.transformer_sd3 import SD3Transformer2DModel
10
- # from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline
11
- import torch
12
- from diffusers import StableDiffusion3ControlNetPipeline, SD3ControlNetModel
13
  from diffusers.utils import load_image
14
- from image_gen_aux import DepthPreprocessor
15
- from diffusers.models import SD3ControlNetModel, T2IAdapter
 
16
 
17
  # ----------------------------
18
  # Step 1: Download IP Adapter if not exists
@@ -37,63 +32,45 @@ if not token:
37
  raise ValueError("Hugging Face token not found. Set the 'HF_TOKEN' environment variable.")
38
  login(token=token)
39
 
40
- # model_path = 'stabilityai/stable-diffusion-3.5-large'
41
  ip_adapter_path = './ip-adapter.bin'
42
  image_encoder_path = "google/siglip-so400m-patch14-384"
43
- #
44
- # transformer = SD3Transformer2DModel.from_pretrained(
45
- # model_path, subfolder="transformer", torch_dtype=torch.bfloat16
46
- # )
47
- #
48
- # pipe = StableDiffusion3Pipeline.from_pretrained(
49
- # model_path, transformer=transformer, torch_dtype=torch.bfloat16
50
- # ).to("cuda")
51
-
52
 
 
 
 
53
 
54
- controlnet = SD3ControlNetModel.from_pretrained("stabilityai/stable-diffusion-3.5-large-controlnet-depth", torch_dtype=torch.float16)
 
 
55
 
56
- adapter = T2IAdapter.from_pretrained(
57
- ip_adapter_path,
58
  image_encoder_path=image_encoder_path,
59
  nb_token=64,
60
- torch_dtype=torch.float16
61
  )
62
 
63
- pipe = StableDiffusion3ControlNetPipeline.from_pretrained(
64
- "stabilityai/stable-diffusion-3.5-large",
65
- controlnet=controlnet,adapter=adapter,
66
- torch_dtype=torch.float16,
67
- ).to("cuda")
68
-
69
-
70
-
71
 
72
  # ----------------------------
73
  # Step 6: Gradio Function
74
  # ----------------------------
75
  @spaces.GPU
76
  def gui_generation(prompt,negative_prompt, ref_img, guidance_scale, ipadapter_scale):
77
- ref_img = load_image(ref_img.name).convert('RGB')
78
- image = load_image(ref_img.name)
79
 
80
- depth_preprocessor = DepthPreprocessor.from_pretrained("depth-anything/Depth-Anything-V2-Large-hf").to("cuda")
81
- control_image = depth_preprocessor(image, invert=True)[0].convert("RGB")
82
 
83
- generator = torch.Generator(device="cpu").manual_seed(0)
84
- pipe.set_ip_adapter_scale(ipadapter_scale) # Adjust the scale as needed
85
 
 
86
  image = pipe(
87
  width=1024,
88
  height=1024,
89
  prompt=prompt,
90
  negative_prompt=negative_prompt,
91
- control_image=control_image,
92
  guidance_scale=guidance_scale,
 
93
  clip_image=ref_img,
94
- num_inference_steps=40,
95
- generator=generator,
96
- max_sequence_length=77,
97
  ).images[0]
98
 
99
  return image
 
4
  import gradio as gr
5
  import spaces
6
  from huggingface_hub import login
 
 
 
 
 
 
7
  from diffusers.utils import load_image
8
+
9
+ from models.transformer_sd3 import SD3Transformer2DModel
10
+ from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline
11
 
12
  # ----------------------------
13
  # Step 1: Download IP Adapter if not exists
 
32
  raise ValueError("Hugging Face token not found. Set the 'HF_TOKEN' environment variable.")
33
  login(token=token)
34
 
35
+ model_path = 'stabilityai/stable-diffusion-3.5-large'
36
  ip_adapter_path = './ip-adapter.bin'
37
  image_encoder_path = "google/siglip-so400m-patch14-384"
 
 
 
 
 
 
 
 
 
38
 
39
+ transformer = SD3Transformer2DModel.from_pretrained(
40
+ model_path, subfolder="transformer", torch_dtype=torch.bfloat16
41
+ )
42
 
43
+ pipe = StableDiffusion3Pipeline.from_pretrained(
44
+ model_path, transformer=transformer, torch_dtype=torch.bfloat16
45
+ ).to("cuda")
46
 
47
+ pipe.init_ipadapter(
48
+ ip_adapter_path=ip_adapter_path,
49
  image_encoder_path=image_encoder_path,
50
  nb_token=64,
 
51
  )
52
 
 
 
 
 
 
 
 
 
53
 
54
  # ----------------------------
55
  # Step 6: Gradio Function
56
  # ----------------------------
57
  @spaces.GPU
58
  def gui_generation(prompt,negative_prompt, ref_img, guidance_scale, ipadapter_scale):
 
 
59
 
 
 
60
 
61
+ ref_img = load_image(ref_img.name).convert('RGB')
 
62
 
63
+ # Note: SD3.5 Large is sensitive to high-resolution generation such as 1536x1536
64
  image = pipe(
65
  width=1024,
66
  height=1024,
67
  prompt=prompt,
68
  negative_prompt=negative_prompt,
69
+ num_inference_steps=24,
70
  guidance_scale=guidance_scale,
71
+ generator=torch.Generator("cuda").manual_seed(42),
72
  clip_image=ref_img,
73
+ ipadapter_scale=ipadapter_scale,
 
 
74
  ).images[0]
75
 
76
  return image