Update mini.py
mini.py CHANGED

@@ -3,12 +3,10 @@ import torch
 import spaces
 from PIL import Image
 import os
-from transformers import CLIPTokenizer, CLIPTextModel, AutoProcessor, T5EncoderModel, T5TokenizerFast
+from transformers import CLIPTokenizer, CLIPTextModel, AutoProcessor, T5EncoderModel, T5TokenizerFast
 from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
 from flux.transformer_flux_simple import FluxTransformer2DModel
 from flux.pipeline_flux_chameleon_og import FluxPipeline
-from flux.pipeline_flux_img2img import FluxImg2ImgPipeline
-
 import torch.nn as nn
 import math
 import logging
@@ -31,9 +29,6 @@ MODEL_CACHE_DIR = "model_cache"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 DTYPE = torch.bfloat16
 
-quant_config = BitsAndBytesConfig(load_in_8bit=True,)
-
-
 # Aspect ratio options
 ASPECT_RATIOS = {
     "1:1": (1024, 1024),
@@ -86,13 +81,12 @@ tokenizer_two = T5TokenizerFast.from_pretrained(
 
 # Load larger models to CPU
 vae = AutoencoderKL.from_pretrained(
-    os.path.join(MODEL_CACHE_DIR, "flux/vae")
-).to(DTYPE).cpu()
+    os.path.join(MODEL_CACHE_DIR, "flux/vae")
+).to(DTYPE).to(DEVICE)
 
 transformer = FluxTransformer2DModel.from_pretrained(
-    os.path.join(MODEL_CACHE_DIR, "flux/transformer")
-
-).to(DTYPE).cpu()
+    os.path.join(MODEL_CACHE_DIR, "flux/transformer")
+).to(DTYPE).to(DEVICE)
 
 scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
     os.path.join(MODEL_CACHE_DIR, "flux/scheduler"),
@@ -101,9 +95,8 @@ scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
 
 # Load Qwen2VL to CPU
 qwen2vl = Qwen2VLSimplifiedModel.from_pretrained(
-    os.path.join(MODEL_CACHE_DIR, "qwen2-vl")
-
-)
+    os.path.join(MODEL_CACHE_DIR, "qwen2-vl")
+).to(DTYPE).cpu()
 
 # Load connector and embedder
 connector = Qwen2Connector().to(DTYPE).cpu()
@@ -140,15 +133,24 @@ pipeline = FluxPipeline(
     text_encoder=text_encoder,
     tokenizer=tokenizer,
 )
-
+# Move Qwen2VL models to GPU
+logger.info("Moving Qwen2VL models to GPU...")
+qwen2vl.to(DEVICE)
+connector.to(DEVICE)
+
+# # Move Transformer and VAE to GPU
+# logger.info("Moving Transformer and VAE to GPU...")
+# transformer.to(DEVICE)
+# vae.to(DEVICE)
+
+# # Update pipeline models
+# pipeline.transformer = transformer
+# pipeline.vae = vae
+# logger.info("Models moved to GPU")
 def process_image(image):
     """Process image with Qwen2VL model"""
     try:
-
-        logger.info("Moving Qwen2VL models to GPU...")
-        # qwen2vl.to(DEVICE)
-        # connector.to(DEVICE)
-
+
         message = [
             {
                 "role": "user",
@@ -221,7 +223,7 @@ def compute_t5_text_embeddings(prompt):
 
     prompt_embeds = text_encoder_two(text_inputs.input_ids)[0]
    prompt_embeds = t5_context_embedder.to(DEVICE)(prompt_embeds)
-
+    t5_context_embedder.cpu()
 
     return prompt_embeds
 
@@ -264,16 +266,7 @@ def generate(input_image, prompt="", guidance_scale=3.5, num_inference_steps=28,
         pooled_prompt_embeds = compute_text_embeddings(prompt)
         t5_prompt_embeds = compute_t5_text_embeddings(prompt)
         logger.info("Text embeddings computed")
-
-        # Move Transformer and VAE to GPU
-        logger.info("Moving Transformer and VAE to GPU...")
-        # transformer.to(DEVICE)
-        # vae.to(DEVICE)
-
-        # Update pipeline models
-        # pipeline.transformer = transformer
-        # pipeline.vae = vae
-        logger.info("Models moved to GPU")
+
 
         # Get dimensions
         width, height = ASPECT_RATIOS[aspect_ratio]