LPX55 committed
Commit d836b5b (verified) · Parent: a84487d

Update mini.py

Files changed (1):
  1. mini.py +24 -31
mini.py CHANGED
@@ -3,12 +3,10 @@ import torch
 import spaces
 from PIL import Image
 import os
-from transformers import CLIPTokenizer, CLIPTextModel, AutoProcessor, T5EncoderModel, T5TokenizerFast, BitsAndBytesConfig
+from transformers import CLIPTokenizer, CLIPTextModel, AutoProcessor, T5EncoderModel, T5TokenizerFast
 from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
 from flux.transformer_flux_simple import FluxTransformer2DModel
 from flux.pipeline_flux_chameleon_og import FluxPipeline
-from flux.pipeline_flux_img2img import FluxImg2ImgPipeline
-
 import torch.nn as nn
 import math
 import logging
@@ -31,9 +29,6 @@ MODEL_CACHE_DIR = "model_cache"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 DTYPE = torch.bfloat16
 
-quant_config = BitsAndBytesConfig(load_in_8bit=True,)
-
-
 # Aspect ratio options
 ASPECT_RATIOS = {
     "1:1": (1024, 1024),
@@ -86,13 +81,12 @@ tokenizer_two = T5TokenizerFast.from_pretrained(
 
 # Load larger models to CPU
 vae = AutoencoderKL.from_pretrained(
-    os.path.join(MODEL_CACHE_DIR, "flux/vae"),
-).to(DTYPE).cpu()
+    os.path.join(MODEL_CACHE_DIR, "flux/vae")
+).to(DTYPE).to(DEVICE)
 
 transformer = FluxTransformer2DModel.from_pretrained(
-    os.path.join(MODEL_CACHE_DIR, "flux/transformer"),
-    quantization_config=quant_config,
-).to(DTYPE).cpu()
+    os.path.join(MODEL_CACHE_DIR, "flux/transformer")
+).to(DTYPE).to(DEVICE)
 
 scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
     os.path.join(MODEL_CACHE_DIR, "flux/scheduler"),
@@ -101,9 +95,8 @@ scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
 
 # Load Qwen2VL to CPU
 qwen2vl = Qwen2VLSimplifiedModel.from_pretrained(
-    os.path.join(MODEL_CACHE_DIR, "qwen2-vl"),
-    quantization_config=quant_config,
-)
+    os.path.join(MODEL_CACHE_DIR, "qwen2-vl")
+).to(DTYPE).cpu()
 
 # Load connector and embedder
 connector = Qwen2Connector().to(DTYPE).cpu()
@@ -140,15 +133,24 @@ pipeline = FluxPipeline(
     text_encoder=text_encoder,
     tokenizer=tokenizer,
 )
-
+# Move Qwen2VL models to GPU
+logger.info("Moving Qwen2VL models to GPU...")
+qwen2vl.to(DEVICE)
+connector.to(DEVICE)
+
+# # Move Transformer and VAE to GPU
+# logger.info("Moving Transformer and VAE to GPU...")
+# transformer.to(DEVICE)
+# vae.to(DEVICE)
+
+# # Update pipeline models
+# pipeline.transformer = transformer
+# pipeline.vae = vae
+# logger.info("Models moved to GPU")
 def process_image(image):
     """Process image with Qwen2VL model"""
     try:
-        # Move Qwen2VL models to GPU
-        logger.info("Moving Qwen2VL models to GPU...")
-        # qwen2vl.to(DEVICE)
-        # connector.to(DEVICE)
-
+
         message = [
             {
                 "role": "user",
@@ -221,7 +223,7 @@ def compute_t5_text_embeddings(prompt):
 
     prompt_embeds = text_encoder_two(text_inputs.input_ids)[0]
    prompt_embeds = t5_context_embedder.to(DEVICE)(prompt_embeds)
-    # t5_context_embedder.cpu()
+    t5_context_embedder.cpu()
 
     return prompt_embeds
 
@@ -264,16 +266,7 @@ def generate(input_image, prompt="", guidance_scale=3.5, num_inference_steps=28,
     pooled_prompt_embeds = compute_text_embeddings(prompt)
     t5_prompt_embeds = compute_t5_text_embeddings(prompt)
     logger.info("Text embeddings computed")
-
-    # Move Transformer and VAE to GPU
-    logger.info("Moving Transformer and VAE to GPU...")
-    # transformer.to(DEVICE)
-    # vae.to(DEVICE)
-
-    # Update pipeline models
-    # pipeline.transformer = transformer
-    # pipeline.vae = vae
-    logger.info("Models moved to GPU")
+
 
     # Get dimensions
     width, height = ASPECT_RATIOS[aspect_ratio]
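For reference, the pattern this commit removes: the FLUX transformer and Qwen2VL were previously loaded through bitsandbytes 8-bit quantization. A minimal sketch of that loading style, assuming a generic transformers checkpoint (the T5 model name below is illustrative, not taken from this repo):

import torch
from transformers import BitsAndBytesConfig, T5EncoderModel

# 8-bit weight quantization via bitsandbytes: roughly half the memory of
# bf16 weights, at the cost of some dequantization overhead per matmul.
quant_config = BitsAndBytesConfig(load_in_8bit=True)

# Illustrative checkpoint; a model loaded this way is placed on the GPU
# during from_pretrained and must stay there (requires accelerate).
encoder = T5EncoderModel.from_pretrained(
    "google/t5-v1_1-xl",
    quantization_config=quant_config,
    device_map="auto",
)

The removal is plausibly motivated by two constraints of that API: bitsandbytes-quantized models cannot be relocated afterwards with .to()/.cpu(), which conflicts with the chained .to(DTYPE).cpu() calls in the old code, and a diffusers model such as FluxTransformer2DModel expects diffusers' own BitsAndBytesConfig rather than the transformers one.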
 
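What replaces it is plain bf16 placement, settled once at import time: the VAE and transformer go straight to the GPU, while the Qwen2VL encoder and connector are staged on the CPU and promoted right after the pipeline is assembled (the untouched "# Load larger models to CPU" context comment is now stale for the VAE and transformer). A condensed sketch of the resulting scheme, using toy nn stand-ins so it runs without the actual checkpoints:

import torch
import torch.nn as nn

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.bfloat16

# Toy stand-ins for the real checkpoints, so the sketch runs anywhere.
vae = nn.Conv2d(3, 16, 3)          # plays the role of AutoencoderKL
transformer = nn.Linear(64, 64)    # plays the role of FluxTransformer2DModel
qwen2vl = nn.Linear(64, 64)        # plays the role of Qwen2VLSimplifiedModel

# New scheme: the denoising components are GPU-resident from the start...
vae = vae.to(DTYPE).to(DEVICE)
transformer = transformer.to(DTYPE).to(DEVICE)

# ...while the vision encoder is staged on CPU and promoted after the
# pipeline is assembled (previously it was moved inside process_image).
qwen2vl = qwen2vl.to(DTYPE).cpu()
qwen2vl.to(DEVICE)  # nn.Module.to moves parameters in place and returns self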
 
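Finally, uncommenting t5_context_embedder.cpu() in compute_t5_text_embeddings restores an offload-after-use pattern: the embedder rides to the GPU only for the duration of one call, then returns to host memory. A minimal sketch of that pattern in isolation; run_offloaded is a hypothetical helper, not a function in this repo:

import torch
import torch.nn as nn

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

def run_offloaded(module: nn.Module, x: torch.Tensor) -> torch.Tensor:
    """Move a CPU-resident module to the GPU for a single call, then back.

    Trades a host-to-device weight copy per invocation for zero GPU
    residency between calls; sensible only for small, rarely used
    modules like the T5 context embedder here.
    """
    module.to(DEVICE)
    try:
        return module(x.to(DEVICE))
    finally:
        module.cpu()  # same effect as the restored t5_context_embedder.cpu()

# Example: a toy projection standing in for t5_context_embedder.
embedder = nn.Linear(4096, 3072)
out = run_offloaded(embedder, torch.randn(1, 512, 4096))

The try/finally guarantees the module lands back on the CPU even if the forward pass throws, which the inline version in mini.py does not.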