Spaces:
Paused
Paused
up
Browse files- app-fast.py +22 -4
app-fast.py
CHANGED
@@ -2,15 +2,20 @@ import gradio as gr
|
|
2 |
import PIL
|
3 |
import spaces
|
4 |
import torch
|
|
|
5 |
from hi_diffusers import HiDreamImagePipeline, HiDreamImageTransformer2DModel
|
6 |
from hi_diffusers.schedulers.flash_flow_match import (
|
7 |
FlashFlowMatchEulerDiscreteScheduler,
|
8 |
)
|
9 |
-
from transformers import
|
|
|
|
|
|
|
|
|
10 |
|
11 |
# Constants
|
12 |
MODEL_PREFIX: str = "HiDream-ai"
|
13 |
-
LLAMA_MODEL_NAME: str = "
|
14 |
MODEL_PATH = "HiDream-ai/HiDream-I1-Fast"
|
15 |
MODEL_CONFIGS = {
|
16 |
"guidance_scale": 0.0,
|
@@ -32,17 +37,30 @@ RESOLUTION_OPTIONS: list[str] = [
|
|
32 |
]
|
33 |
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL_NAME, use_fast=False)
|
36 |
-
text_encoder =
|
37 |
LLAMA_MODEL_NAME,
|
|
|
|
|
|
|
38 |
output_hidden_states=True,
|
39 |
output_attentions=True,
|
40 |
-
|
41 |
).to("cuda")
|
42 |
|
|
|
|
|
|
|
43 |
transformer = HiDreamImageTransformer2DModel.from_pretrained(
|
44 |
MODEL_PATH,
|
45 |
subfolder="transformer",
|
|
|
46 |
torch_dtype=torch.bfloat16,
|
47 |
).to("cuda")
|
48 |
|
|
|
2 |
import PIL
|
3 |
import spaces
|
4 |
import torch
|
5 |
+
from diffusers import TorchAoConfig
|
6 |
from hi_diffusers import HiDreamImagePipeline, HiDreamImageTransformer2DModel
|
7 |
from hi_diffusers.schedulers.flash_flow_match import (
|
8 |
FlashFlowMatchEulerDiscreteScheduler,
|
9 |
)
|
10 |
+
from transformers import (
|
11 |
+
AutoModelForCausalLM,
|
12 |
+
AutoTokenizer,
|
13 |
+
AwqConfig,
|
14 |
+
)
|
15 |
|
16 |
# Constants
# Hugging Face organisation that publishes the HiDream checkpoints.
MODEL_PREFIX: str = "HiDream-ai"
# Pre-quantized (AWQ, INT4) Llama 3.1 8B Instruct repo, used below for both
# the tokenizer and the text encoder.
LLAMA_MODEL_NAME: str = "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
# Repo id of the image model; built from MODEL_PREFIX so the organisation name
# is spelled in exactly one place. Resolves to "HiDream-ai/HiDream-I1-Fast".
MODEL_PATH: str = f"{MODEL_PREFIX}/HiDream-I1-Fast"
|
20 |
MODEL_CONFIGS = {
|
21 |
"guidance_scale": 0.0,
|
|
|
37 |
]
|
38 |
|
39 |
|
40 |
+
# AWQ settings handed to the text-encoder load below: 4-bit weights with
# module fusing enabled.
quantization_config = AwqConfig(
    bits=4,
    do_fuse=True,
    fuse_max_seq_len=512,  # Note: Update this as per your use-case
)

# Tokenizer and text encoder are both pulled from the same Llama repo.
tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL_NAME, use_fast=False)

text_encoder = AutoModelForCausalLM.from_pretrained(
    LLAMA_MODEL_NAME,
    quantization_config=quantization_config,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True,
    # Ask the model to return per-layer hidden states and attention maps.
    output_hidden_states=True,
    output_attentions=True,
)
# NOTE(review): device_map="auto" presumably already places the weights on the
# GPU, which would make this move redundant -- confirm before removing it.
text_encoder = text_encoder.to("cuda")
|
56 |
|
57 |
+
# torchao quantization settings for the diffusion transformer ("int8wo" is the
# shorthand passed to TorchAoConfig). This rebinds `quantization_config`, which
# held the AWQ settings for the text encoder up to this point. The assignment
# was previously written twice back to back; once is enough.
quantization_config = TorchAoConfig("int8wo")

# Load only the "transformer" subfolder of the HiDream checkpoint in bfloat16,
# quantize it with the config above, and move it to the GPU.
transformer = HiDreamImageTransformer2DModel.from_pretrained(
    MODEL_PATH,
    subfolder="transformer",
    quantization_config=quantization_config,
    torch_dtype=torch.bfloat16,
).to("cuda")
|
66 |
|