blanchon committed
Commit 7a0fd29 · 1 Parent(s): b62f5fd

Files changed (1):
  app-fast.py +22 -4
app-fast.py CHANGED
@@ -2,15 +2,20 @@ import gradio as gr
 import PIL
 import spaces
 import torch
+from diffusers import TorchAoConfig
 from hi_diffusers import HiDreamImagePipeline, HiDreamImageTransformer2DModel
 from hi_diffusers.schedulers.flash_flow_match import (
     FlashFlowMatchEulerDiscreteScheduler,
 )
-from transformers import AutoTokenizer, LlamaForCausalLM
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    AwqConfig,
+)

 # Constants
 MODEL_PREFIX: str = "HiDream-ai"
-LLAMA_MODEL_NAME: str = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+LLAMA_MODEL_NAME: str = "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
 MODEL_PATH = "HiDream-ai/HiDream-I1-Fast"
 MODEL_CONFIGS = {
     "guidance_scale": 0.0,
@@ -32,17 +37,30 @@ RESOLUTION_OPTIONS: list[str] = [
 ]


+quantization_config = AwqConfig(
+    bits=4,
+    fuse_max_seq_len=512,  # Note: Update this as per your use-case
+    do_fuse=True,
+)
+
 tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL_NAME, use_fast=False)
-text_encoder = LlamaForCausalLM.from_pretrained(
+text_encoder = AutoModelForCausalLM.from_pretrained(
     LLAMA_MODEL_NAME,
+    torch_dtype=torch.float16,
+    low_cpu_mem_usage=True,
+    device_map="auto",
     output_hidden_states=True,
     output_attentions=True,
-    torch_dtype=torch.bfloat16,
+    quantization_config=quantization_config,
 ).to("cuda")

+quantization_config = TorchAoConfig("int8wo")
+
+quantization_config = TorchAoConfig("int8wo")
 transformer = HiDreamImageTransformer2DModel.from_pretrained(
     MODEL_PATH,
     subfolder="transformer",
+    quantization_config=quantization_config,
     torch_dtype=torch.bfloat16,
 ).to("cuda")
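In sum, the commit swaps the full-precision meta-llama/Meta-Llama-3.1-8B-Instruct text encoder for the prequantized hugging-quants AWQ INT4 checkpoint, loaded through AutoModelForCausalLM with fused AWQ kernels, and loads the HiDream diffusion transformer with int8 weight-only torchao quantization. Two caveats: the diff assigns quantization_config = TorchAoConfig("int8wo") twice, where one assignment suffices, and it stops at model loading. The sketch below is a hedged reconstruction of how the pieces would fit together; the tokenizer_4/text_encoder_4 pipeline arguments, the scheduler settings (shift=3.0, 1000 train timesteps), and num_inference_steps=16 are taken from the upstream HiDream-I1 examples, not from this diff, and the prompt and seed are illustrative.

import torch
from diffusers import TorchAoConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, AwqConfig

from hi_diffusers import HiDreamImagePipeline, HiDreamImageTransformer2DModel
from hi_diffusers.schedulers.flash_flow_match import (
    FlashFlowMatchEulerDiscreteScheduler,
)

LLAMA_MODEL_NAME = "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
MODEL_PATH = "HiDream-ai/HiDream-I1-Fast"

# AWQ INT4 config for the Llama text encoder, as in the diff. Fused AWQ
# modules are built for a fixed length, so prompts longer than
# fuse_max_seq_len tokens can fail in the encoder.
awq_config = AwqConfig(bits=4, fuse_max_seq_len=512, do_fuse=True)

tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL_NAME, use_fast=False)
text_encoder = AutoModelForCausalLM.from_pretrained(
    LLAMA_MODEL_NAME,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    device_map="auto",           # accelerate places the model; an extra .to("cuda") is then redundant
    output_hidden_states=True,   # the pipeline consumes hidden states, not logits
    output_attentions=True,
    quantization_config=awq_config,
)

# int8 weight-only (torchao) quantization for the diffusion transformer.
# The diff creates this config twice; once is enough.
torchao_config = TorchAoConfig("int8wo")
transformer = HiDreamImageTransformer2DModel.from_pretrained(
    MODEL_PATH,
    subfolder="transformer",
    quantization_config=torchao_config,
    torch_dtype=torch.bfloat16,
).to("cuda")

# Hypothetical wiring, not part of the diff: upstream HiDream-I1 examples
# pass the Llama tokenizer/encoder in the pipeline's fourth text-encoder
# slot and swap in the separately loaded transformer afterwards.
scheduler = FlashFlowMatchEulerDiscreteScheduler(
    num_train_timesteps=1000, shift=3.0, use_dynamic_shifting=False
)
pipe = HiDreamImagePipeline.from_pretrained(
    MODEL_PATH,
    scheduler=scheduler,
    tokenizer_4=tokenizer,
    text_encoder_4=text_encoder,
    torch_dtype=torch.bfloat16,
).to("cuda")
pipe.transformer = transformer

image = pipe(
    "a cat wearing sunglasses",   # illustrative prompt
    num_inference_steps=16,       # Fast-checkpoint default in upstream examples
    guidance_scale=0.0,           # matches MODEL_CONFIGS in the diff
    generator=torch.Generator("cuda").manual_seed(0),
).images[0]
image.save("out.png")

One design point worth noting: the two components use different quantization schemes on purpose. The text encoder comes prequantized (AWQ INT4 weights already baked into the checkpoint), while the transformer is quantized on the fly at load time (torchao int8 weight-only applied to the bfloat16 weights), which is why only the latter keeps an explicit torch_dtype of bfloat16 and a .to("cuda") call.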