davidizzle committed on
Commit
6d3a2f2
·
1 Parent(s): f52ef7c

FP8 support??

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import streamlit as st
2
- from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
4
  import base64
5
 
@@ -43,6 +43,8 @@ def load_model():
43
  # model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
44
  # model_id = "deepseek-ai/deepseek-llm-7b-chat"
45
  model_id = "deepseek-ai/DeepSeek-V3-0324"
 
 
46
  tokenizer = AutoTokenizer.from_pretrained(model_id)
47
  model = AutoModelForCausalLM.from_pretrained(
48
  model_id,
@@ -50,7 +52,8 @@ def load_model():
50
  # torch_dtype=torch.float32
51
  device_map="auto",
52
  torch_dtype=torch.float16,
53
- trust_remote_code = True
 
54
  )
55
  # model.to("cpu")
56
  return tokenizer, model
 
1
  import streamlit as st
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, FineGrainedFP8Config
3
  import torch
4
  import base64
5
 
 
43
  # model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
44
  # model_id = "deepseek-ai/deepseek-llm-7b-chat"
45
  model_id = "deepseek-ai/DeepSeek-V3-0324"
46
+
47
+ quantization_config = FineGrainedFP8Config()
48
  tokenizer = AutoTokenizer.from_pretrained(model_id)
49
  model = AutoModelForCausalLM.from_pretrained(
50
  model_id,
 
52
  # torch_dtype=torch.float32
53
  device_map="auto",
54
  torch_dtype=torch.float16,
55
+ trust_remote_code = True,
56
+ quantization_config=quantization_config
57
  )
58
  # model.to("cpu")
59
  return tokenizer, model