rajabmondal committed on
Commit
5a497b3
·
verified ·
1 Parent(s): 196b9de

updated quantization

Browse files
Files changed (1) hide show
  1. README.md +18 -0
README.md CHANGED
@@ -82,6 +82,24 @@ inputs = tokenizer.encode("public class HelloWorld {\n public static void mai
82
  outputs = model.generate(inputs)
83
  print(tokenizer.decode(outputs[0]))
84
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  ### Attribution & Other Requirements
87
 
 
82
  outputs = model.generate(inputs)
83
  print(tokenizer.decode(outputs[0]))
84
  ```
85
+ #### Quantized Versions through `bitsandbytes`
86
+ * _Using 8-bit precision (int8)_
87
+
88
+ ```python
89
+ # pip install bitsandbytes accelerate
90
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
91
+
92
+ # for 4-bit precision, pass `load_in_4bit=True` instead of `load_in_8bit=True`
93
+ quantization_config = BitsAndBytesConfig(load_in_8bit=True)
94
+
95
+ checkpoint = "infosys/NT-Java-1.1B"
96
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
97
+ model = AutoModelForCausalLM.from_pretrained(checkpoint, quantization_config=quantization_config)
98
+
99
+ inputs = tokenizer.encode("public class HelloWorld {\n public static void main(String[] args) {", return_tensors="pt").to("cuda")
100
+ outputs = model.generate(inputs)
101
+ print(tokenizer.decode(outputs[0]))
102
+ ```
103
 
104
  ### Attribution & Other Requirements
105