Update README.md
Browse files
README.md
CHANGED
@@ -1,3 +1,143 @@
|
|
1 |
-
---
|
2 |
-
license: mit
|
3 |
-
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: mit
|
3 |
+
---
|
4 |
+
|
5 |
+
This repository contains the Model Checkpoint of GLM-4-32B-0414-GPTQ-4bits
|
6 |
+
|
7 |
+
Base model: GLM-4-32B-0414
|
8 |
+
|
9 |
+
Quantization method: GPTQ
|
10 |
+
|
11 |
+
Repository of quantization method: https://github.com/modelcloud/gptqmodel
|
12 |
+
|
13 |
+
## EoRA Method Dataset
|
14 |
+
|
15 |
+
|
16 |
+
```python
|
17 |
+
|
18 |
+
from datasets import load_dataset
|
19 |
+
|
20 |
+
|
21 |
+
def question_answering_format(question, answer):
    """Render a plain question/answer pair as a single prompt string."""
    return "Question: {}\nAnswer: {}".format(question, answer)
|
24 |
+
|
25 |
+
def multiple_choices_question_answering_format(question, choices, answer):
    """Render a multiple-choice question as a single prompt string.

    Generalized from the original, which indexed ``choices[0]``..``choices[3]``
    and therefore required exactly four options: choices are now labelled
    A, B, C, ... in order, so any number of options works. Output is
    byte-identical to the original for four choices.

    Args:
        question: Question text; surrounding whitespace is stripped.
        choices: Sequence of option strings, in label order.
        answer: Gold answer label (e.g. "B") appended after "Answer: ".

    Returns:
        The formatted prompt string.
    """
    labelled = "\n".join(
        f"{chr(ord('A') + i)}. {choice}" for i, choice in enumerate(choices)
    )
    return f"{question.strip()}\n{labelled}\nAnswer: {answer}"
|
27 |
+
|
28 |
+
## An example of using ARC for constructing the EoRA calibration set
|
29 |
+
|
30 |
+
def construct_c4():
    """Return 1024 C4 text passages for use as an EoRA calibration set.

    NOTE(review): loads from a machine-local C4 mirror — adjust the path
    for your own environment.
    """
    raw = load_dataset(
        "/mnt/ceph/develop/jiawei/code_dataset/c4",
        data_files="en.noblocklist/c4-train.00001-of-01024.json.gz",
        split="train", download_mode="force_redownload"
    )
    return raw.select(range(1024))["text"]
|
37 |
+
|
38 |
+
def construct_ARC(nsamples=1024):
    """Build an EoRA calibration set from ARC-Easy + ARC-Challenge plus C4.

    Improvements over the original: ``nsamples`` was a hard-coded local
    (1024) and the easy/challenge loops were duplicated verbatim; the count
    is now a backward-compatible parameter and the two splits share one loop.

    Args:
        nsamples: Number of examples taken from each ARC split and from C4.
            Defaults to 1024, matching the original behavior.

    Returns:
        A list of strings: formatted ARC Q/A prompts (easy first, then
        challenge) followed by raw C4 passages.
    """
    dataset = []
    for subset in ('ARC-Easy', 'ARC-Challenge'):
        calibration = load_dataset('ai2_arc', subset, split='train').select(range(nsamples))
        for example in calibration:
            # Pick the answer text whose label matches the gold answerKey.
            answer = example['choices']['text'][example['choices']['label'].index(example['answerKey'])]
            dataset.append(question_answering_format(question=example['question'], answer=answer))

    # We recommend also including some examples from C4 to avoid
    # overfitting to the downstream data.
    c4_dataset = load_dataset(
        "allenai/c4",
        data_files="en/c4-train.00001-of-01024.json.gz",
        split="train"
    ).select(range(nsamples))["text"]

    return dataset + c4_dataset
|
62 |
+
|
63 |
+
def multiple_identity_format(instruction, input_q, output):
    """Join an instruction, its input, and the expected output into one prompt."""
    head = instruction.strip()
    return "{} {}\n {}".format(head, input_q, output)
|
65 |
+
def construct_mmlu():
    """Build a calibration set from MMLU validation questions, identity
    instruction pairs, and C4 passages.

    NOTE(review): the MMLU, identity-JSON, and C4 paths are machine-local —
    adjust them for your own environment.
    """
    dataset = []

    # MMLU validation questions rendered as lettered multiple-choice prompts;
    # example['answer'] is an index into A/B/C/D.
    for example in load_dataset('/mnt/ceph/develop/jiawei/code_dataset/mmlu', 'all', split='validation'):
        letter = ['A','B','C','D'][example['answer']]
        dataset.append(
            multiple_choices_question_answering_format(example['question'], example['choices'], letter)
        )

    # Identity instruction/input/output triples from a local JSON file.
    identity_dataset = load_dataset(
        "json",
        data_files="/mnt/ceph/develop/jiawei/GPTQModel/examples/eora/identity.json",
        split="train"
    )
    for example in identity_dataset:
        dataset.append(
            multiple_identity_format(example['instruction'], example['input'], example['output'])
        )

    # We recommend also including some examples from C4 to avoid
    # overfitting to the downstream data.
    c4_dataset = load_dataset(
        "/mnt/ceph/develop/jiawei/code_dataset/c4",
        data_files="en.noblocklist/c4-train.00001-of-01024.json.gz",
        split="train"
    ).select(range(1024))["text"]

    return dataset + c4_dataset
|
94 |
+
|
95 |
+
|
96 |
+
```
|
97 |
+
|
98 |
+
|
99 |
+
2. quantization
|
100 |
+
|
101 |
+
```bash
|
102 |
+
|
103 |
+
python examples/eora/eora_generation.py THUDM/GLM-4-32B-0414 --bits 4 --quant_save_path glide-the/GLM-4-32B-0414-GPTQ-4bits --eora_dataset mmlu --eora_save_path glide-the/GLM-4-32B-0414-GPTQ-4bits-eora_rank64_c4 --eora_rank 64
|
104 |
+
|
105 |
+
```
|
106 |
+
|
107 |
+
3. inference
|
108 |
+
|
109 |
+
```bash
|
110 |
+
|
111 |
+
|
112 |
+
python examples/eora/eora_load_and_inference.py --quantized_model glide-the/GLM-4-32B-0414-GPTQ-4bits
|
113 |
+
|
114 |
+
```
|
115 |
+
|
116 |
+
|
117 |
+
|
118 |
+
# Usage transformers
|
119 |
+
|
120 |
+
|
121 |
+
```python
|
122 |
+
|
123 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
124 |
+
|
125 |
+
tokenizer = AutoTokenizer.from_pretrained("glide-the/GLM-4-32B-0414-GPTQ-4bits")
|
126 |
+
quantized_model = AutoModelForCausalLM.from_pretrained("glide-the/GLM-4-32B-0414-GPTQ-4bits")
|
127 |
+
|
128 |
+
print(tokenizer.decode(quantized_model.generate(**tokenizer("""从以下文本中提取 (公司, 创始人) 对:
|
129 |
+
|
130 |
+
示例1:
|
131 |
+
文本:"马云创立了阿里巴巴。"
|
132 |
+
输出:[("阿里巴巴", "马云")]
|
133 |
+
|
134 |
+
示例2:
|
135 |
+
文本:"比尔·盖茨是微软的联合创始人。"
|
136 |
+
输出:[("微软", "比尔·盖茨")]
|
137 |
+
|
138 |
+
待抽取:
|
139 |
+
文本:"乔布斯创办了苹果公司。"
|
140 |
+
输出:""", return_tensors="pt").to(quantized_model.device))[0]))
|
141 |
+
|
142 |
+
```
|
143 |
+
|