---
license: mit
---

This repository contains the model checkpoint of GLM-4-32B-0414-GPTQ-4bits.

- Base model: GLM-4-32B-0414
- Quantization method: GPTQ
- Quantization method repository: https://github.com/modelcloud/gptqmodel

## EoRA Method Dataset

1. Construct the calibration set

```python
from datasets import load_dataset


def question_answering_format(question, answer):
    return f"Question: {question}\nAnswer: {answer}"


def multiple_choices_question_answering_format(question, choices, answer):
    return (
        f"{question.strip()}\n"
        f"A. {choices[0]}\n"
        f"B. {choices[1]}\n"
        f"C. {choices[2]}\n"
        f"D. {choices[3]}\n"
        f"Answer: {answer}"
    )


# An example of using C4 for constructing the EoRA calibration set
def construct_c4():
    calibration_dataset = load_dataset(
        "/mnt/ceph/develop/jiawei/code_dataset/c4",
        data_files="en.noblocklist/c4-train.00001-of-01024.json.gz",
        split="train",
        download_mode="force_redownload",
    ).select(range(1024))["text"]
    return calibration_dataset


# An example of using ARC for constructing the EoRA calibration set
def construct_ARC():
    nsamples = 1024
    arc_easy_calibration_dataset = load_dataset(
        "ai2_arc", "ARC-Easy", split="train"
    ).select(range(nsamples))
    arc_challenge_calibration_dataset = load_dataset(
        "ai2_arc", "ARC-Challenge", split="train"
    ).select(range(nsamples))
    dataset = []

    for example in arc_easy_calibration_dataset:
        answer = example["choices"]["text"][
            example["choices"]["label"].index(example["answerKey"])
        ]
        question = example["question"]
        dataset.append(question_answering_format(question=question, answer=answer))

    for example in arc_challenge_calibration_dataset:
        answer = example["choices"]["text"][
            example["choices"]["label"].index(example["answerKey"])
        ]
        question = example["question"]
        dataset.append(question_answering_format(question=question, answer=answer))

    # We recommend also including some examples from C4 to avoid
    # overfitting to the downstream data.
    c4_dataset = load_dataset(
        "allenai/c4",
        data_files="en/c4-train.00001-of-01024.json.gz",
        split="train",
    ).select(range(nsamples))["text"]

    return dataset + c4_dataset


def multiple_identity_format(instruction, input_q, output):
    return f"{instruction.strip()} {input_q}\n {output}"


def construct_mmlu():
    mmlu_calibration_dataset = load_dataset(
        "/mnt/ceph/develop/jiawei/code_dataset/mmlu", "all", split="validation"
    )
    dataset = []
    for example in mmlu_calibration_dataset:
        question = example["question"]
        choices = example["choices"]
        answer = ["A", "B", "C", "D"][example["answer"]]
        dataset.append(
            multiple_choices_question_answering_format(question, choices, answer)
        )

    identity_dataset = load_dataset(
        "json",
        data_files="/mnt/ceph/develop/jiawei/GPTQModel/examples/eora/identity.json",
        split="train",
    )
    for example in identity_dataset:
        instruction = example["instruction"]
        input_q = example["input"]
        output = example["output"]
        dataset.append(multiple_identity_format(instruction, input_q, output))

    # We recommend also including some examples from C4 to avoid
    # overfitting to the downstream data.
    c4_dataset = load_dataset(
        "/mnt/ceph/develop/jiawei/code_dataset/c4",
        data_files="en.noblocklist/c4-train.00001-of-01024.json.gz",
        split="train",
    ).select(range(1024))["text"]
    return dataset + c4_dataset
```

2. Quantization

```bash
python examples/eora/eora_generation.py THUDM/GLM-4-32B-0414 \
  --bits 4 \
  --quant_save_path glide-the/GLM-4-32B-0414-GPTQ-4bits \
  --eora_dataset mmlu \
  --eora_save_path glide-the/GLM-4-32B-0414-GPTQ-4bits-eora_rank64_c4 \
  --eora_rank 64
```
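For orientation, here is a minimal sketch of how a calibration set such as the one returned by `construct_mmlu()` feeds into GPTQModel's quantization API. This is not the `eora_generation.py` script above: the `group_size` value, `batch_size`, and the output path are assumptions for illustration only.

```python
from gptqmodel import GPTQModel, QuantizeConfig

# Assumed settings for illustration; eora_generation.py is the
# authoritative script for the EoRA workflow used in this repo.
quant_config = QuantizeConfig(bits=4, group_size=128)  # group_size is an assumption

calibration_dataset = construct_mmlu()  # defined in the block above

model = GPTQModel.load("THUDM/GLM-4-32B-0414", quant_config)
model.quantize(calibration_dataset, batch_size=1)  # run GPTQ calibration
model.save("GLM-4-32B-0414-GPTQ-4bits")            # write the quantized checkpoint
```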
3. Inference

```bash
python examples/eora/eora_load_and_inference.py --quantized_model glide-the/GLM-4-32B-0414-GPTQ-4bits
```

# Usage with transformers

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("glide-the/GLM-4-32B-0414-GPTQ-4bits")
quantized_model = AutoModelForCausalLM.from_pretrained("glide-the/GLM-4-32B-0414-GPTQ-4bits")

prompt = """Extract (company, founder) pairs from the following text:

Example 1:
Text: "Jack Ma founded Alibaba."
Output: [("Alibaba", "Jack Ma")]

Example 2:
Text: "Bill Gates is a co-founder of Microsoft."
Output: [("Microsoft", "Bill Gates")]

To extract:
Text: "Steve Jobs founded Apple."
Output: """

inputs = tokenizer(prompt, return_tensors="pt").to(quantized_model.device)
print(tokenizer.decode(quantized_model.generate(**inputs)[0]))
```
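If you only want the model's completion with a bounded generation length, the following variant, continuing from the block above, uses the standard transformers `generate` API and slices off the prompt tokens before decoding; `max_new_tokens=128` is an arbitrary example value, not a setting from this card.

```python
# Continuing from the block above: bound the generation length and
# decode only the newly generated tokens.
output_ids = quantized_model.generate(**inputs, max_new_tokens=128)

new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]  # drop the prompt tokens
print(tokenizer.decode(new_tokens, skip_special_tokens=True))
```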