Upload 64 files
- SVFT-main/LLM-Adapters/DATA_LICENSE +183 -0
- SVFT-main/LLM-Adapters/LICENSE +201 -0
- SVFT-main/LLM-Adapters/README.md +267 -0
- SVFT-main/LLM-Adapters/commonsense_evaluate.py +300 -0
- SVFT-main/LLM-Adapters/evaluate.py +302 -0
- SVFT-main/LLM-Adapters/export_hf_checkpoint.py +57 -0
- SVFT-main/LLM-Adapters/export_state_dict_checkpoint.py +125 -0
- SVFT-main/LLM-Adapters/finetune.py +438 -0
- SVFT-main/LLM-Adapters/ft-training_set/commonsense_15k.json +0 -0
- SVFT-main/LLM-Adapters/generate.py +191 -0
- SVFT-main/LLM-Adapters/lengths.ipynb +204 -0
- SVFT-main/LLM-Adapters/mathqa.py +27 -0
- SVFT-main/LLM-Adapters/multi_dataset_eval.py +49 -0
- SVFT-main/LLM-Adapters/peft/LICENSE +201 -0
- SVFT-main/LLM-Adapters/peft/Makefile +20 -0
- SVFT-main/LLM-Adapters/peft/pyproject.toml +36 -0
- SVFT-main/LLM-Adapters/peft/setup.py +76 -0
- SVFT-main/LLM-Adapters/peft/src/peft/__init__.py +55 -0
- SVFT-main/LLM-Adapters/peft/src/peft/mapping.py +202 -0
- SVFT-main/LLM-Adapters/peft/src/peft/peft_model.py +974 -0
- SVFT-main/LLM-Adapters/peft/src/peft/tuners/__init__.py +24 -0
- SVFT-main/LLM-Adapters/peft/src/peft/tuners/bottleneck.py +532 -0
- SVFT-main/LLM-Adapters/peft/src/peft/tuners/lora.py +624 -0
- SVFT-main/LLM-Adapters/peft/src/peft/tuners/p_tuning.py +159 -0
- SVFT-main/LLM-Adapters/peft/src/peft/tuners/prefix_tuning.py +101 -0
- SVFT-main/LLM-Adapters/peft/src/peft/tuners/prompt_tuning.py +120 -0
- SVFT-main/LLM-Adapters/peft/src/peft/utils/__init__.py +30 -0
- SVFT-main/LLM-Adapters/peft/src/peft/utils/adapters_utils.py +18 -0
- SVFT-main/LLM-Adapters/peft/src/peft/utils/config.py +169 -0
- SVFT-main/LLM-Adapters/peft/src/peft/utils/other.py +159 -0
- SVFT-main/LLM-Adapters/peft/src/peft/utils/save_and_load.py +96 -0
- SVFT-main/LLM-Adapters/peft/tests/__init__.py +0 -0
- SVFT-main/LLM-Adapters/peft/tests/test_config.py +96 -0
- SVFT-main/LLM-Adapters/peft/tests/test_peft_model.py +156 -0
- SVFT-main/LLM-Adapters/peft/tests/testing_common.py +103 -0
- SVFT-main/LLM-Adapters/peft/tests/testing_utils.py +49 -0
- SVFT-main/LLM-Adapters/picture.jpg +0 -0
- SVFT-main/LLM-Adapters/pyproject.toml +8 -0
- SVFT-main/LLM-Adapters/requirements.txt +9 -0
- SVFT-main/LLM-Adapters/run_commonsense.sh +33 -0
- SVFT-main/MetaMath/LICENSE +201 -0
- SVFT-main/MetaMath/README.MD +172 -0
- SVFT-main/MetaMath/data/README.md +7 -0
- SVFT-main/MetaMath/data/test/GSM8K_Backward.jsonl +0 -0
- SVFT-main/MetaMath/data/test/GSM8K_test.jsonl +0 -0
- SVFT-main/MetaMath/data/test/MATH_test.jsonl +0 -0
- SVFT-main/MetaMath/data/train/README.md +3 -0
- SVFT-main/MetaMath/eval_gsm8k.py +134 -0
- SVFT-main/MetaMath/eval_math.py +115 -0
- SVFT-main/MetaMath/requirements.txt +16 -0
SVFT-main/LLM-Adapters/DATA_LICENSE
ADDED
@@ -0,0 +1,183 @@
+Attribution License (ODC-By)
+PREAMBLE
+The Open Data Commons Attribution License is a license agreement intended to allow users to freely share, modify, and use this Database subject only to the attribution requirements set out in Section 4.
+
+Databases can contain a wide variety of types of content (images, audiovisual material, and sounds all in the same database, for example), and so this license only governs the rights over the Database, and not the contents of the Database individually. Licensors may therefore wish to use this license together with another license for the contents.
+
+Sometimes the contents of a database, or the database itself, can be covered by other rights not addressed here (such as private contracts, trademark over the name, or privacy rights / data protection rights over information in the contents), and so you are advised that you may have to consult other documents or clear other rights before doing activities not covered by this License.
+
+The Licensor (as defined below)
+
+and
+
+You (as defined below)
+
+agree as follows:
+
+1.0 DEFINITIONS OF CAPITALISED WORDS
+“Collective Database” – Means this Database in unmodified form as part of a collection of independent databases in themselves that together are assembled into a collective whole. A work that constitutes a Collective Database will not be considered a Derivative Database.
+
+“Convey” – As a verb, means Using the Database, a Derivative Database, or the Database as part of a Collective Database in any way that enables a Person to make or receive copies of the Database or a Derivative Database. Conveying does not include interaction with a user through a computer network, or creating and Using a Produced Work, where no transfer of a copy of the Database or a Derivative Database occurs.
+
+“Contents” – The contents of this Database, which includes the information, independent works, or other material collected into the Database. For example, the contents of the Database could be factual data or works such as images, audiovisual material, text, or sounds.
+
+“Database” – A collection of material (the Contents) arranged in a systematic or methodical way and individually accessible by electronic or other means offered under the terms of this License.
+
+“Database Directive” – Means Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended or succeeded.
+
+“Database Right” – Means rights resulting from the Chapter III (“sui generis”) rights in the Database Directive (as amended and as transposed by member states), which includes the Extraction and Re-utilisation of the whole or a Substantial part of the Contents, as well as any similar rights available in the relevant jurisdiction under Section 10.4.
+
+“Derivative Database” – Means a database based upon the Database, and includes any translation, adaptation, arrangement, modification, or any other alteration of the Database or of a Substantial part of the Contents. This includes, but is not limited to, Extracting or Re-utilising the whole or a Substantial part of the Contents in a new Database.
+
+“Extraction” – Means the permanent or temporary transfer of all or a Substantial part of the Contents to another medium by any means or in any form.
+
+“License” – Means this license agreement and is both a license of rights such as copyright and Database Rights and an agreement in contract.
+
+“Licensor” – Means the Person that offers the Database under the terms of this License.
+
+“Person” – Means a natural or legal person or a body of persons corporate or incorporate.
+
+“Produced Work” – a work (such as an image, audiovisual material, text, or sounds) resulting from using the whole or a Substantial part of the Contents (via a search or other query) from this Database, a Derivative Database, or this Database as part of a Collective Database.
+
+“Publicly” – means to Persons other than You or under Your control by either more than 50% ownership or by the power to direct their activities (such as contracting with an independent consultant).
+
+“Re-utilisation” – means any form of making available to the public all or a Substantial part of the Contents by the distribution of copies, by renting, by online or other forms of transmission.
+
+“Substantial” – Means substantial in terms of quantity or quality or a combination of both. The repeated and systematic Extraction or Re-utilisation of insubstantial parts of the Contents may amount to the Extraction or Re-utilisation of a Substantial part of the Contents.
+
+“Use” – As a verb, means doing any act that is restricted by copyright or Database Rights whether in the original medium or any other; and includes without limitation distributing, copying, publicly performing, publicly displaying, and preparing derivative works of the Database, as well as modifying the Database as may be technically necessary to use it in a different mode or format.
+
+“You” – Means a Person exercising rights under this License who has not previously violated the terms of this License with respect to the Database, or who has received express permission from the Licensor to exercise rights under this License despite a previous violation.
+
+Words in the singular include the plural and vice versa.
+
+2.0 WHAT THIS LICENSE COVERS
+2.1. Legal effect of this document. This License is:
+
+a. A license of applicable copyright and neighbouring rights;
+
+b. A license of the Database Right; and
+
+c. An agreement in contract between You and the Licensor.
+
+2.2 Legal rights covered. This License covers the legal rights in the Database, including:
+
+a. Copyright. Any copyright or neighbouring rights in the Database. The copyright licensed includes any individual elements of the Database, but does not cover the copyright over the Contents independent of this Database. See Section 2.4 for details. Copyright law varies between jurisdictions, but is likely to cover: the Database model or schema, which is the structure, arrangement, and organisation of the Database, and can also include the Database tables and table indexes; the data entry and output sheets; and the Field names of Contents stored in the Database;
+
+b. Database Rights. Database Rights only extend to the Extraction and Re-utilisation of the whole or a Substantial part of the Contents. Database Rights can apply even when there is no copyright over the Database. Database Rights can also apply when the Contents are removed from the Database and are selected and arranged in a way that would not infringe any applicable copyright; and
+
+c. Contract. This is an agreement between You and the Licensor for access to the Database. In return you agree to certain conditions of use on this access as outlined in this License.
+
+2.3 Rights not covered.
+
+a. This License does not apply to computer programs used in the making or operation of the Database;
+
+b. This License does not cover any patents over the Contents or the Database; and
+
+c. This License does not cover any trademarks associated with the Database.
+
+2.4 Relationship to Contents in the Database. The individual items of the Contents contained in this Database may be covered by other rights, including copyright, patent, data protection, privacy, or personality rights, and this License does not cover any rights (other than Database Rights or in contract) in individual Contents contained in the Database.
+
+For example, if used on a Database of images (the Contents), this License would not apply to copyright over individual images, which could have their own separate licenses, or one single license covering all of the rights over the images.
+
+3.0 RIGHTS GRANTED
+3.1 Subject to the terms and conditions of this License, the Licensor grants to You a worldwide, royalty-free, non-exclusive, terminable (but only under Section 9) license to Use the Database for the duration of any applicable copyright and Database Rights. These rights explicitly include commercial use, and do not exclude any field of endeavour. To the extent possible in the relevant jurisdiction, these rights may be exercised in all media and formats whether now known or created in the future.
+
+The rights granted cover, for example:
+
+a. Extraction and Re-utilisation of the whole or a Substantial part of the Contents;
+
+b. Creation of Derivative Databases;
+
+c. Creation of Collective Databases;
+
+d. Creation of temporary or permanent reproductions by any means and in any form, in whole or in part, including of any Derivative Databases or as a part of Collective Databases; and
+
+e. Distribution, communication, display, lending, making available, or performance to the public by any means and in any form, in whole or in part, including of any Derivative Database or as a part of Collective Databases.
+
+3.2 Compulsory license schemes. For the avoidance of doubt:
+
+a. Non-waivable compulsory license schemes. In those jurisdictions in which the right to collect royalties through any statutory or compulsory licensing scheme cannot be waived, the Licensor reserves the exclusive right to collect such royalties for any exercise by You of the rights granted under this License;
+
+b. Waivable compulsory license schemes. In those jurisdictions in which the right to collect royalties through any statutory or compulsory licensing scheme can be waived, the Licensor waives the exclusive right to collect such royalties for any exercise by You of the rights granted under this License; and,
+
+c. Voluntary license schemes. The Licensor waives the right to collect royalties, whether individually or, in the event that the Licensor is a member of a collecting society that administers voluntary licensing schemes, via that society, from any exercise by You of the rights granted under this License.
+
+3.3 The right to release the Database under different terms, or to stop distributing or making available the Database, is reserved. Note that this Database may be multiple-licensed, and so You may have the choice of using alternative licenses for this Database. Subject to Section 10.4, all other rights not expressly granted by Licensor are reserved.
+
+4.0 CONDITIONS OF USE
+4.1 The rights granted in Section 3 above are expressly made subject to Your complying with the following conditions of use. These are important conditions of this License, and if You fail to follow them, You will be in material breach of its terms.
+
+4.2 Notices. If You Publicly Convey this Database, any Derivative Database, or the Database as part of a Collective Database, then You must:
+
+a. Do so only under the terms of this License;
+
+b. Include a copy of this License or its Uniform Resource Identifier (URI) with the Database or Derivative Database, including both in the Database or Derivative Database and in any relevant documentation;
+
+c. Keep intact any copyright or Database Right notices and notices that refer to this License; and
+
+d. If it is not possible to put the required notices in a particular file due to its structure, then You must include the notices in a location (such as a relevant directory) where users would be likely to look for it.
+
+4.3 Notice for using output (Contents). Creating and Using a Produced Work does not require the notice in Section 4.2. However, if you Publicly Use a Produced Work, You must include a notice associated with the Produced Work reasonably calculated to make any Person that uses, views, accesses, interacts with, or is otherwise exposed to the Produced Work aware that Content was obtained from the Database, Derivative Database, or the Database as part of a Collective Database, and that it is available under this License.
+
+a. Example notice. The following text will satisfy notice under Section 4.3:
+
+Contains information from DATABASE NAME which is made available
+under the ODC Attribution License.
+DATABASE NAME should be replaced with the name of the Database and a hyperlink to the location of the Database. “ODC Attribution License” should contain a hyperlink to the URI of the text of this License. If hyperlinks are not possible, You should include the plain text of the required URI’s with the above notice.
+
+4.4 Licensing of others. You may not sublicense the Database. Each time You communicate the Database, the whole or Substantial part of the Contents, or any Derivative Database to anyone else in any way, the Licensor offers to the recipient a license to the Database on the same terms and conditions as this License. You are not responsible for enforcing compliance by third parties with this License, but You may enforce any rights that You have over a Derivative Database. You are solely responsible for any modifications of a Derivative Database made by You or another Person at Your direction. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License.
+
+5.0 MORAL RIGHTS
+5.1 Moral rights. This section covers moral rights, including any rights to be identified as the author of the Database or to object to treatment that would otherwise prejudice the author’s honour and reputation, or any other derogatory treatment:
+
+a. For jurisdictions allowing waiver of moral rights, Licensor waives all moral rights that Licensor may have in the Database to the fullest extent possible by the law of the relevant jurisdiction under Section 10.4;
+
+b. If waiver of moral rights under Section 5.1 a in the relevant jurisdiction is not possible, Licensor agrees not to assert any moral rights over the Database and waives all claims in moral rights to the fullest extent possible by the law of the relevant jurisdiction under Section 10.4; and
+
+c. For jurisdictions not allowing waiver or an agreement not to assert moral rights under Section 5.1 a and b, the author may retain their moral rights over certain aspects of the Database.
+
+Please note that some jurisdictions do not allow for the waiver of moral rights, and so moral rights may still subsist over the Database in some jurisdictions.
+
+6.0 FAIR DEALING, DATABASE EXCEPTIONS, AND OTHER RIGHTS NOT AFFECTED
+6.1 This License does not affect any rights that You or anyone else may independently have under any applicable law to make any use of this Database, including without limitation:
+
+a. Exceptions to the Database Right including: Extraction of Contents from non-electronic Databases for private purposes, Extraction for purposes of illustration for teaching or scientific research, and Extraction or Re-utilisation for public security or an administrative or judicial procedure.
+
+b. Fair dealing, fair use, or any other legally recognised limitation or exception to infringement of copyright or other applicable laws.
+
+6.2 This License does not affect any rights of lawful users to Extract and Re-utilise insubstantial parts of the Contents, evaluated quantitatively or qualitatively, for any purposes whatsoever, including creating a Derivative Database (subject to other rights over the Contents, see Section 2.4). The repeated and systematic Extraction or Re-utilisation of insubstantial parts of the Contents may however amount to the Extraction or Re-utilisation of a Substantial part of the Contents.
+
+7.0 WARRANTIES AND DISCLAIMER
+7.1 The Database is licensed by the Licensor “as is” and without any warranty of any kind, either express, implied, or arising by statute, custom, course of dealing, or trade usage. Licensor specifically disclaims any and all implied warranties or conditions of title, non-infringement, accuracy or completeness, the presence or absence of errors, fitness for a particular purpose, merchantability, or otherwise. Some jurisdictions do not allow the exclusion of implied warranties, so this exclusion may not apply to You.
+
+8.0 LIMITATION OF LIABILITY
+8.1 Subject to any liability that may not be excluded or limited by law, the Licensor is not liable for, and expressly excludes, all liability for loss or damage however and whenever caused to anyone by any use under this License, whether by You or by anyone else, and whether caused by any fault on the part of the Licensor or not. This exclusion of liability includes, but is not limited to, any special, incidental, consequential, punitive, or exemplary damages such as loss of revenue, data, anticipated profits, and lost business. This exclusion applies even if the Licensor has been advised of the possibility of such damages.
+
+8.2 If liability may not be excluded by law, it is limited to actual and direct financial loss to the extent it is caused by proved negligence on the part of the Licensor.
+
+9.0 TERMINATION OF YOUR RIGHTS UNDER THIS LICENSE
+9.1 Any breach by You of the terms and conditions of this License automatically terminates this License with immediate effect and without notice to You. For the avoidance of doubt, Persons who have received the Database, the whole or a Substantial part of the Contents, Derivative Databases, or the Database as part of a Collective Database from You under this License will not have their licenses terminated provided their use is in full compliance with this License or a license granted under Section 4.8 of this License. Sections 1, 2, 7, 8, 9 and 10 will survive any termination of this License.
+
+9.2 If You are not in breach of the terms of this License, the Licensor will not terminate Your rights under it.
+
+9.3 Unless terminated under Section 9.1, this License is granted to You for the duration of applicable rights in the Database.
+
+9.4 Reinstatement of rights. If you cease any breach of the terms and conditions of this License, then your full rights under this License will be reinstated:
+
+a. Provisionally and subject to permanent termination until the 60th day after cessation of breach;
+
+b. Permanently on the 60th day after cessation of breach unless otherwise reasonably notified by the Licensor; or
+
+c. Permanently if reasonably notified by the Licensor of the violation, this is the first time You have received notice of violation of this License from the Licensor, and You cure the violation prior to 30 days after your receipt of the notice.
+
+9.5 Notwithstanding the above, Licensor reserves the right to release the Database under different license terms or to stop distributing or making available the Database. Releasing the Database under different license terms or stopping the distribution of the Database will not withdraw this License (or any other license that has been, or is required to be, granted under the terms of this License), and this License will continue in full force and effect unless terminated as stated above.
+
+10.0 GENERAL
+10.1 If any provision of this License is held to be invalid or unenforceable, that must not affect the validity or enforceability of the remainder of the terms and conditions of this License and each remaining provision of this License shall be valid and enforced to the fullest extent permitted by law.
+
+10.2 This License is the entire agreement between the parties with respect to the rights granted here over the Database. It replaces any earlier understandings, agreements or representations with respect to the Database.
+
+10.3 If You are in breach of the terms of this License, You will not be entitled to rely on the terms of this License or to complain of any breach by the Licensor.
+
+10.4 Choice of law. This License takes effect in and will be governed by the laws of the relevant jurisdiction in which the License terms are sought to be enforced. If the standard suite of rights granted under applicable copyright law and Database Rights in the relevant jurisdiction includes additional rights not granted under this License, these additional rights are granted in this License in order to meet the terms of this License.
SVFT-main/LLM-Adapters/LICENSE
ADDED
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
SVFT-main/LLM-Adapters/README.md
ADDED
@@ -0,0 +1,267 @@
+<!---
+Copyright 2023 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+<h1 align="center">
+<img src="picture.jpg" width="73" height="114">
+<p> LLM-Adapters</p>
+</h1>
+
+<h3 align="center">
+<p>LLM-Adapters: An Adapter Family for Parameter-Efficient Fine-Tuning of Large Language Models</p>
+</h3>
+LLM-Adapters is an easy-to-use framework that integrates various adapters into LLMs and can execute adapter-based PEFT methods of LLMs for different tasks. LLM-Adapters is an extension of HuggingFace's PEFT library; many thanks for their amazing work! Please find our paper at this link: https://arxiv.org/abs/2304.01933.
+
+The framework includes state-of-the-art open-access LLMs: LLaMA, OPT, BLOOM, and GPT-J, as well as widely used adapters such as Bottleneck adapters, Parallel adapters, and LoRA.
+
+Supported Adapters:
+
+1. LoRA: [LoRA: Low-Rank Adaptation of Large Language Models](https://arxiv.org/pdf/2106.09685.pdf)
+2. AdapterH: [Parameter-Efficient Transfer Learning for NLP](https://arxiv.org/pdf/1902.00751.pdf)
+3. AdapterP: [MAD-X: An Adapter-Based Framework for Multi-Task Cross-Lingual Transfer](https://arxiv.org/pdf/2005.00052.pdf)
+4. Parallel: [Towards a Unified View of Parameter-Efficient Transfer Learning](https://arxiv.org/pdf/2110.04366.pdf)
+5. Prefix Tuning: [Prefix-Tuning: Optimizing Continuous Prompts for Generation](https://aclanthology.org/2021.acl-long.353/), [P-Tuning v2: Prompt Tuning Can Be Comparable to Fine-tuning Universally Across Scales and Tasks](https://arxiv.org/pdf/2110.07602.pdf)
+6. P-Tuning: [GPT Understands, Too](https://arxiv.org/pdf/2103.10385.pdf)
+7. Prompt Tuning: [The Power of Scale for Parameter-Efficient Prompt Tuning](https://arxiv.org/pdf/2104.08691.pdf)
+
+## Latest News 🔥🔥
+
+* [2023-08-10] LLM-Adapters has been accepted by EMNLP 2023.
+* [2023-07-16] We released the commonsense170k dataset; the LLaMA-13B-Parallel model outperforms ChatGPT on 8 commonsense benchmarks.
+* [2023-04-21] We released the math10k dataset and the [LLaMA-13B adapter checkpoints](https://drive.google.com/file/d/1NqUv-Hn_mAkGXsUOqpJKmPKW5Gp8mRlO/view?usp=sharing). The LLaMA-13B-Parallel model achieves **91%** of GPT-3.5 performance!
+* [2023-04-10] We now support GPT-Neo and ChatGLM!
+* [2023-04-04] [Release code and dataset](https://github.com/AGI-Edgerunners/LLM-Adapters)
+
+## Special Announcement
+The `math_10k.json` data is collected from the training sets of GSM8K, MAWPS, and AQuA (1000 examples). However, MAWPS consists of AddSub, MultiArith, SingleOp, SingleEq, SimulEq-S, and SimulEq-L, so we cannot use MultiArith, AddSub, and SingleEq as evaluation benchmarks for models trained with `math_10k.json`. We evaluate the PEFT methods on the MAWPS test set instead, and the result table has been updated (the findings in the paper are consistent). Furthermore, two variations of `math_10k.json` have been uploaded: `math_7K.json`, where the MAWPS samples have been deleted, and `math_14k.json`, where the MAWPS samples have likewise been deleted and we combine ChatGPT and GPT-4 rationales. We sincerely apologize for any inconvenience!
+
+## Setup
+
+1. Install dependencies
+```bash
+pip install -r requirements.txt
+```
+
+2. Set environment variables, or modify the files referencing `BASE_MODEL`:
+
+```bash
+# Files referencing `BASE_MODEL`
+# export_hf_checkpoint.py
+# export_state_dict_checkpoint.py
+
+export BASE_MODEL=yahma/llama-7b-hf
+```
+
+Both `finetune.py` and `generate.py` use the `--base_model` flag, as shown further below.
+
+3. If bitsandbytes doesn't work, [install it from source](https://github.com/TimDettmers/bitsandbytes/blob/main/compile_from_source.md). Windows users can follow [these instructions](https://github.com/tloen/alpaca-lora/issues/17).
+
+## Training (finetune.py)
+
+This file contains some code related to prompt construction and tokenization. In this file, specify different adapters and different sets of data, so that different models can be trained.
+
+Example usage for multiple GPUs:
+
+```bash
+WORLD_SIZE=2 CUDA_VISIBLE_DEVICES=0,1 torchrun --nproc_per_node=2 --master_port=3192 finetune.py \
+    --base_model 'yahma/llama-7b-hf' \
+    --data_path 'math_10k.json' \
+    --output_dir './trained_models/llama-lora' \
+    --batch_size 16 \
+    --micro_batch_size 4 \
+    --num_epochs 3 \
+    --learning_rate 3e-4 \
+    --cutoff_len 256 \
+    --val_set_size 120 \
+    --adapter_name lora
+```
+
+The `math_10k.json` data is collected from the training sets of GSM8K, MAWPS, and AQuA (1000 examples). `yahma/llama-7b-hf` is the base model, LLaMA-7B; the command above adds a `lora` adapter to it.
+
+Example usage for a single GPU:
+
+```bash
+CUDA_VISIBLE_DEVICES=0 python finetune.py \
+    --base_model 'yahma/llama-7b-hf' \
+    --data_path 'math_10k.json' \
+    --output_dir './trained_models/llama-lora' \
+    --batch_size 16 \
+    --micro_batch_size 4 \
+    --num_epochs 3 \
+    --learning_rate 3e-4 \
+    --cutoff_len 256 \
+    --val_set_size 120 \
+    --adapter_name lora
+```
+
+Moreover, you can use `--use_gradient_checkpointing` to save more GPU memory, although it will increase the training time.
+
+To use AdapterH, just add the following argument:
+
+```bash
+--adapter_name bottleneck # use the bottleneck adapter, referred to as AdapterH in the result table
+```
+
+To use AdapterP, just add the following arguments:
+
+```bash
+--adapter_name bottleneck
+--use_adapterp # referred to as AdapterP in the result table
+```
+
+To use the parallel adapter, just add the following arguments:
+
+```bash
+--adapter_name bottleneck
+--use_parallel_adapter
+```
+
+Note that, in order to facilitate INT8 training of large models with parallel adapters, we have adopted a technique whereby the parallel adapter layers are incorporated into the multi-head attention layers and MLP layers, in parallel with the Linear layers. This differs from [Hu et al. (2021)](https://arxiv.org/pdf/2106.09685.pdf).
+
+## Inference (generate.py)
+
+This file reads the foundation model from the Hugging Face model hub and the LoRA weights from `'./trained_models/llama-lora'`, and runs a Gradio interface for inference on a specified input. Users should treat this as example code for the use of the model, and modify it as needed.
+Example usage:
+
+```bash
+CUDA_VISIBLE_DEVICES=0 torchrun generate.py \
+    --base_model 'yahma/llama-7b-hf' \
+    --lora_weights './trained_models/llama-lora'
+```
+
+## Evaluation (evaluate.py)
+
+To evaluate the performance of the finetuned model on the Arithmetic Reasoning tasks, you can use the following command, where `--model` specifies the base model, `--adapter` the adapter name (one of "LoRA", "AdapterH", "AdapterP", "Parallel", "Scaled_Parallel"), and `--dataset` the test dataset:
+
+```bash
+CUDA_VISIBLE_DEVICES=0 python evaluate.py \
+    --model LLaMA-7B \
+    --adapter LoRA \
+    --dataset SVAMP \
+    --base_model 'yahma/llama-7b-hf' \
+    --lora_weights './trained_models/llama-lora'
+```
+
+<!-- ## Resource Consumption
+
+Below is a table of the resources needed for different adapters, covering trainable parameters, GPU RAM usage, and fine-tuning time on the Arithmetic Reasoning dataset `math_10k.json`.
+
+Hyper-parameter setting: num_epochs=3, lora_r=8, lora_alpha=16, bottleneck_size=256
+
+Models: LLaMA-13B, LLaMA-7B, BLOOM-6.7B, GPT-j-6B
+Dataset: 3.2K math word problems
+
+Hardware: 2*3090 GPUs
+
+| Model                 | Trainable Parameters | GPU RAM Usage | Fine-tuning Time |
+|-----------------------|----------------------|---------------|------------------|
+| LLaMA-7B-LoRA         | 4.2M                 | 18GB          | 4h               |
+| LLaMA-7B-AdapterH     | 200M                 | 22GB          | 4h               |
+| LLaMA-7B-AdapterP     | 200M                 | 22GB          | 4h               |
+| LLaMA-7B-Parallel     | 200M                 | 22GB          | 4h               | -->
+
+
+## Finetune Result
+These are the finetuning results for different models on four math reasoning datasets: GSM8K, AQuA, SVAMP, and MAWPS. In this table, we use the optimal configuration and placement of Prefix-Tuning, Series Adapter, LoRA, and Parallel Adapter according to the empirical study in our [paper](https://aclanthology.org/2023.emnlp-main.319/).
+
+| Model                 | GSM8K  | AQuA   | MAWPS    | SVAMP  | Average |
+|-----------------------|--------|--------|----------|--------|---------|
+| GPT-3.5               |**56.4**|**38.9**| **87.4** |**69.9**|**63.2** |
+| BLOOMz-7B-Prefix      | 13.8   | 12.5   | 47.5     | 24.1   | 24.5    |
+| BLOOMz-7B-Series      | 14.3   | 20.5   | 62.2     | 38.1   | 33.8    |
+| BLOOMz-7B-Parallel    | 18.5   | 18.9   | 70.6     | 36.4   | 36.1    |
+| BLOOMz-7B-LoRA        | 17.4   | 21.3   | 70.2     | 41.0   | 37.5    |
+| GPT-j-6B-Prefix       | 16.0   | 14.7   | 59.2     | 31.0   | 30.2    |
+| GPT-j-6B-Series       | 19.5   | 15.0   | 80.3     | 43.6   | 39.6    |
+| GPT-j-6B-Parallel     | 18.9   | 17.9   | 78.2     | 41.1   | 39.0    |
+| GPT-j-6B-LoRA         | 23.0   | 16.1   | 79.4     | 46.0   | 41.1    |
+| LLaMA-7B-Prefix       | 24.4   | 14.2   | 63.4     | 38.1   | 35.0    |
+| LLaMA-7B-Series       | 33.3   | 15.0   | 77.7     | 52.3   | 44.6    |
+| LLaMA-7B-Parallel     | 35.3   | 18.1   | 82.4     | 49.6   | 46.4    |
+| LLaMA-7B-LoRA         | 37.5   | 18.9   | 79.0     | 52.1   | 46.9    |
+| LLaMA-13B-Prefix      | 31.1   | 15.7   | 66.8     | 41.4   | 38.8    |
+| LLaMA-13B-Series      | 44.0   | 22.0   | 78.6     | 50.8   | 48.9    |
+| LLaMA-13B-Parallel    | 43.3   | 20.5   | 81.1     | 55.7   | 50.2    |
+| LLaMA-13B-LoRA        | 47.5   | 18.5   | 83.6     | 54.6   | 51.1    |
+
+
+These are the finetuning results for different models on eight commonsense reasoning datasets.
+
+| Model                 | BoolQ   | PIQA   | SIQA   | HellaSwag   | WinoGrande   | ARC-e   | ARC-c   | OBQA   | Average   |
+|-----------------------|---------|--------|--------|-------------|--------------|---------|---------|--------|-----------|
+| ChatGPT               | **73.1**|**85.4**| 68.5   | 78.5        | 66.1         |**89.8** |**79.9** | 74.8   | 77.0      |
+| BLOOMz-7B-Prefix      | 45.6    | 53.7   | 46.3   | 26.7        | 49.5         | 52.1    | 39.7    | 44.3   | 44.7      |
+| BLOOMz-7B-Series      | 65.4    | 70.4   | 73.6   | 53.4        | 69.3         | 72.3    | 55.9    | 68.0   | 66.0      |
+| BLOOMz-7B-Parallel    | 64.1    | 71.5   | 72.1   | 52.9        | 67.0         | 70.5    | 54.7    | 69.6   | 65.3      |
+| BLOOMz-7B-LoRA        | 65.9    | 75.3   | 74.5   | 57.3        | 72.5         | 74.6    | 57.8    | 73.4   | 68.9      |
+| GPT-j-6B-Prefix       | 63.1    | 66.9   | 68.7   | 34.4        | 64.5         | 64.4    | 46.8    | 59.0   | 58.5      |
+| GPT-j-6B-Series       | 62.1    | 63.5   | 72.3   | 30.6        | 68.0         | 63.9    | 48.1    | 63.8   | 59.0      |
+| GPT-j-6B-Parallel     | 62.2    | 69.7   | 70.0   | 41.7        | 65.0         | 60.2    | 44.6    | 58.2   | 59.0      |
+| GPT-j-6B-LoRA         | 62.4    | 68.6   | 49.5   | 43.1        | 57.3         | 43.4    | 31.0    | 46.6   | 50.2      |
+| LLaMA-7B-Prefix       | 64.3    | 76.8   | 73.9   | 42.1        | 72.1         | 72.9    | 54.0    | 60.6   | 64.6      |
+| LLaMA-7B-Series       | 63.0    | 79.2   | 76.3   | 67.9        | 75.7         | 74.5    | 57.1    | 72.4   | 70.8      |
+| LLaMA-7B-Parallel     | 67.9    | 76.4   | 78.8   | 69.8        | 78.9         | 73.7    | 57.3    | 75.2   | 72.3      |
+| LLaMA-7B-LoRA         | 68.9    | 80.7   | 77.4   | 78.1        | 78.8         | 77.8    | 61.3    | 74.8   | 74.7      |
+| LLaMA-13B-Prefix      | 65.3    | 75.4   | 72.1   | 55.2        | 68.6         | 79.5    | 62.9    | 68.0   | 68.4      |
+| LLaMA-13B-Series      | 71.8    | 83.0   | 79.2   | 88.1        | 82.4         | 82.5    | 67.3    | 81.8   | 79.5      |
+| LLaMA-13B-Parallel    | 72.5    | 84.8   | 79.8   |**92.1**     |**84.7**      | 84.2    | 71.2    |**82.4**|**81.5**   |
+| LLaMA-13B-LoRA        | 72.1    | 83.5   |**80.5**| 90.5        | 83.7         | 82.8    | 68.3    |**82.4**| 80.5      |
+
+
+### Adapter support matrix
+This matrix shows whether different models can use the LoRA, AdapterH, AdapterP, Parallel, and Scaled Parallel adapters.
+
+| Adapter      | LoRA | AdapterH | AdapterP | Parallel | Prefix Tuning | P-Tuning | Prompt Tuning |
+|--------------|------|----------|----------|----------|---------------|----------|---------------|
+| LLaMA        | ✅   | ✅       | ✅       | ✅       | ✅            | ✅       | ✅            |
+| BLOOM        | ✅   | ✅       | ✅       | ✅       | ✅            | ✅       | ✅            |
+| GPT-J        | ✅   | ✅       | ✅       | ✅       | ✅            | ✅       | ✅            |
+| OPT          | ✅   | ✅       | ✅       | ✅       | ✅            | ✅       | ✅            |
+| GPT-2        | ✅   | 🔧Developing | 🔧Developing | 🔧Developing | ✅    | ✅       | ✅            |
+| GPT-Neo      | ✅   | ✅       | ✅       | ✅       | ✅            | ✅       | ✅            |
+| GPT-NeoX-20B | ✅   | 🔧Developing | 🔧Developing | 🔧Developing | ✅    | ✅       | ✅            |
+| ChatGLM      | ✅   | ✅       | ✅       | ✅       | ✅            | ✅       | ✅            |
+
+
+### TODO List
+- [x] Add AdapterH
+- [x] Add AdapterP
+- [x] Add Parallel Adapter
+- [ ] Support More LLMs
+- [ ] Support Multiple Adapters
+- [ ] Support Adapter Composition
+- [ ] Support Adapter Fusion
+
+
+## :star: Star History
+
+[![Star History Chart](https://api.star-history.com/svg?repos=AGI-Edgerunners/LLM-Adapters&type=Date)](https://star-history.com/#AGI-Edgerunners/LLM-Adapters&Date)
+
+## Citing <img src="picture.jpg" width="14px" height="14px"> LLM-Adapters
+
+If you use <img src="picture.jpg" width="14px" height="14px"> LLM-Adapters in your publication, please cite it by using the following BibTeX entry.
+
+```bibtex
+@article{hu2023llm,
+  title={LLM-Adapters: An Adapter Family for Parameter-Efficient Fine-Tuning of Large Language Models},
+  author={Hu, Zhiqiang and Lan, Yihuai and Wang, Lei and Xu, Wanyu and Lim, Ee-Peng and Lee, Roy Ka-Wei and Bing, Lidong and Poria, Soujanya},
+  journal={arXiv preprint arXiv:2304.01933},
+  year={2023}
+}
+```
+
+## Acknowledgement
+
+This repo benefits from [PEFT](https://github.com/huggingface/peft), [Adapter-Transformer](https://github.com/adapter-hub/adapter-transformers), and [Alpaca-lora](https://github.com/tloen/alpaca-lora). Thanks for their wonderful work. Additionally, we thank DONG Shan and [dream.ai](https://dream.ai/create) for the exceptional logo design, which has added immense value to our project.
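The README's note above, that parallel adapter layers are incorporated into the multi-head attention and MLP layers in parallel with the Linear layers, is easier to see in code. Below is a minimal sketch of that placement; the class name, default bottleneck size, and init scheme are illustrative assumptions, not the actual `peft/src/peft/tuners/bottleneck.py` implementation in this upload.

```python
import torch
import torch.nn as nn


class ParallelAdapterLinear(nn.Module):
    """A frozen Linear layer with a trainable bottleneck adapter in parallel (sketch)."""

    def __init__(self, base_linear: nn.Linear, bottleneck_size: int = 256):
        super().__init__()
        self.base = base_linear
        for p in self.base.parameters():
            p.requires_grad = False  # the (possibly INT8-quantized) base layer stays frozen
        self.down = nn.Linear(base_linear.in_features, bottleneck_size, bias=False)
        self.up = nn.Linear(bottleneck_size, base_linear.out_features, bias=False)
        self.act = nn.ReLU()
        nn.init.zeros_(self.up.weight)  # adapter contributes nothing at initialization

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Parallel placement: the adapter branch is added to the frozen layer's
        # output, rather than inserted after it as in a series (AdapterH-style) design.
        return self.base(x) + self.up(self.act(self.down(x)))
```

Because the trainable branch only adds a correction to the frozen layer's output, the base weights can remain quantized during INT8 training while the small down/up projections train in full precision, which is the compatibility the README alludes to.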
SVFT-main/LLM-Adapters/commonsense_evaluate.py
ADDED
@@ -0,0 +1,300 @@
import copy
import json
import os
import re
import sys
import argparse

import fire

import torch

sys.path.append(os.path.join(os.getcwd(), "peft/src/"))
from peft import PeftModel
from tqdm import tqdm
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer, AutoModelForCausalLM, AutoTokenizer

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

try:
    if torch.backends.mps.is_available():
        device = "mps"
except:  # noqa: E722
    pass


def main(
    load_8bit: bool = False,
    base_model: str = "",
    lora_weights: str = "tloen/alpaca-lora-7b",
    share_gradio: bool = False,
):
    args = parse_args()

    def evaluate(
        instructions,
        input=None,
        temperature=0.1,
        top_p=0.75,
        top_k=40,
        num_beams=4,
        max_new_tokens=32,
        **kwargs,
    ):
        prompts = [generate_prompt(instruction, input) for instruction in instructions]
        inputs = tokenizer(prompts, return_tensors="pt", padding=True)
        input_ids = inputs["input_ids"].to(device)
        generation_config = GenerationConfig(
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            num_beams=num_beams,
            **kwargs,
        )
        with torch.no_grad():
            generation_output = model.generate(
                input_ids=input_ids,
                generation_config=generation_config,
                return_dict_in_generate=True,
                output_scores=True,
                max_new_tokens=max_new_tokens,
            )
        s = generation_output.sequences
        outputs = tokenizer.batch_decode(s, skip_special_tokens=True)
        outputs = [o.split("### Response:")[1].strip() for o in outputs]
        print(outputs)
        return outputs

    save_file = f'experiment/{args.model}-{args.adapter}-{args.dataset}.json'
    create_dir('experiment/')

    dataset = load_data(args)
    batches = create_batch(dataset, args.batch_size)
    tokenizer, model = load_model(args)
    total = len(batches)
    correct = 0
    current = 0
    output_data = []
    pbar = tqdm(total=total)
    for idx, batch in enumerate(batches):
        current += len(batch)
        instructions = [data.get('instruction') for data in batch]

        outputs = evaluate(instructions)

        for data, output in zip(batch, outputs):
            label = data.get('answer')
            flag = False
            predict = extract_answer(args, output)
            if label == predict:
                correct += 1
                flag = True
            new_data = copy.deepcopy(data)
            new_data['output_pred'] = output
            new_data['pred'] = predict
            new_data['flag'] = flag
            output_data.append(new_data)
            print(data["instruction"])
            print(output)
            print('prediction:', predict)
            print('label:', label)
        print('---------------')
        print(f'\rtest:{idx + 1}/{total} | accuracy {correct} {correct / current}')
        print('---------------')
        with open(save_file, 'w+') as f:
            json.dump(output_data, f, indent=4)
        pbar.update(1)
    pbar.close()
    print('\n')
    print('test finished')


def create_dir(dir_path):
    if not os.path.exists(dir_path):
        os.mkdir(dir_path)
    return


def generate_prompt(instruction, input=None):
    if input:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input}

### Response:
"""  # noqa: E501
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Response:
"""  # noqa: E501


def load_data(args) -> list:
    """
    read data from dataset file
    Args:
        args:

    Returns:

    """
    file_path = f'dataset/{args.dataset}/test.json'
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"can not find dataset file : {file_path}")
    json_data = json.load(open(file_path, 'r'))
    return json_data


def create_batch(dataset, batch_size):
    batches = []
    num_batch = len(dataset)//batch_size if len(dataset) % batch_size == 0 else len(dataset)//batch_size + 1
    for i in range(num_batch):
        batch = dataset[i*batch_size: min((i+1)*batch_size, len(dataset))]
        batches.append(batch)
    return batches


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', choices=["boolq", "piqa", "social_i_qa", "hellaswag", "winogrande", "ARC-Challenge", "ARC-Easy", "openbookqa"],
                        required=True)
    parser.add_argument('--model', choices=['LLaMA-7B', "LLaMA-13B", 'BLOOM-7B', 'GPT-j-6B'], required=True)
    parser.add_argument('--adapter', choices=['LoRA', 'AdapterP', 'AdapterH', 'Parallel'],
                        required=True)
    parser.add_argument('--base_model', required=True)
    parser.add_argument('--lora_weights', required=True)
    parser.add_argument('--batch_size', type=int, required=True)
    parser.add_argument('--load_8bit', action='store_true', default=False)

    return parser.parse_args()


def load_model(args) -> tuple:
    """
    load tuned model
    Args:
        args:

    Returns:
        tuple(tokenizer, model)
    """
    base_model = args.base_model
    if not base_model:
        raise ValueError(f'can not find base model name by the value: {args.model}')
    lora_weights = args.lora_weights
    if not lora_weights:
        raise ValueError(f'can not find lora weight, the value is: {lora_weights}')

    load_8bit = args.load_8bit
    if "LLaMA" in args.model:
        tokenizer = LlamaTokenizer.from_pretrained(base_model)
    else:
        tokenizer = AutoTokenizer.from_pretrained(base_model)
    tokenizer.padding_side = "left"
    tokenizer.pad_token_id = (
        0  # unk. we want this to be different from the eos token
    )
    if device == "cuda":
        model = AutoModelForCausalLM.from_pretrained(
            base_model,
            load_in_8bit=load_8bit,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
        )  # fix zwq
        model = PeftModel.from_pretrained(
            model,
            lora_weights,
            torch_dtype=torch.float16,
            device_map={"": 0}
        )
    elif device == "mps":
        model = AutoModelForCausalLM.from_pretrained(
            base_model,
            device_map={"": device},
            torch_dtype=torch.float16,
        )
        model = PeftModel.from_pretrained(
            model,
            lora_weights,
            device_map={"": device},
            torch_dtype=torch.float16,
        )
    else:
        model = AutoModelForCausalLM.from_pretrained(
            base_model, device_map={"": device}, low_cpu_mem_usage=True
        )
        model = PeftModel.from_pretrained(
            model,
            lora_weights,
            device_map={"": device},
        )

    # unwind broken decapoda-research config
    model.config.pad_token_id = tokenizer.pad_token_id = 0  # unk
    model.config.bos_token_id = 1
    model.config.eos_token_id = 2

    if not load_8bit:
        model.half()  # seems to fix bugs for some users.

    model.eval()
    if torch.__version__ >= "2" and sys.platform != "win32":
        model = torch.compile(model)

    return tokenizer, model


def load_instruction(args) -> str:
    instruction = ''
    if not instruction:
        raise ValueError('instruct not initialized')
    return instruction


def extract_answer(args, sentence: str) -> str:  # returns the matched answer string, "" if none
    dataset = args.dataset
    if dataset == 'boolq':
        sentence_ = sentence.strip()
        pred_answers = re.findall(r'true|false', sentence_)
        if not pred_answers:
            return ""
        return pred_answers[0]
    elif dataset == 'piqa':
        sentence_ = sentence.strip()
        pred_answers = re.findall(r'solution1|solution2', sentence_)
        if not pred_answers:
            return ""
        return pred_answers[0]
    elif dataset in ['social_i_qa', 'ARC-Challenge', 'ARC-Easy', 'openbookqa']:
        sentence_ = sentence.strip()
        pred_answers = re.findall(r'answer1|answer2|answer3|answer4|answer5', sentence_)
        if not pred_answers:
            return ""
        return pred_answers[0]
    elif dataset == 'hellaswag':
        sentence_ = sentence.strip()
        pred_answers = re.findall(r'ending1|ending2|ending3|ending4', sentence_)
        if not pred_answers:
            return ""
        return pred_answers[0]
    elif dataset == 'winogrande':
        sentence_ = sentence.strip()
        pred_answers = re.findall(r'option1|option2', sentence_)
        if not pred_answers:
            return ""
        return pred_answers[0]


if __name__ == "__main__":
    main()
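For reference, a minimal sketch of how the batching and answer-extraction pieces of this script fit together; the stub namespace stands in for the parsed CLI args and the sample `output` string is invented for illustration:

```python
import re
from types import SimpleNamespace

# Toy dataset of 5 examples with batch_size=2 -> ceil(5/2) = 3 batches (2, 2, 1),
# mirroring the arithmetic in create_batch above.
dataset = [{'instruction': f'q{i}', 'answer': 'true'} for i in range(5)]
batch_size = 2
num_batch = len(dataset)//batch_size if len(dataset) % batch_size == 0 else len(dataset)//batch_size + 1
batches = [dataset[i*batch_size:(i+1)*batch_size] for i in range(num_batch)]
assert [len(b) for b in batches] == [2, 2, 1]

# For boolq, extract_answer keeps the first 'true'/'false' match in the decoded text.
args = SimpleNamespace(dataset='boolq')
output = "The statement is true, not false."   # invented model output
pred = re.findall(r'true|false', output.strip())[0]
print(pred)  # -> 'true'; compared against data['answer'] to count accuracy
```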
SVFT-main/LLM-Adapters/evaluate.py
ADDED
@@ -0,0 +1,302 @@
import copy
import json
import os
import re
import sys
import argparse

import fire

import torch

sys.path.append(os.path.join(os.getcwd(), "peft/src/"))
from peft import PeftModel
from tqdm import tqdm
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer, AutoModelForCausalLM, AutoTokenizer

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

try:
    if torch.backends.mps.is_available():
        device = "mps"
except:  # noqa: E722
    pass


def main(
    load_8bit: bool = False,
    base_model: str = "",
    lora_weights: str = "tloen/alpaca-lora-7b",
    share_gradio: bool = False,
):
    args = parse_args()

    def evaluate(
        instruction,
        input=None,
        temperature=0.1,
        top_p=0.75,
        top_k=40,
        num_beams=4,
        max_new_tokens=256,
        **kwargs,
    ):
        prompt = generate_prompt(instruction, input)
        inputs = tokenizer(prompt, return_tensors="pt")
        input_ids = inputs["input_ids"].to(device)
        generation_config = GenerationConfig(
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            num_beams=num_beams,
            **kwargs,
        )
        with torch.no_grad():
            generation_output = model.generate(
                input_ids=input_ids,
                generation_config=generation_config,
                return_dict_in_generate=True,
                output_scores=True,
                max_new_tokens=max_new_tokens,
                use_cache=False,
            )
        s = generation_output.sequences[0]
        output = tokenizer.decode(s)
        return output.split("### Response:")[1].strip()

    """
    # testing code for readme
    for instruction in [
        "Tell me about alpacas.",
        "Tell me about the president of Mexico in 2019.",
        "Tell me about the king of France in 2019.",
        "List all Canadian provinces in alphabetical order.",
        "Write a Python program that prints the first 10 Fibonacci numbers.",
        "Write a program that prints the numbers from 1 to 100. But for multiples of three print 'Fizz' instead of the number and for the multiples of five print 'Buzz'. For numbers which are multiples of both three and five print 'FizzBuzz'.", # noqa: E501
        "Tell me five words that rhyme with 'shock'.",
        "Translate the sentence 'I have no mouth but I must scream' into Spanish.",
        "Count up from 1 to 500.",
    ]:
        print("Instruction:", instruction)
        print("Response:", evaluate(instruction))
        print()
    """
    save_file = f'experiment/{args.model}-{args.adapter}-{args.dataset}.json'
    create_dir('experiment/')

    dataset = load_data(args)
    tokenizer, model = load_model(args)
    total = len(dataset)
    correct = 0
    miss = 0.001
    output_data = []
    pbar = tqdm(total=total)
    for idx, data in enumerate(dataset):
        instruction = data.get('instruction')

        outputs = evaluate(instruction)
        label = data.get('answer')
        flag = False
        if args.dataset.lower() in ['aqua']:
            predict = extract_answer_letter(args, outputs)
            if label == predict:
                correct += 1
                flag = True
        else:
            if isinstance(label, str):
                label = float(label)
            predict = extract_answer_number(args, outputs)
            if abs(label - predict) <= miss:
                correct += 1
                flag = True
        new_data = copy.deepcopy(data)
        new_data['output_pred'] = outputs
        new_data['pred'] = predict
        new_data['flag'] = flag
        output_data.append(new_data)
        print(' ')
        print('---------------')
        print(outputs)
        print('prediction:', predict)
        print('label:', label)
        print('---------------')
        print(f'\rtest:{idx + 1}/{total} | accuracy {correct} {correct / (idx + 1)}')
        with open(save_file, 'w+') as f:
            json.dump(output_data, f, indent=4)
        pbar.update(1)
    pbar.close()
    print('\n')
    print('test finished')


def create_dir(dir_path):
    if not os.path.exists(dir_path):
        os.mkdir(dir_path)
    return


def generate_prompt(instruction, input=None):
    if input:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input}

### Response:
"""  # noqa: E501
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Response:
"""  # noqa: E501


def load_data(args) -> list:
    """
    read data from dataset file
    Args:
        args:

    Returns:

    """
    file_path = f'dataset/{args.dataset}/test.json'
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"can not find dataset file : {file_path}")
    json_data = json.load(open(file_path, 'r'))
    return json_data


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', choices=['AddSub', 'MultiArith', 'SingleEq', 'gsm8k', 'AQuA', 'SVAMP'],
                        required=True)
    parser.add_argument('--model', choices=['LLaMA-7B', 'BLOOM-7B', 'GPT-j-6B'], required=True)
    parser.add_argument('--adapter', choices=['LoRA', 'AdapterP', 'AdapterH', 'Parallel', 'Prefix'],
                        required=True)
    parser.add_argument('--base_model', required=True)
    parser.add_argument('--lora_weights', required=True)
    parser.add_argument('--load_8bit', action='store_true', default=False)

    return parser.parse_args()


def load_model(args) -> tuple:
    """
    load tuned model
    Args:
        args:

    Returns:
        tuple(tokenizer, model)
    """
    base_model = args.base_model
    if not base_model:
        raise ValueError(f'can not find base model name by the value: {args.model}')
    lora_weights = args.lora_weights
    if not lora_weights:
        raise ValueError(f'can not find lora weight, the value is: {lora_weights}')

    load_8bit = args.load_8bit
    if args.model == 'LLaMA-7B':
        tokenizer = LlamaTokenizer.from_pretrained(base_model)
    else:
        tokenizer = AutoTokenizer.from_pretrained(base_model)
    if device == "cuda":
        model = AutoModelForCausalLM.from_pretrained(
            base_model,
            load_in_8bit=load_8bit,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
        )  # fix zwq
        model = PeftModel.from_pretrained(
            model,
            lora_weights,
            torch_dtype=torch.float16,
            device_map={"": 0}
        )
    elif device == "mps":
        model = AutoModelForCausalLM.from_pretrained(
            base_model,
            device_map={"": device},
            torch_dtype=torch.float16,
        )
        model = PeftModel.from_pretrained(
            model,
            lora_weights,
            device_map={"": device},
            torch_dtype=torch.float16,
        )
    else:
        model = AutoModelForCausalLM.from_pretrained(
            base_model, device_map={"": device}, low_cpu_mem_usage=True
        )
        model = PeftModel.from_pretrained(
            model,
            lora_weights,
            device_map={"": device},
        )

    # unwind broken decapoda-research config
    model.config.pad_token_id = tokenizer.pad_token_id = 0  # unk
    model.config.bos_token_id = 1
    model.config.eos_token_id = 2

    if not load_8bit:
        model.half()  # seems to fix bugs for some users.

    model.eval()
    if torch.__version__ >= "2" and sys.platform != "win32":
        model = torch.compile(model)

    return tokenizer, model


def load_instruction(args) -> str:
    instruction = ''
    if not instruction:
        raise ValueError('instruct not initialized')
    return instruction


def extract_answer_number(args, sentence: str) -> float:
    dataset = args.dataset.lower()
    if dataset in ["multiarith", "addsub", "singleeq", "gsm8k", "svamp"]:
        sentence = sentence.replace(',', '')
        pred = [s for s in re.findall(r'-?\d+\.?\d*', sentence)]
        if not pred:
            return float('inf')
        pred_answer = float(pred[-1])
    else:
        raise NotImplementedError(' not support dataset: {}'.format(dataset))
    if isinstance(pred_answer, str):
        try:
            pred_answer = float(pred_answer)
        except ValueError as e:
            pred_answer = float('inf')
    return pred_answer


def extract_answer_letter(args, sentence: str) -> str:
    sentence_ = sentence.strip()
    pred_answers = re.findall(r'A|B|C|D|E', sentence_)
    if pred_answers:
        return pred_answers[0]
    else:
        return ''


if __name__ == "__main__":
    fire.Fire(main)
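A minimal sketch of what `extract_answer_number` above does with a generated solution: it strips thousands separators, collects every signed number, and takes the last one as the prediction, which is then scored against the label within the `miss = 0.001` tolerance (the sample `output` string is invented):

```python
import re

def last_number(sentence: str) -> float:
    # Mirrors extract_answer_number: remove ',' separators, find all signed
    # ints/floats, and keep the last match as the predicted answer.
    sentence = sentence.replace(',', '')
    pred = re.findall(r'-?\d+\.?\d*', sentence)
    return float(pred[-1]) if pred else float('inf')

output = "First we get 12 apples, then 3 more, so the answer is 15."  # invented
predict = last_number(output)
label, miss = 15.0, 0.001
print(abs(label - predict) <= miss)  # True -> counted as correct
```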
SVFT-main/LLM-Adapters/export_hf_checkpoint.py
ADDED
@@ -0,0 +1,57 @@
import os

import torch
import transformers
from peft import PeftModel
from transformers import LlamaForCausalLM, LlamaTokenizer  # noqa: F402

BASE_MODEL = os.environ.get("BASE_MODEL", None)
assert (
    BASE_MODEL
), "Please specify a value for BASE_MODEL environment variable, e.g. `export BASE_MODEL=decapoda-research/llama-7b-hf`"  # noqa: E501

tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)

base_model = LlamaForCausalLM.from_pretrained(
    BASE_MODEL,
    load_in_8bit=False,
    torch_dtype=torch.float16,
    device_map={"": "cpu"},
)

first_weight = base_model.model.layers[0].self_attn.q_proj.weight
first_weight_old = first_weight.clone()

lora_model = PeftModel.from_pretrained(
    base_model,
    "tloen/alpaca-lora-7b",
    device_map={"": "cpu"},
    torch_dtype=torch.float16,
)

lora_weight = lora_model.base_model.model.model.layers[
    0
].self_attn.q_proj.weight

assert torch.allclose(first_weight_old, first_weight)

# merge weights
for layer in lora_model.base_model.model.model.layers:
    layer.self_attn.q_proj.merge_weights = True
    layer.self_attn.v_proj.merge_weights = True

lora_model.train(False)

# did we do anything?
assert not torch.allclose(first_weight_old, first_weight)

lora_model_sd = lora_model.state_dict()
deloreanized_sd = {
    k.replace("base_model.model.", ""): v
    for k, v in lora_model_sd.items()
    if "lora" not in k
}

LlamaForCausalLM.save_pretrained(
    base_model, "./hf_ckpt", state_dict=deloreanized_sd, max_shard_size="400MB"
)
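The merge triggered above (setting `merge_weights = True` and switching to eval mode) folds the low-rank update into the frozen weight, which is why the exported checkpoint needs no PEFT code at inference time. A toy sketch of the underlying arithmetic, assuming the usual LoRA scaling of `alpha / r` (shapes and values invented):

```python
import torch

# LoRA folds a low-rank update into the frozen weight: W' = W + (alpha/r) * B @ A.
out_f, in_f, r, alpha = 6, 6, 2, 16
W = torch.randn(out_f, in_f)
A = torch.randn(r, in_f)       # lora_A
B = torch.randn(out_f, r)      # lora_B
W_merged = W + (alpha / r) * (B @ A)

# A plain linear layer with W_merged reproduces the adapted forward pass.
x = torch.randn(4, in_f)
y_adapter = x @ W.T + (alpha / r) * (x @ A.T @ B.T)
assert torch.allclose(x @ W_merged.T, y_adapter, atol=1e-5)
```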
SVFT-main/LLM-Adapters/export_state_dict_checkpoint.py
ADDED
@@ -0,0 +1,125 @@
import json
import os

import torch
import transformers
from peft import PeftModel
from transformers import LlamaForCausalLM, LlamaTokenizer  # noqa: E402

BASE_MODEL = os.environ.get("BASE_MODEL", None)
assert (
    BASE_MODEL
), "Please specify a value for BASE_MODEL environment variable, e.g. `export BASE_MODEL=decapoda-research/llama-7b-hf`"  # noqa: E501

tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)

base_model = LlamaForCausalLM.from_pretrained(
    BASE_MODEL,
    load_in_8bit=False,
    torch_dtype=torch.float16,
    device_map={"": "cpu"},
)

lora_model = PeftModel.from_pretrained(
    base_model,
    "tloen/alpaca-lora-7b",
    device_map={"": "cpu"},
    torch_dtype=torch.float16,
)

# merge weights
for layer in lora_model.base_model.model.model.layers:
    layer.self_attn.q_proj.merge_weights = True
    layer.self_attn.v_proj.merge_weights = True

lora_model.train(False)

lora_model_sd = lora_model.state_dict()

params = {
    "dim": 4096,
    "multiple_of": 256,
    "n_heads": 32,
    "n_layers": 32,
    "norm_eps": 1e-06,
    "vocab_size": -1,
}
n_layers = params["n_layers"]
n_heads = params["n_heads"]
dim = params["dim"]
dims_per_head = dim // n_heads
base = 10000.0
inv_freq = 1.0 / (
    base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head)
)


def permute(w):
    return (
        w.view(n_heads, dim // n_heads // 2, 2, dim)
        .transpose(1, 2)
        .reshape(dim, dim)
    )


def unpermute(w):
    return (
        w.view(n_heads, 2, dim // n_heads // 2, dim)
        .transpose(1, 2)
        .reshape(dim, dim)
    )


def translate_state_dict_key(k):  # noqa: C901
    k = k.replace("base_model.model.", "")
    if k == "model.embed_tokens.weight":
        return "tok_embeddings.weight"
    elif k == "model.norm.weight":
        return "norm.weight"
    elif k == "lm_head.weight":
        return "output.weight"
    elif k.startswith("model.layers."):
        layer = k.split(".")[2]
        if k.endswith(".self_attn.q_proj.weight"):
            return f"layers.{layer}.attention.wq.weight"
        elif k.endswith(".self_attn.k_proj.weight"):
            return f"layers.{layer}.attention.wk.weight"
        elif k.endswith(".self_attn.v_proj.weight"):
            return f"layers.{layer}.attention.wv.weight"
        elif k.endswith(".self_attn.o_proj.weight"):
            return f"layers.{layer}.attention.wo.weight"
        elif k.endswith(".mlp.gate_proj.weight"):
            return f"layers.{layer}.feed_forward.w1.weight"
        elif k.endswith(".mlp.down_proj.weight"):
            return f"layers.{layer}.feed_forward.w2.weight"
        elif k.endswith(".mlp.up_proj.weight"):
            return f"layers.{layer}.feed_forward.w3.weight"
        elif k.endswith(".input_layernorm.weight"):
            return f"layers.{layer}.attention_norm.weight"
        elif k.endswith(".post_attention_layernorm.weight"):
            return f"layers.{layer}.ffn_norm.weight"
        elif k.endswith("rotary_emb.inv_freq") or "lora" in k:
            return None
        else:
            print(layer, k)
            raise NotImplementedError
    else:
        print(k)
        raise NotImplementedError


new_state_dict = {}
for k, v in lora_model_sd.items():
    new_k = translate_state_dict_key(k)
    if new_k is not None:
        if "wq" in new_k or "wk" in new_k:
            new_state_dict[new_k] = unpermute(v)
        else:
            new_state_dict[new_k] = v

os.makedirs("./ckpt", exist_ok=True)

torch.save(new_state_dict, "./ckpt/consolidated.00.pth")

with open("./ckpt/params.json", "w") as f:
    json.dump(params, f)
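`unpermute` reverses the per-head interleaving that the Hugging Face conversion applies to the `wq`/`wk` weights, restoring the original Meta checkpoint layout. A small sanity check, using toy sizes rather than the script's `n_heads=32, dim=4096`, that the two transforms are exact inverses:

```python
import torch

# Toy check that permute/unpermute above round-trip exactly.
n_heads, dim = 2, 8

def permute(w):
    return w.view(n_heads, dim // n_heads // 2, 2, dim).transpose(1, 2).reshape(dim, dim)

def unpermute(w):
    return w.view(n_heads, 2, dim // n_heads // 2, dim).transpose(1, 2).reshape(dim, dim)

w = torch.arange(dim * dim, dtype=torch.float32).reshape(dim, dim)
assert torch.equal(unpermute(permute(w)), w)
print("wq/wk round-trip OK")
```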
SVFT-main/LLM-Adapters/finetune.py
ADDED
@@ -0,0 +1,438 @@
import os
import sys
from typing import List, Optional, Union

import fire
import torch
import argparse
import transformers
from datasets import load_dataset

from tqdm import tqdm
from functools import partial, reduce

sys.path.append("../")
from svft.svft_layers import LinearWithSVFT, create_and_replace_modules, get_target_modules_list, replace_svft_with_fused_linear

"""
Unused imports:
import torch.nn as nn
import bitsandbytes as bnb
"""
sys.path.append(os.path.join(os.getcwd(), "peft/src/"))

from peft import (  # noqa: E402
    LoraConfig, BOFTConfig, VeraConfig,
    PrefixTuningConfig,
    get_peft_model,
    get_peft_model_state_dict,
    set_peft_model_state_dict,
)
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaTokenizer, AutoModel  # noqa: F402


def train(
    # model/data params
    base_model: str = "",  # the only required argument
    data_path: str = "yahma/alpaca-cleaned",
    output_dir: str = "./lora-alpaca",
    adapter_name: str = "lora",
    load_8bit: bool = False,
    # training hyperparams
    batch_size: int = 128,
    micro_batch_size: int = 4,
    num_epochs: int = 3,
    learning_rate: float = 3e-4,
    cutoff_len: int = 256,
    val_set_size: int = 2000,
    use_gradient_checkpointing: bool = False,
    eval_step: int = 200,
    save_step: int = 200,
    # lora hyperparams
    lora_r: int = None,
    lora_alpha: int = 16,
    lora_dropout: float = 0.05,
    lora_target_modules: List[str] = None,
    # bottleneck adapter hyperparams
    bottleneck_size: int = 256,
    non_linearity: str = "tanh",
    adapter_dropout: float = 0.0,
    use_parallel_adapter: bool = False,
    use_adapterp: bool = False,
    target_modules: List[str] = None,
    scaling: Union[float, str] = 1.0,
    # prefix tuning hyperparams
    num_virtual_tokens: int = 30,
    # llm hyperparams
    train_on_inputs: bool = True,  # if False, masks out inputs in loss
    group_by_length: bool = False,  # faster, but produces an odd training loss curve
    # wandb params
    wandb_project: str = "",
    wandb_run_name: str = "",
    wandb_watch: str = "",  # options: false | gradients | all
    wandb_log_model: str = "",  # options: false | true
    resume_from_checkpoint: str = None,  # either training checkpoint or final adapter
    off_diag: int = 0,
    pattern: str = "banded",
    fill_orthonormal: bool = False,
):
    print(
        f"Finetuning model with params:\n"
        f"base_model: {base_model}\n"
        f"data_path: {data_path}\n"
        f"output_dir: {output_dir}\n"
        f"batch_size: {batch_size}\n"
        f"micro_batch_size: {micro_batch_size}\n"
        f"num_epochs: {num_epochs}\n"
        f"learning_rate: {learning_rate}\n"
        f"cutoff_len: {cutoff_len}\n"
        f"val_set_size: {val_set_size}\n"
        f"use_gradient_checkpointing: {use_gradient_checkpointing}\n"
        f"lora_r: {lora_r}\n"
        f"lora_alpha: {lora_alpha}\n"
        f"lora_dropout: {lora_dropout}\n"
        f"lora_target_modules: {lora_target_modules}\n"
        f"bottleneck_size: {bottleneck_size}\n"
        f"non_linearity: {non_linearity}\n"
        f"adapter_dropout: {adapter_dropout}\n"
        f"use_parallel_adapter: {use_parallel_adapter}\n"
        f"use_adapterp: {use_adapterp}\n"
        f"train_on_inputs: {train_on_inputs}\n"
        f"scaling: {scaling}\n"
        f"adapter_name: {adapter_name}\n"
        f"target_modules: {target_modules}\n"
        f"group_by_length: {group_by_length}\n"
        f"wandb_project: {wandb_project}\n"
        f"wandb_run_name: {wandb_run_name}\n"
        f"wandb_watch: {wandb_watch}\n"
        f"wandb_log_model: {wandb_log_model}\n"
        f"resume_from_checkpoint: {resume_from_checkpoint}\n"
    )

    print(base_model)

    # assert (
    #     base_model
    # ), "Please specify a --base_model, e.g. --base_model='decapoda-research/llama-7b-hf'"
    gradient_accumulation_steps = batch_size // micro_batch_size

    device_map = "auto"
    world_size = int(os.environ.get("WORLD_SIZE", 1))
    ddp = world_size != 1
    if ddp:
        device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
        gradient_accumulation_steps = gradient_accumulation_steps // world_size

    # Check if parameter passed or if set within environ
    use_wandb = len(wandb_project) > 0 or (
        "WANDB_PROJECT" in os.environ and len(os.environ["WANDB_PROJECT"]) > 0
    )
    # Only overwrite environ if wandb param passed
    if len(wandb_project) > 0:
        os.environ["WANDB_PROJECT"] = "CommonsenseReasoning"
    if len(wandb_watch) > 0:
        os.environ["WANDB_WATCH"] = "all"
    if len(wandb_log_model) > 0:
        os.environ["WANDB_LOG_MODEL"] = wandb_log_model  # environ values must be strings, not bool

    if load_8bit:
        model = AutoModelForCausalLM.from_pretrained(
            base_model,
            load_in_8bit=load_8bit,
            torch_dtype=torch.float16,
            device_map=device_map,
            trust_remote_code=True,
        )
    else:
        model = AutoModelForCausalLM.from_pretrained(
            base_model,
            load_in_8bit=False,
            torch_dtype=torch.float32,
            device_map={"": int(os.environ.get("LOCAL_RANK") or 0)},
            trust_remote_code=True,
            # revision="step143000",
        )

    if model.config.model_type == "llama":
        # Due to the name of transformers' LlamaTokenizer, we have to do this
        tokenizer = LlamaTokenizer.from_pretrained(base_model)
    else:
        tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

    tokenizer.pad_token_id = (
        0  # unk. we want this to be different from the eos token
    )
    tokenizer.padding_side = "left"  # Allow batched inference

    def tokenize(prompt, add_eos_token=True):
        # there's probably a way to do this with the tokenizer settings
        # but again, gotta move fast
        result = tokenizer(
            prompt,
            truncation=True,
            max_length=cutoff_len,
            padding=False,
            return_tensors=None,
        )
        if (
            result["input_ids"][-1] != tokenizer.eos_token_id
            and len(result["input_ids"]) < cutoff_len
            and add_eos_token
        ):
            result["input_ids"].append(tokenizer.eos_token_id)
            if "chatglm" not in base_model:
                result["attention_mask"].append(1)

        result["labels"] = result["input_ids"].copy()

        if "chatglm" in base_model:
            return {"input_ids": result["input_ids"], "labels": result["labels"]}
        else:
            return result

    def generate_and_tokenize_prompt(data_point):
        full_prompt = generate_prompt(data_point)
        tokenized_full_prompt = tokenize(full_prompt)
        if not train_on_inputs:
            user_prompt = generate_prompt({**data_point, "output": ""})
            tokenized_user_prompt = tokenize(user_prompt, add_eos_token=False)
            user_prompt_len = len(tokenized_user_prompt["input_ids"])

            tokenized_full_prompt["labels"] = [
                -100
            ] * user_prompt_len + tokenized_full_prompt["labels"][
                user_prompt_len:
            ]  # could be sped up, probably
        return tokenized_full_prompt

    if adapter_name == "lora":
        config = LoraConfig(
            r=lora_r,
            lora_alpha=lora_alpha,
            target_modules=lora_target_modules,
            lora_dropout=lora_dropout,
            bias="none",
            task_type="CAUSAL_LM",
        )

    elif adapter_name == "dora":
        config = LoraConfig(
            use_dora=True,
            r=lora_r,
            lora_alpha=lora_alpha,
            target_modules=lora_target_modules,
            lora_dropout=lora_dropout,
            bias="none",
            task_type="CAUSAL_LM",
        )

    elif adapter_name == "boft":
        config = BOFTConfig(
            boft_block_size=8,
            boft_n_butterfly_factor=2,
            target_modules=lora_target_modules,
            boft_dropout=0.05,
            bias="boft_only",
        )

    elif adapter_name == "boft_r1":
        config = BOFTConfig(
            boft_block_size=1,
            boft_n_butterfly_factor=1,
            target_modules=lora_target_modules,
            boft_dropout=0.05,
            bias="boft_only",
        )

    elif adapter_name == "vera":
        config = VeraConfig(r=lora_r, target_modules=lora_target_modules)

    if adapter_name == 'svft':
        # for SVFT turn off gradient requirement for all layers
        # PEFT library handles this internally
        for param in model.parameters():
            param.requires_grad = False

        print(f"Target Modules: {lora_target_modules}")
        assign_svft_layer = partial(LinearWithSVFT,
                                    off_diag=off_diag,
                                    pattern=pattern,
                                    rank=lora_r,
                                    fill_orthonormal=fill_orthonormal)

        create_and_replace_modules(model, get_target_modules_list(model, lora_target_modules), assign_svft_layer)

    elif adapter_name == "full_ft":
        pass
    else:
        # for baseline peft models
        model = get_peft_model(model, config)

    if adapter_name == "prefix-tuning":
        model.to('cuda')

    if data_path.endswith(".json"):  # todo: support jsonl
        data = load_dataset("json", data_files=data_path)
    else:
        data = load_dataset(data_path)

    print(f"Trainable Parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
    print(f"Output Dir: {output_dir}")

    if val_set_size > 0:
        train_val = data["train"].train_test_split(
            test_size=val_set_size, shuffle=True, seed=42
        )
        train_data = (
            train_val["train"].shuffle().map(generate_and_tokenize_prompt)
        )
        val_data = (
            train_val["test"].shuffle().map(generate_and_tokenize_prompt)
        )
    else:
        train_data = data["train"].shuffle().map(generate_and_tokenize_prompt)
        val_data = None

    if not ddp and torch.cuda.device_count() > 1:
        # keeps Trainer from trying its own DataParallelism when more than 1 gpu is available
        model.is_parallelizable = True
        model.model_parallel = True

    trainer = transformers.Trainer(
        model=model,
        train_dataset=train_data,
        eval_dataset=val_data,
        args=transformers.TrainingArguments(
            per_device_train_batch_size=micro_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            warmup_steps=100,
            num_train_epochs=num_epochs,
            learning_rate=learning_rate,
            bf16=True,
            logging_steps=10,
            optim="adamw_torch",
            evaluation_strategy="steps" if val_set_size > 0 else "no",
            save_strategy="steps",
            eval_steps=eval_step if val_set_size > 0 else None,
            save_steps=save_step,
            output_dir=output_dir,
            save_total_limit=3,
            load_best_model_at_end=False,
            ddp_find_unused_parameters=False if ddp else None,
            group_by_length=group_by_length,
            report_to="wandb" if use_wandb else None,
            run_name=wandb_run_name if use_wandb else None,
            # deepspeed="deepspeed.json"
        ),
        data_collator=transformers.DataCollatorForSeq2Seq(
            tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
        ),
    )
    model.config.use_cache = False

    if adapter_name not in ['boft', 'svft']:
        model = model.bfloat16()

    trainer.train(resume_from_checkpoint=resume_from_checkpoint)

    model.generation_config.temperature = 1.0
    model.generation_config.top_p = 1.0

    if adapter_name == 'svft':
        replace_svft_with_fused_linear(model, get_target_modules_list(model, lora_target_modules))
    elif adapter_name == "full_ft":
        pass
    else:
        model = model.merge_and_unload()

    for param in model.parameters():
        param.data = param.data.contiguous()
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)

    print(
        "\n If there's a warning about missing keys above, please disregard :)"
    )


def generate_prompt(data_point):
    # sorry about the formatting disaster gotta move fast
    if data_point["input"]:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{data_point["instruction"]}

### Input:
{data_point["input"]}

### Response:
{data_point["output"]}"""  # noqa: E501
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{data_point["instruction"]}

### Response:
{data_point["output"]}"""  # noqa: E501


def parse_args():
    parser = argparse.ArgumentParser(description='Train a model')

    # model/data params
    parser.add_argument('--base_model', type=str, required=True, help='Base model')
    parser.add_argument('--data_path', type=str, default='yahma/alpaca-cleaned', help='Data path')
    parser.add_argument('--output_dir', type=str, default='./lora-alpaca', help='Output directory')
    parser.add_argument('--adapter_name', type=str, default='lora', help='Adapter name')
    parser.add_argument('--load_8bit', action='store_true', help='Load 8-bit')

    # training hyperparams
    parser.add_argument('--batch_size', type=int, default=128, help='Batch size')
    parser.add_argument('--micro_batch_size', type=int, default=4, help='Micro batch size')
    parser.add_argument('--num_epochs', type=int, default=3, help='Number of epochs')
    parser.add_argument('--learning_rate', type=float, default=3e-4, help='Learning rate')
    parser.add_argument('--cutoff_len', type=int, default=256, help='Cutoff length')
    parser.add_argument('--val_set_size', type=int, default=2000, help='Validation set size')
    parser.add_argument('--use_gradient_checkpointing', action='store_true', help='Use gradient checkpointing')
    parser.add_argument('--eval_step', type=int, default=200, help='Evaluation step')
    parser.add_argument('--save_step', type=int, default=200, help='Save step')

    # lora hyperparams
    parser.add_argument('--lora_r', type=int, default=8, help='Lora r')
    parser.add_argument('--lora_alpha', type=int, default=16, help='Lora alpha')
    parser.add_argument('--lora_dropout', type=float, default=0.05, help='Lora dropout')
    parser.add_argument('--lora_target_modules', nargs='+', help='Lora target modules')

    # bottleneck adapter hyperparams
    parser.add_argument('--bottleneck_size', type=int, default=256, help='Bottleneck size')
    parser.add_argument('--non_linearity', type=str, default='tanh', help='Non-linearity')
    parser.add_argument('--adapter_dropout', type=float, default=0.0, help='Adapter dropout')
    parser.add_argument('--use_parallel_adapter', action='store_true', help='Use parallel adapter')
    parser.add_argument('--use_adapterp', action='store_true', help='Use adapterp')
    parser.add_argument('--target_modules', nargs='+', help='Target modules')
    parser.add_argument('--scaling', type=str, default=1.0, help='Scaling')  # argparse cannot call typing.Union as a type; accept a string and cast downstream

    # prefix tuning hyperparams
    parser.add_argument('--num_virtual_tokens', type=int, default=30, help='Number of virtual tokens')

    # llm hyperparams
    parser.add_argument('--train_on_inputs', action='store_true', help='Train on inputs')
    parser.add_argument('--group_by_length', action='store_true', help='Group by length')

    # wandb params
    parser.add_argument('--wandb_project', type=str, default='', help='Wandb project')
    parser.add_argument('--wandb_run_name', type=str, default='', help='Wandb run name')
    parser.add_argument('--wandb_watch', type=str, default='', help='Wandb watch')
    parser.add_argument('--wandb_log_model', type=str, default='', help='Wandb log model')
    parser.add_argument('--resume_from_checkpoint', type=str, help='Resume from checkpoint')

    return parser.parse_args()


if __name__ == "__main__":
    fire.Fire(train)

# args = parse_args()
# train(**vars(args))
SVFT-main/LLM-Adapters/ft-training_set/commonsense_15k.json
ADDED
The diff for this file is too large to render.
See raw diff
SVFT-main/LLM-Adapters/generate.py
ADDED
@@ -0,0 +1,191 @@
import os
import sys

import fire
import gradio as gr
import torch
import transformers
from peft import PeftModel
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

try:
    if torch.backends.mps.is_available():
        device = "mps"
except:  # noqa: E722
    pass


def main(
    load_8bit: bool = False,
    base_model: str = "",
    lora_weights: str = "tloen/alpaca-lora-7b",
    share_gradio: bool = False,
):
    assert (
        base_model
    ), "Please specify a --base_model, e.g. --base_model='decapoda-research/llama-7b-hf'"

    tokenizer = LlamaTokenizer.from_pretrained(base_model)
    if device == "cuda":
        model = LlamaForCausalLM.from_pretrained(
            base_model,
            load_in_8bit=load_8bit,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
        )
        model = PeftModel.from_pretrained(
            model,
            lora_weights,
            torch_dtype=torch.float16,
        )
    elif device == "mps":
        model = LlamaForCausalLM.from_pretrained(
            base_model,
            device_map={"": device},
            torch_dtype=torch.float16,
        )
        model = PeftModel.from_pretrained(
            model,
            lora_weights,
            device_map={"": device},
            torch_dtype=torch.float16,
        )
    else:
        model = LlamaForCausalLM.from_pretrained(
            base_model, device_map={"": device}, low_cpu_mem_usage=True
        )
        model = PeftModel.from_pretrained(
            model,
            lora_weights,
            device_map={"": device},
        )

    # unwind broken decapoda-research config
    model.config.pad_token_id = tokenizer.pad_token_id = 0  # unk
    model.config.bos_token_id = 1
    model.config.eos_token_id = 2

    if not load_8bit:
        model.half()  # seems to fix bugs for some users.

    model.eval()
    if torch.__version__ >= "2" and sys.platform != "win32":
        model = torch.compile(model)

    def evaluate(
        instruction,
        input=None,
        temperature=0.1,
        top_p=0.75,
        top_k=40,
        num_beams=4,
        max_new_tokens=128,
        **kwargs,
    ):
        prompt = generate_prompt(instruction, input)
        inputs = tokenizer(prompt, return_tensors="pt")
        input_ids = inputs["input_ids"].to(device)
        generation_config = GenerationConfig(
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            num_beams=num_beams,
            **kwargs,
        )
        with torch.no_grad():
            generation_output = model.generate(
                input_ids=input_ids,
                generation_config=generation_config,
                return_dict_in_generate=True,
                output_scores=True,
                max_new_tokens=max_new_tokens,
            )
        s = generation_output.sequences[0]
        output = tokenizer.decode(s)
        return output.split("### Response:")[1].strip()

    gr.Interface(
        fn=evaluate,
        inputs=[
            gr.components.Textbox(
                lines=2,
                label="Instruction",
                placeholder="Tell me about alpacas.",
            ),
            gr.components.Textbox(lines=2, label="Input", placeholder="none"),
            gr.components.Slider(
                minimum=0, maximum=1, value=0.1, label="Temperature"
            ),
            gr.components.Slider(
                minimum=0, maximum=1, value=0.75, label="Top p"
            ),
            gr.components.Slider(
                minimum=0, maximum=100, step=1, value=40, label="Top k"
            ),
            gr.components.Slider(
                minimum=1, maximum=4, step=1, value=4, label="Beams"
            ),
            gr.components.Slider(
                minimum=1, maximum=2000, step=1, value=128, label="Max tokens"
            ),
        ],
        outputs=[
            gr.inputs.Textbox(
                lines=5,
                label="Output",
            )
        ],
        title="LLM-Adapters",
        description="This is a 7B-parameter LLaMA model finetuned to follow instructions. It is trained on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset and makes use of the Huggingface LLaMA implementation.",  # noqa: E501
    ).launch(share=share_gradio)
    # Old testing code follows.

    """
    # testing code for readme
    for instruction in [
        "Tell me about alpacas.",
        "Tell me about the president of Mexico in 2019.",
        "Tell me about the king of France in 2019.",
        "List all Canadian provinces in alphabetical order.",
        "Write a Python program that prints the first 10 Fibonacci numbers.",
        "Write a program that prints the numbers from 1 to 100. But for multiples of three print 'Fizz' instead of the number and for the multiples of five print 'Buzz'. For numbers which are multiples of both three and five print 'FizzBuzz'.", # noqa: E501
        "Tell me five words that rhyme with 'shock'.",
        "Translate the sentence 'I have no mouth but I must scream' into Spanish.",
        "Count up from 1 to 500.",
    ]:
        print("Instruction:", instruction)
        print("Response:", evaluate(instruction))
        print()
    """


def generate_prompt(instruction, input=None):
    if input:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input}

### Response:
"""  # noqa: E501
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Response:
"""  # noqa: E501


if __name__ == "__main__":
    fire.Fire(main)
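Because the decoded sequence echoes the full prompt, `evaluate` keeps only the text after the response marker. A minimal illustration with an invented decoded string:

```python
# Splitting on the marker discards the echoed instruction template.
decoded = (
    "Below is an instruction that describes a task.\n\n"
    "### Instruction:\nTell me about alpacas.\n\n"
    "### Response:\nAlpacas are domesticated camelids.</s>"
)
answer = decoded.split("### Response:")[1].strip()
print(answer)  # 'Alpacas are domesticated camelids.</s>' (strip removes whitespace only)
```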
SVFT-main/LLM-Adapters/lengths.ipynb
ADDED
@@ -0,0 +1,204 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/eric/miniconda3/envs/dl3/lib/python3.10/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n",
      "Found cached dataset json (/home/eric/.cache/huggingface/datasets/json/default-789f51900889f651/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
      "100%|██████████| 1/1 [00:00<00:00, 784.28it/s]\n",
      "Loading cached processed dataset at /home/eric/.cache/huggingface/datasets/json/default-789f51900889f651/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-f691ee34ec2034cb.arrow\n"
     ]
    }
   ],
   "source": [
    "from datasets import load_dataset\n",
    "from transformers import LlamaTokenizer\n",
    "\n",
    "\n",
    "tokenizer = LlamaTokenizer.from_pretrained(\n",
    "    \"decapoda-research/llama-7b-hf\", add_eos_token=True\n",
    ")\n",
    "tokenizer.pad_token = tokenizer.eos_token\n",
    "tokenizer.pad_token_id = tokenizer.eos_token_id\n",
    "\n",
    "data = load_dataset(\"json\", data_files=\"alpaca_data.json\")\n",
    "\n",
    "\n",
    "def generate_prompt(data_point):\n",
    "    # sorry about the formatting disaster gotta move fast\n",
    "    if data_point[\"input\"]:\n",
    "        return f\"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
    "\n",
    "### Instruction:\n",
    "{data_point[\"instruction\"]}\n",
    "\n",
    "### Input:\n",
    "{data_point[\"input\"]}\n",
    "\n",
    "### Response:\n",
    "{data_point[\"output\"]}\"\"\"\n",
    "    else:\n",
    "        return f\"\"\"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
    "\n",
    "### Instruction:\n",
    "{data_point[\"instruction\"]}\n",
    "\n",
    "### Response:\n",
    "{data_point[\"output\"]}\"\"\"\n",
    "\n",
    "\n",
    "data = data.map(\n",
    "    lambda data_point: {\"prompt\": tokenizer(generate_prompt(data_point))}\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.lines.Line2D at 0x7f6f1af20af0>"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjAAAAGzCAYAAAAxPS2EAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA/q0lEQVR4nO3de1hVZf7//xeobBDcG0+AJCqTjkoeSizd09Ekyeio1mhmaJpfDSu0MbUaM5vS0UrtoFbOiJ/ppHaVleSBxMOUpEZRamk6YTgZWBls8QAK9++PfqxxCyoQCAuej+va18W+13uvfa8bkJf3WvfaPsYYIwAAABvxrekOAAAAVBQBBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBvXWtGnT5OPjc17e65prrtE111xjPd+wYYN8fHz09ttvn5f3Hz58uNq1a3de3quy8vPzNWrUKIWFhcnHx0eJiYk13aVareRnaMOGDTXdlXPy8fHRuHHjarobqGMIMKgTkpKS5OPjYz38/f0VHh6u2NhYPf/88zp8+HCVvM+BAwc0bdo0ZWRkVMn+qlJt7lt5PP3000pKStLYsWP1r3/9S8OGDavpLp1XR48e1bRp02wRSMqyefNmTZs2Tbm5uTXdFdQTDWu6A0BVmj59uiIjI3XixAllZ2drw4YNSkxM1HPPPaf3339f3bp1s2ofe+wxTZ48uUL7P3DggJ544gm1a9dOF198cblft3bt2gq9T2WcrW+vvvqqiouLq70Pv0dqaqp69+6txx9/vKa7UiOOHj2qJ554QpK8ZuvsYvPmzXriiSc0fPhwBQcH13R3UA8QYFCn9O/fXz179rSeT5kyRampqbrxxht1880365tvvlFAQIAkqWHDhmrYsHp/BY4eParGjRvLz8+vWt/nXBo1alSj718eBw8eVFRUVLW/z5EjRxQYGFjt7wOgenEKCXXetddeq7/+9a/6/vvv9dprr1ntZV0Dk5KSoiuuuELBwcEKCgpSx44d9cgjj0j67ZqDSy+9VJI0YsQI63RVUlKSpN/+19ylSxelp6frqquuUuPGja3Xnn4NTImioiI98sgjCgsLU2BgoG6++Wbt37/fq6Zdu3YaPnx4qdeeus9z9a2sa2COHDmihx56SBEREXI4HOrYsaOeeeYZnf4B9SXXL6xYsUJdunSRw+HQRRddpNWrV5c94Kc5ePCgRo4cqdDQUPn7+6t79+5asmSJtb3kWo7MzEwlJydbfd+3b98Z91nSp9dff10dO3aUv7+/oqOjtWnTJq+6ku/x119/rTvvvFNNmzbVFVdcIUk6efKknnzySV144YVyOBxq166dHnnkERUUFHjto127drrxxhu1YcMG9ezZUwEBAeratat1quedd95R165drT588cUXXq8fPny4goKC9N133yk2NlaBgYEKDw/X9OnTrbHet2+fWrZsKUl64oknrDGYNm1aucb4VFu2bNH1118vl8ulxo0b6+qrr9Ynn3xS5rjs3bvXmjFxuVwaMWKEjh496lV77NgxPfDAA2rRooWaNGmim2++WT/88INX/6ZNm6aJEydKkiIjI8/4PTzXz9Dhw4eVmJiodu3ayeFwKCQkRNddd50+//zzCo8D6j5mYFAvDBs2TI888ojWrl2re++9t8yanTt36sYbb1S3bt00ffp0ORwO7d271/rHv3Pnzpo+fbqmTp2q0aNH68orr5Qk/elPf7L28csvv6h///4aPHiw7rrrLoWGhp61X0899ZR8fHw0adIkHTx4UHPnzlVMTIwyMjKsmaLyKE/fTmWM0c0336z169dr5MiRuvjii7VmzRpNnDhRP/zwg+bMmeNV//HHH+udd97RfffdpyZNmuj555/XwIEDlZWVpebNm5+xX8eOHdM111yjvXv3aty4cYqMjNTy5cs1fPhw5ebm6sEHH1Tnzp31r3/9S+PHj1fr1q310EMPSZL1B/1MNm7cqKVLl+qBBx6Qw+HQ/Pnzdf3112vr1q3q0qWLV+3tt9+uDh066Omnn7ZCw6hRo7RkyRINGjRIDz30kLZs2aIZM2bom2++0bvvvuv1+r179+rOO+/U//t//0933XWXnnnmGd10001auHChHnnkEd13332SpBkzZuiOO+7Q7t275ev7v/8fFhUV6frrr1fv3r01a9YsrV69Wo8//rhOnjyp6dOnq2XLllqwYIHGjh2r2267TQMGDJAkr1Oe5ZGamqr+/fsrOjpajz/+uHx9fbV48WJde+21+ve//63LLrvMq/6OO+5QZGSkZsyYoc8//1yLFi1SSEiI/v73v1s1w4cP17JlyzRs2DD17t1bGzduVFxcnNd+BgwYoG+//VZvvvmm5syZoxYtWkjy/h6W52dozJgxevvttzVu3DhFRUXpl19+0ccff6xvvvlGPXr0qNBYoB4wQB2wePFiI8ls27btjDUul8tccskl1vPHH3/cnPorMGfOHCPJ/PTTT2fcx7Zt24wks3jx4lLbrr76aiPJLFy4sMxtV199tfV8/fr1RpK54IILjMfjsdqXLVtmJJl58+ZZbW3btjXx8fHn3OfZ+hYfH2/atm1rPV+xYoWRZP72t7951Q0aNMj4+PiYvXv3Wm2SjJ+fn1fbl19+aSSZF154odR7nWru3LlGknnttdestsLCQuN2u01QUJDXsbdt29bExcWddX+n9kmS+eyzz6y277//3vj7+5vbbrvNaiv5Hg8ZMsTr9RkZGUaSGTVqlFf7X/7yFyPJpKamevVLktm8ebPVtmbNGiPJBAQEmO+//95qf/nll40ks379eqstPj7eSDL333+/1VZcXGzi4uKMn5+f9fP2008/GUnm8ccfL9cYlPwMlbxXcXGx6dChg4mNjTXFxcVW3dGjR01kZKS57rrrSo3LPffc47XP2267zTRv3tx6np6ebiSZxMREr7rhw4eX6uvs2bONJJOZmVmqr+X9GXK5XCYhIaFcxw9wCgn1RlBQ0FlXI5VcePjee+9V+oJXh8OhESNGlLv+7rvvVpMmTazngwYNUqtWrfThhx9W6v3L68MPP1SDBg30wAMPeLU/9NBDMsZo1apVXu0xMTG68MILrefdunWT0+nUd999d873CQsL05AhQ6y2Ro0a6YEHHlB+fr42btxY6WNwu92Kjo62nrdp00a33HKL1qxZo6KiIq/aMWPGlOqXJE2YMMGrvWT2Jzk52as9KipKbrfbet6rVy9Jv52ebNOmTan2ssbl1GXEJafACgsL9dFHH53jSMsnIyNDe/bs0Z133qlffvlFP//8s37++WcdOXJEffv21aZNm0r9XJ8+LldeeaV++eUXeTweSbJO8ZTMMJW4//77K9y/8vwMBQcHa8uWLTpw4ECF94/6hwCDeiM/P98rLJzuz3/+sy6//HKNGjVKoaGhGjx4sJYtW1ahMHPBBRdU6ILdDh06eD338fFR+/btz3r9R1X4/vvvFR4eXmo8OnfubG0/1al/pEs0bdpUv/766znfp0OHDl6nU
872PhVx+thJ0h//+EcdPXpUP/30k1d7ZGRkqX75+vqqffv2Xu1hYWEKDg4+5/G7XC5JUkRERJntp4+Lr6+v/vCHP5Tqq6Qq+17v2bNHkhQfH6+WLVt6PRYtWqSCggLl5eV5veb042ratKlX/0vG6fTxO33cyqM8P0OzZs3Sjh07FBERocsuu0zTpk07Z0hG/cU1MKgX/vvf/yovL++s//AGBARo06ZNWr9+vZKTk7V69WotXbpU1157rdauXasGDRqc830qct1KeZ3pZntFRUXl6lNVONP7mNMu+K2tzvR9Ke+NDM90/LVpXEqC9uzZs8+4xD8oKMjr+fnsf3ne64477tCVV16pd999V2vXrtXs2bP197//Xe+884769+9f5X2CvTEDg3rhX//6lyQpNjb2rHW+vr7q27evnnvuOX399dd66qmnlJqaqvXr10sq/x+88ir5X3MJY4z27t3rtWKoadOmZd4c7PRZgor0rW3btjpw4ECpU2q7du2ytleFtm3bas+ePaVmsarifU4fO0n69ttv1bhx43NeANy2bVsVFxeX2kdOTo5yc3Or7PhLFBcXl5pJ+PbbbyXJ+l7/3p+tktMzTqdTMTExZT4qupy+ZJwyMzO92vfu3Vuqtqp+N1q1aqX77rtPK1asUGZmppo3b66nnnqqSvaNuoUAgzovNTVVTz75pCIjIzV06NAz1h06dKhUW8n/ZEuW1pbcP6Sq7jb6f//3f14h4u2339aPP/7o9b/NCy+8UJ9++qkKCwuttpUrV5Zabl2Rvt1www0qKirSiy++6NU+Z84c+fj4VNn/dm+44QZlZ2dr6dKlVtvJkyf1wgsvKCgoSFdffXWl952Wlua1vHb//v1677331K9fv3POTN1www2SpLlz53q1P/fcc5JUapVNVTh1rI0xevHFF9WoUSP17dtXktS4cWNJlf/Zio6O1oUXXqhnnnlG+fn5pbafflqtPEoC//z5873aX3jhhVK1v/d3o6ioqNQprpCQEIWHh5da2g5InEJCHbNq1Srt2rVLJ0+eVE5OjlJTU5WSkqK2bdvq/fffl7+//xlfO336dG3atElxcXFq27atDh48qPnz56t169bWvUMuvPBCBQcHa+HChWrSpIkCAwPVq1evUtcIlFezZs10xRVXaMSIEcrJydHcuXPVvn17r6Xeo0aN0ttvv63rr79ed9xxh/7zn//otdde87ogsqJ9u+mmm9SnTx89+uij2rdvn7p37661a9fqvffeU2JiYql9V9bo0aP18ssva/jw4UpPT1e7du309ttv65NPPtHcuXPPek3SuXTp0kWxsbFey6glWXezPZvu3bsrPj5er7zyinJzc3X11Vdr69atWrJkiW699Vb16dOn0v0qi7+/v1avXq34+Hj16tVLq1atUnJysh555BFrtiggIEBRUVFaunSp/vjHP6pZs2bq0qVLqSXhZ+Lr66tFixapf//+uuiiizRixAhdcMEF+uGHH7R+/Xo5nU598MEHFep3dHS0Bg4cqLlz5+qXX36xllGXzB6dOutSckH1o48+qsGDB6tRo0a66aabyn3TwMOHD6t169YaNGiQunfvrqCgIH300Ufatm2bnn322Qr1G/VEDa6AAqpMyTLqkoefn58JCwsz1113nZk3b57Xct0Spy+jXrdunbnllltMeHi48fPzM+Hh4WbIkCHm22+/9Xrde++9Z6KiokzDhg29li1fffXV5qKLLiqzf2daRv3mm2+aKVOmmJCQEBMQEGDi4uK8luWWePbZZ80FF1xgHA6Hufzyy81nn31Wap9n69vpy6iNMebw4cNm/PjxJjw83DRq1Mh06NDBzJ4922sJrjG/LYEta2nrmZZ3ny4nJ8eMGDHCtGjRwvj5+ZmuXbuWudS7osuoExISzGuvvWY6dOhgHA6HueSSS7yWLxvzv+9xWUvjT5w4YZ544gkTGRlpGjVqZCIiIsyUKVPM8ePHy9WvssYlMzPTSDKzZ8+22uLj401gYKD5z3/+Y/r162caN25sQkNDzeOPP26Kioq8Xr9582YTHR1t/Pz8zrmk+vRl1CW++OILM2DAANO8eXPjcDhM27ZtzR133GHWrVt3znEp+T06dSn0kSNHTEJCgmnWrJkJCgoyt956q9m9e7eRZGbOnOn1+ieffNJccMEFxtfX12s/5fkZKigoMBMnTjTdu3c3TZo0MYGBgaZ79+5m/vz5ZxwD1G8+xtjkKjwA+P/5+PgoISGh1Cmw2mj48OF6++23yzytY1cZGRm65JJL9Nprr531tCxQnbgGBgBwRseOHSvVNnfuXPn6+uqqq66qgR4Bv+EaGADAGc2aNUvp6enq06ePGjZsqFWrVmnVqlUaPXp0qfvgAOcTAQYAcEZ/+tOflJKSoieffFL5+flq06aNpk2bpkcffbSmu4Z6jmtgAACA7XANDAAAsB0CDAAAsJ06ew1McXGxDhw4oCZNmlT57d8BAED1MMbo8OHDCg8PL/VBsKeqswHmwIEDXCEPAIBN7d+/X61btz7j9jobYEpuUb5//345nc4a7k09deSIFB7+29cHDkjlvKU4AKD+8ng8ioiIOOdHjdTZAFNy2sjpdBJgasqpH6jndBJgAADldq7LP7iIFwAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2E7Dmu5AfdJucrLX830z42qoJwAA2BszMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYqFGDatWsnHx+fUo+EhARJ0vHjx5WQkKDmzZsrKChIAwcOVE5Ojtc+srKyFBcXp8aNGyskJEQTJ07UyZMnvWo2bNigHj16yOFwqH379kpKSvp9RwkAAOqUCgWYbdu26ccff7QeKSkpkqTbb79dkjR+/Hh98MEHWr58uTZu3KgDBw5owIAB1uuLiooUFxenwsJCbd68WUuWLFFSUpKmTp1q1WRmZiouLk59+vRRRkaGEhMTNWrUKK1Zs6YqjhcAANQBPsYYU9kXJyYmauXKldqzZ488Ho9atmypN954Q4MGDZIk7dq1S507d1ZaWpp69+6tVatW6cYbb9SBAwcUGhoqSVq4cKEmTZqkn376SX5+fpo0aZKSk5O1Y8cO630GDx6s3NxcrV69utx983g8crlcysvLk9PprOwhVql2k5O9nu+bGVdDPTlPjhyRgoJ++zo/XwoMrNn+AABqvfL+/a70NTCFhYV67bXXdM8998jHx0fp6ek6ceKEYmJirJpOnTqpTZs2SktLkySlpaWpa9euVniRpNjYWHk8Hu3cudOqOXUfJTUl+ziTgoICeTwerwcAAKibKh1gVqxYodzcXA0fPlySlJ2dLT8/PwUHB3vVhYaGKjs7
26o5NbyUbC/ZdrYaj8ejY8eOnbE/M2bMkMvlsh4RERGVPTQAAFDLVTrA/OMf/1D//v0VHh5elf2ptClTpigvL8967N+/v6a7BAAAqknDyrzo+++/10cffaR33nnHagsLC1NhYaFyc3O9ZmFycnIUFhZm1WzdutVrXyWrlE6tOX3lUk5OjpxOpwICAs7YJ4fDIYfDUZnDAQAANlOpGZjFixcrJCREcXH/uwg1OjpajRo10rp166y23bt3KysrS263W5Lkdru1fft2HTx40KpJSUmR0+lUVFSUVXPqPkpqSvYBAABQ4QBTXFysxYsXKz4+Xg0b/m8Cx+VyaeTIkZowYYLWr1+v9PR0jRgxQm63W71795Yk9evXT1FRURo2bJi+/PJLrVmzRo899pgSEhKs2ZMxY8bou+++08MPP6xdu3Zp/vz5WrZsmcaPH19FhwwAAOyuwqeQPvroI2VlZemee+4ptW3OnDny9fXVwIEDVVBQoNjYWM2fP9/a3qBBA61cuVJjx46V2+1WYGCg4uPjNX36dKsmMjJSycnJGj9+vObNm6fWrVtr0aJFio2NreQhAgCAuuZ33QemNuM+MLUA94EBAFRQtd8HBgAAoKYQYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO0QYAAAgO1UOMD88MMPuuuuu9S8eXMFBASoa9eu+uyzz6ztxhhNnTpVrVq1UkBAgGJiYrRnzx6vfRw6dEhDhw6V0+lUcHCwRo4cqfz8fK+ar776SldeeaX8/f0VERGhWbNmVfIQa0a7ycmlHgAAoGpUKMD8+uuvuvzyy9WoUSOtWrVKX3/9tZ599lk1bdrUqpk1a5aef/55LVy4UFu2bFFgYKBiY2N1/Phxq2bo0KHauXOnUlJStHLlSm3atEmjR4+2tns8HvXr109t27ZVenq6Zs+erWnTpumVV16pgkMGAAB252OMMeUtnjx5sj755BP9+9//LnO7MUbh4eF66KGH9Je//EWSlJeXp9DQUCUlJWnw4MH65ptvFBUVpW3btqlnz56SpNWrV+uGG27Qf//7X4WHh2vBggV69NFHlZ2dLT8/P+u9V6xYoV27dpWrrx6PRy6XS3l5eXI6neU9xCpTnhmXfTPjzkNPatCRI1JQ0G9f5+dLgYE12x8AQK1X3r/fFZqBef/999WzZ0/dfvvtCgkJ0SWXXKJXX33V2p6Zmans7GzFxMRYbS6XS7169VJaWpokKS0tTcHBwVZ4kaSYmBj5+vpqy5YtVs1VV11lhRdJio2N1e7du/Xrr7+W2beCggJ5PB6vBwAAqJsqFGC+++47LViwQB06dNCaNWs0duxYPfDAA1qyZIkkKTs7W5IUGhrq9brQ0FBrW3Z2tkJCQry2N2zYUM2aNfOqKWsfp77H6WbMmCGXy2U9IiIiKnJoAADARioUYIqLi9WjRw89/fTTuuSSSzR69Gjde++9WrhwYXX1r9ymTJmivLw867F///6a7hIAAKgmFQowrVq1UlRUlFdb586dlZWVJUkKCwuTJOXk5HjV5OTkWNvCwsJ08OBBr+0nT57UoUOHvGrK2sep73E6h8Mhp9Pp9QAAAHVThQLM5Zdfrt27d3u1ffvtt2rbtq0kKTIyUmFhYVq3bp213ePxaMuWLXK73ZIkt9ut3NxcpaenWzWpqakqLi5Wr169rJpNmzbpxIkTVk1KSoo6duzoteIJAADUTxUKMOPHj9enn36qp59+Wnv37tUbb7yhV155RQkJCZIkHx8fJSYm6m9/+5vef/99bd++XXfffbfCw8N16623Svptxub666/Xvffeq61bt+qTTz7RuHHjNHjwYIWHh0uS7rzzTvn5+WnkyJHauXOnli5dqnnz5mnChAlVe/QAAMCWGlak+NJLL9W7776rKVOmaPr06YqMjNTcuXM1dOhQq+bhhx/WkSNHNHr0aOXm5uqKK67Q6tWr5e/vb9W8/vrrGjdunPr27StfX18NHDhQzz//vLXd5XJp7dq1SkhIUHR0tFq0aKGpU6d63SsGAADUXxW6D4ydcB+YWoD7wAAAKqha7gMDAABQGxBgAACA7RBgAACA7RBgAACA7RBgAACA7RBgAACA7RBgAACA7RBgAACA7RBgAACA7RBgAACA7RBgAACA7RBgAACA7RBgAACA7RBgAACA7RBgAACA7RBgAACA7TSs6Q7UZ+0mJ5dq2zczrgZ6AgCAvTADAwAAbIcAAwAAbIcAAwAAbIcAAwAAbIcAAwAAbIcAAwAAbIcAAwAAbIcAAwAAbIcAAwAAbIcAAwAAbIcAAwAAbIcAAwAAbIcAAwAAbIcAAwAAbIcAAwAAbIcAAwAAbIcAAwAAbIcAAwAAbIcAAwAAbKdCAWbatGny8fHxenTq1Mnafvz4cSUkJKh58+YKCgrSwIEDlZOT47WPrKwsxcXFqXHjxgoJCdHEiRN18uRJr5oNGzaoR48ecjgcat++vZKSkip/hAAAoM6p8AzMRRddpB9//NF6fPzxx9a28ePH64MPPtDy5cu1ceNGHThwQAMGDLC2FxUVKS4uToWFhdq8ebOWLFmipKQkTZ061arJzMxUXFyc+vTpo4yMDCUmJmrUqFFas2bN7zxUAABQVzSs8AsaNlRYWFip9ry8PP3jH//QG2+8oWuvvVaStHjxYnXu3FmffvqpevfurbVr1+rrr7/WRx99pNDQUF188cV68sknNWnSJE2bNk1+fn5auHChIiMj9eyzz0qSOnfurI8//lhz5sxRbGzs7zxcAABQF1R4BmbPnj0KDw/XH/7wBw0dOlRZWVmSpPT0dJ04cUIxMTFWbadOndSmTRulpaVJktLS0tS1a1eFhoZaNbGxsfJ4PNq5c6dVc+o+SmpK9nEmBQUF8ng8Xg8AAFA3VSjA9OrVS0lJSVq9erUWLFigzMxMXXnllTp8+LCys7Pl5+en4OBgr9eEhoYqOztbkpSdne0VXkq2l2w7W43H49GxY8fO2LcZM2bI5XJZj4iIiIocGgAAsJEKnULq37+/9XW3bt3Uq1cvtW3bVsuWLVNAQECVd64ipkyZogkTJljPPR4PIQYAgDrqdy2jDg4O1h//+Eft3btXYWFhKiwsVG5urldNTk6Odc1MWFhYqVVJJc/PVeN0Os8akhwOh5xOp9cDAADUTb8rwOTn5+s///mPWrVqpejoaDVq1Ejr1q2ztu/evVtZWVlyu92SJLfbre3bt+vgwYNWTUpKipxOp6KioqyaU/dRUlOyDwAAgAoFmL/85S/auHGj9u3bp82bN+u2225TgwYNNGTIELlcLo0cOVITJkzQ+vXrlZ6erhE
jRsjtdqt3796SpH79+ikqKkrDhg3Tl19+qTVr1uixxx5TQkKCHA6HJGnMmDH67rvv9PDDD2vXrl2aP3++li1bpvHjx1f90QMAAFuq0DUw//3vfzVkyBD98ssvatmypa644gp9+umnatmypSRpzpw58vX11cCBA1VQUKDY2FjNnz/fen2DBg20cuVKjR07Vm63W4GBgYqPj9f06dOtmsjISCUnJ2v8+PGaN2+eWrdurUWLFrGEGgAAWHyMMaamO1EdPB6PXC6X8vLyauR6mHaTkyv1un0z46q4JzXoyBEpKOi3r/PzpcDAmu0PAKDWK+/fbz4LCQAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2A4BBgAA2E7Dmu5AXdFucnJNdwEAgHqDGRgAAGA7BBgAAGA7BBgAAGA7BBgAAGA7BBgAAGA7BBgAAGA7vyvAzJw5Uz4+PkpMTLTajh8/roSEBDVv3lxBQUEaOHCgcnJyvF6XlZWluLg4NW7cWCEhIZo4caJOnjzpVbNhwwb16NFDDodD7du3V1JS0u/pKgAAqEMqHWC2bduml19+Wd26dfNqHz9+vD744AMtX75cGzdu1IEDBzRgwABre1FRkeLi4lRYWKjNmzdryZIlSkpK0tSpU62azMxMxcXFqU+fPsrIyFBiYqJGjRqlNWvWVLa7AACgDqlUgMnPz9fQoUP16quvqmnTplZ7Xl6e/vGPf+i5557Ttddeq+joaC1evFibN2/Wp59+Kklau3atvv76a7322mu6+OKL1b9/fz355JN66aWXVFhYKElauHChIiMj9eyzz6pz584aN26cBg0apDlz5lTBIQMAALurVIBJSEhQXFycYmJivNrT09N14sQJr/ZOnTqpTZs2SktLkySlpaWpa9euCg0NtWpiY2Pl8Xi0c+dOq+b0fcfGxlr7KEtBQYE8Ho/XAwAA1E0V/iiBt956S59//rm2bdtWalt2drb8/PwUHBzs1R4aGqrs7Gyr5tTwUrK9ZNvZajwej44dO6aAgIBS7z1jxgw98cQTFT0cAABgQxWagdm/f78efPBBvf766/L396+uPlXKlClTlJeXZz32799f010CAADVpEIzMOnp6Tp48KB69OhhtRUVFWnTpk168cUXtWbNGhUWFio3N9drFiYnJ0dhYWGSpLCwMG3dutVrvyWrlE6tOX3lUk5OjpxOZ5mzL5LkcDjkcDgqcji10ukfCrlvZlwN9QQAgNqrQgGmb9++2r59u1fbiBEj1KlTJ02aNEkRERFq1KiR1q1bp4EDB0qSdu/eraysLLndbkmS2+3WU089pYMHDyokJESSlJKSIqfTqaioKKvmww8/9HqflJQUax/1SVmfck2oAQDUdxUKME2aNFGXLl282gIDA9W8eXOrfeTIkZowYYKaNWsmp9Op+++/X263W71795Yk9evXT1FRURo2bJhmzZql7OxsPfbYY0pISLBmUMaMGaMXX3xRDz/8sO655x6lpqZq2bJlSk4u/cccAADUPxW+iPdc5syZI19fXw0cOFAFBQWKjY3V/Pnzre0NGjTQypUrNXbsWLndbgUGBio+Pl7Tp0+3aiIjI5WcnKzx48dr3rx5at26tRYtWqTY2Niq7i4AALAhH2OMqelOVAePxyOXy6W8vDw5nc5qf7+yTvVUF9ucQjpyRAoK+u3r/HwpMLBm+wMAqPXK+/ebz0ICAAC2Q4ABAAC2Q4ABAAC2Q4ABAAC2Q4ABAAC2Q4ABAAC2Q4ABAAC2Q4ABAAC2Q4ABAAC2Q4ABAAC2Q4ABAAC2Q4ABAAC2Q4ABAAC2Q4ABAAC2Q4ABAAC2Q4ABAAC2Q4ABAAC207CmO4CKazc52ev5vplxNdQTAABqBjMwAADAdggwAADAdggwAADAdggwAADAdriItw44/aJeiQt7AQB1GzMwAADAdggwAADAdggwAADAdggwAADAdggwAADAdggwAADAdggwAADAdggwAADAdggwAADAdggwAADAdggwAADAdggwAADAdggwAADAdioUYBYsWKBu3brJ6XTK6XTK7XZr1apV1vbjx48rISFBzZs3V1BQkAYOHKicnByvfWRlZSkuLk6NGzdWSEiIJk6cqJMnT3rVbNiwQT169JDD4VD79u2VlJRU+SMEAAB1ToUCTOvWrTVz5kylp6frs88+07XXXqtbbrlFO3fulCSNHz9eH3zwgZYvX66NGzfqwIEDGjBggPX6oqIixcXFqbCwUJs3b9aSJUuUlJSkqVOnWjWZmZmKi4tTnz59lJGRocTERI0aNUpr1qypokMGAAB252OMMb9nB82aNdPs2bM1aNAgtWzZUm+88YYGDRokSdq1a5c6d+6stLQ09e7dW6tWrdKNN96oAwcOKDQ0VJK0cOFCTZo0ST/99JP8/Pw0adIkJScna8eOHdZ7DB48WLm5uVq9enW5++XxeORyuZSXlyen0/l7DrFc2k1Orvb3qIh9M+NqugvSkSNSUNBvX+fnS4GBNdsfAECtV96/35W+BqaoqEhvvfWWjhw5IrfbrfT0dJ04cUIxMTFWTadOndSmTRulpaVJktLS0tS1a1crvEhSbGysPB6PNYuTlpbmtY+SmpJ9nElBQYE8Ho/XAwAA1E0VDjDbt29XUFCQHA6HxowZo3fffVdRUVHKzs6Wn5+fgoODvepDQ0OVnZ0tScrOzvYKLyXbS7adrcbj8ejYsWNn7NeMGTPkcrmsR0REREUPDQAA2ESFA0zHjh2VkZGhLVu2aOzYsYqPj9fXX39dHX2rkClTpigvL8967N+/v6a7BAAAqknDir7Az89P7du3lyRFR0dr27Ztmjdvnv785z+rsLBQubm5XrMwOTk5CgsLkySFhYVp69atXvsrWaV0as3pK5dycnLkdDoVEBBwxn45HA45HI6KHg4AALCh330fmOLiYhUUFCg6OlqNGjXSunXrrG27d+9WVlaW3G63JMntdmv79u06ePCgVZOSkiKn06moqCir5tR9lNSU7AMAAKBCMzBTpkxR//791aZNGx0+fFhvvPGGNmzYoDVr1sjlcmnkyJGaMGGCmjVrJqfTqfvvv19ut1u9e/eWJPXr109RUVEaNmyYZs2apezsbD322GNKSEiwZk/GjBmjF198UQ8//LDuuecepaamatmyZUpOrl2rfAAAQM2pUIA5ePCg7r77bv34449yuVzq1q2b1qxZo+uuu06SNGfOHPn6+mrgwIEqKChQbGys5s+fb72+QYMGWrlypcaOHSu3263AwEDFx8dr+vTpVk1kZKSSk5M1fvx4zZs3T61bt9aiRYsUGxtbRYcMAADs7nffB6a24j4w3AcGAGA/1X4fGAAAgJpS4VVIsIfTZ4RqxYwMAABVhBkYAA
BgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOwQYAABgOw1rugM4P9pNTi7Vtm9mXA30BACA348ZGAAAYDsEGAAAYDsEGAAAYDsVCjAzZszQpZdeqiZNmigkJES33nqrdu/e7VVz/PhxJSQkqHnz5goKCtLAgQOVk5PjVZOVlaW4uDg1btxYISEhmjhxok6ePOlVs2HDBvXo0UMOh0Pt27dXUlJS5Y4QAADUORUKMBs3blRCQoI+/fRTpaSk6MSJE+rXr5+OHDli1YwfP14ffPCBli9fro0bN+rAgQMaMGCAtb2oqEhxcXEqLCzU5s2btWTJEiUlJWnq1KlWTWZmpuLi4tSnTx9lZGQoMTFRo0aN0po1a6rgkAEAgN35GGNMZV/8008/KSQkRBs3btRVV12lvLw8tWzZUm+88YYGDRokSdq1a5c6d+6stLQ09e7dW6tWrdKNN96oAwcOKDQ0VJK0cOFCTZo0ST/99JP8/Pw0adIkJScna8eOHdZ7DR48WLm5uVq9enW5+ubxeORyuZSXlyen01nZQyy3slb51HbVvgrpyBEpKOi3r/PzpcDA6n0/AIDtlffv9++6BiYvL0+S1KxZM0lSenq6Tpw4oZiYGKumU6dOatOmjdLS0iRJaWlp6tq1qxVeJCk2NlYej0c7d+60ak7dR0lNyT7KUlBQII/H4/UAAAB1U6UDTHFxsRITE3X55ZerS5cukqTs7Gz5+fkpODjYqzY0NFTZ2dlWzanhpWR7ybaz1Xg8Hh07dqzM/syYMUMul8t6REREVPbQAABALVfpAJOQkKAdO3borbfeqsr+VNqUKVOUl5dnPfbv31/TXQIAANWkUnfiHTdunFauXKlNmzapdevWVntYWJgKCwuVm5vrNQuTk5OjsLAwq2br1q1e+ytZpXRqzekrl3JycuR0OhUQEFBmnxwOhxwOR2UOBwAA2EyFZmCMMRo3bpzeffddpaamKjIy0mt7dHS0GjVqpHXr1lltu3fvVlZWltxutyTJ7XZr+/btOnjwoFWTkpIip9OpqKgoq+bUfZTUlOwDAADUbxWagUlISNAbb7yh9957T02aNLGuWXG5XAoICJDL5dLIkSM1YcIENWvWTE6nU/fff7/cbrd69+4tSerXr5+ioqI0bNgwzZo1S9nZ2XrssceUkJBgzaCMGTNGL774oh5++GHdc889Sk1N1bJly5ScbL+VPgAAoOpVaAZmwYIFysvL0zXXXKNWrVpZj6VLl1o1c+bM0Y033qiBAwfqqquuUlhYmN555x1re4MGDbRy5Uo1aNBAbrdbd911l+6++25Nnz7dqomMjFRycrJSUlLUvXt3Pfvss1q0aJFiY2Or4JABAIDd/a77wNRm3Afm3LgPDACgtjkv94EBAACoCQQYAABgO5VaRo26oazTXtV+WgkAgCrADAwAALAdAgwAALAdAgwAALAdAgwAALAdAgwAALAdAgwAALAdAgwAALAdAgwAALAdAgwAALAdAgwAALAdAgwAALAdAgwAALAdAgwAALAdAgwAALAdAgwAALAdAgwAALAdAgwAALAdAgwAALAdAgwAALAdAgwAALAdAgwAALCdhjXdAdQu7SYnez3fNzOuhnoCAMCZMQMDAABshwADAABshwADAABshwADAABshwADAABshwADAABshwADAABshwADAABshwADAABshwADAABshwADAABsp8IBZtOmTbrpppsUHh4uHx8frVixwmu7MUZTp05Vq1atFBAQoJiYGO3Zs8er5tChQxo6dKicTqeCg4M1cuRI5efne9V89dVXuvLKK+Xv76+IiAjNmjWr4kcHAADqpAoHmCNHjqh79+566aWXytw+a9YsPf/881q4cKG2bNmiwMBAxcbG6vjx41bN0KFDtXPnTqWkpGjlypXatGmTRo8ebW33eDzq16+f2rZtq/T0dM2ePVvTpk3TK6+8UolDBAAAdU2FP426f//+6t+/f5nbjDGaO3euHnvsMd1yyy2SpP/7v/9TaGioVqxYocGDB+ubb77R6tWrtW3bNvXs2VOS9MILL+iGG27QM888o/DwcL3++usqLCzUP//5T/n5+emiiy5SRkaGnnvuOa+gAwAA6qcqvQYmMzNT2dnZiomJsdpcLpd69eqltLQ0SVJaWpqCg4Ot8CJJMTEx8vX11ZYtW6yaq666Sn5+flZNbGysdu/erV9//bXM9y4oKJDH4/F6AACAuqlKA0x2drYkKTQ01Ks9NDTU2padna2QkBCv7Q0bNlSzZs28asrax6nvcboZM2bI5XJZj4iIiN9/QAAAoFaqM6uQpkyZory8POuxf//+mu4SAACoJhW+BuZswsLCJEk5OTlq1aqV1Z6Tk6OLL77Yqjl48KDX606ePKlDhw5Zrw8LC1NOTo5XTcnzkprTORwOORyOKjkO/E+7ycml2vbNjKuBngAA8D9VGmAiIyMVFhamdevWWYHF4/Foy5YtGjt2rCTJ7XYrNzdX6enpio6OliSlpqaquLhYvXr1smoeffRRnThxQo0aNZIkpaSkqGPHjmratGlVdrlSyvqjDgAAzp8Kn0LKz89XRkaGMjIyJP124W5GRoaysrLk4+OjxMRE/e1vf9P777+v7du36+6771Z4eLhuvfVWSVLnzp11/fXX695779XWrVv1ySefaNy4cRo8eLDCw8MlSXfeeaf8/Pw0cuRI7dy5U0uXLtW8efM0YcKEKjtwAABgXxWegfnss8/Up08f63lJqIiPj1dSUpIefvhhHTlyRKNHj1Zubq6uuOIKrV69Wv7+/tZrXn/9dY0bN059+/aVr6+vBg4cqOeff97a7nK5tHbtWiUkJCg6OlotWrTQ1KlTWUINAAAkST7GGFPTnagOHo9HLpdLeXl5cjqdVbrv+n4KqdzXwBw5IgUF/fZ1fr4UGFh9nQIA1Anl/ftdZ1YhAQCA+qNKL+JF/XD6DBSrkgAA5xszMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHYIMAAAwHb4MEf8bqd/uKPEBzwCAKoXMzAAAMB2CDAAAMB2CDAAAMB2CDAAAMB2uIgX1aLd5GQFFB7XNzXdEQBAncQMDAAAsB0CDAAAsB0CDAAAsB0CDAAAsB0u4sV50fmvq3XMz9+rjbv1AgAqixkYAABgOwQYAABgO5xCQo05/UMgOaUEACgvZmAAAIDtMAODWuP0GRmJWRkAQNkIMKjVOM0EACgLp5AAAIDtMAMDWynrNNPpmKUBgLqPGRgAAGA7zMCgzmGWBgDqvlo9A/PSSy+pXbt28vf3V69evbR169aa7hIAA
KgFau0MzNKlSzVhwgQtXLhQvXr10ty5cxUbG6vdu3crJCSkprsHmyvPLE1ZmLkBgNqh1gaY5557Tvfee69GjBghSVq4cKGSk5P1z3/+U5MnT67h3qG+4vQUANQOtTLAFBYWKj09XVOmTLHafH19FRMTo7S0tDJfU1BQoIKCAut5Xl6eJMnj8VR5/4oLjlb5PuuiosLjKhn9ooKjKjbFNdqf86XN+OVVsp8dT8Ses6bL42vO+Zry1ABAbVHyd9sYc9a6Whlgfv75ZxUVFSk0NNSrPTQ0VLt27SrzNTNmzNATTzxRqj0iIqJa+ojycZV8Mf/umuyGLbnmVs9rKrNfADjfDh8+LJfLdcbttTLAVMaUKVM0YcIE63lxcbEOHTqk5s2by8fHx6vW4/EoIiJC+/fvl9PpPN9drdUYm7NjfM6MsTk7xufMGJuzq2/jY4zR4cOHFR4efta6WhlgWrRooQYNGignJ8erPScnR2FhYWW+xuFwyOFweLUFBwef9X2cTme9+GGoDMbm7BifM2Nszo7xOTPG5uzq0/icbealRK1cRu3n56fo6GitW7fOaisuLta6devkdrtrsGcAAKA2qJUzMJI0YcIExcfHq2fPnrrssss0d+5cHTlyxFqVBAAA6q9aG2D+/Oc/66efftLUqVOVnZ2tiy++WKtXry51YW9lOBwOPf7446VOOYGxORfG58wYm7NjfM6MsTk7xqdsPuZc65QAAABqmVp5DQwAAMDZEGAAAIDtEGAAAIDtEGAAAIDtEGAAAIDt1MsA89JLL6ldu3by9/dXr169tHXr1pruUrWaMWOGLr30UjVp0kQhISG69dZbtXv3bq+a48ePKyEhQc2bN1dQUJAGDhxY6k7IWVlZiouLU+PGjRUSEqKJEyfq5MmT5/NQzouZM2fKx8dHiYmJVlt9Hp8ffvhBd911l5o3b66AgAB17dpVn332mbXdGKOpU6eqVatWCggIUExMjPbs2eO1j0OHDmno0KFyOp0KDg7WyJEjlZ+ff74PpcoVFRXpr3/9qyIjIxUQEKALL7xQTz75pNeH0NWX8dm0aZNuuukmhYeHy8fHRytWrPDaXlXj8NVXX+nKK6+Uv7+/IiIiNGvWrOo+tCpxtvE5ceKEJk2apK5duyowMFDh4eG6++67deDAAa991OXxqRRTz7z11lvGz8/P/POf/zQ7d+409957rwkODjY5OTk13bVqExsbaxYvXmx27NhhMjIyzA033GDatGlj8vPzrZoxY8aYiIgIs27dOvPZZ5+Z3r17mz/96U/W9pMnT5ouXbqYmJgY88UXX5gPP/zQtGjRwkyZMqUmDqnabN261bRr185069bNPPjgg1Z7fR2fQ4cOmbZt25rhw4ebLVu2mO+++86sWbPG7N2716qZOXOmcblcZsWKFebLL780N998s4mMjDTHjh2zaq6//nrTvXt38+mnn5p///vfpn379mbIkCE1cUhV6qmnnjLNmzc3K1euNJmZmWb58uUmKCjIzJs3z6qpL+Pz4YcfmkcffdS88847RpJ59913vbZXxTjk5eWZ0NBQM3ToULNjxw7z5ptvmoCAAPPyyy+fr8OstLONT25uromJiTFLly41u3btMmlpaeayyy4z0dHRXvuoy+NTGfUuwFx22WUmISHBel5UVGTCw8PNjBkzarBX59fBgweNJLNx40ZjzG+/PI0aNTLLly+3ar755hsjyaSlpRljfvvl8/X1NdnZ2VbNggULjNPpNAUFBef3AKrJ4cOHTYcOHUxKSoq5+uqrrQBTn8dn0qRJ5oorrjjj9uLiYhMWFmZmz55tteXm5hqHw2HefPNNY4wxX3/9tZFktm3bZtWsWrXK+Pj4mB9++KH6On8exMXFmXvuucerbcCAAWbo0KHGmPo7Pqf/ga6qcZg/f75p2rSp1+/UpEmTTMeOHav5iKpWWQHvdFu3bjWSzPfff2+MqV/jU1716hRSYWGh0tPTFRMTY7X5+voqJiZGaWlpNdiz8ysvL0+S1KxZM0lSenq6Tpw44TUunTp1Ups2baxxSUtLU9euXb3uhBwbGyuPx6OdO3eex95Xn4SEBMXFxXmNg1S/x+f9999Xz549dfvttyskJESXXHKJXn31VWt7ZmamsrOzvcbG5XKpV69eXmMTHBysnj17WjUxMTHy9fXVli1bzt/BVIM//elPWrdunb799ltJ0pdffqmPP/5Y/fv3l8T4lKiqcUhLS9NVV10lPz8/qyY2Nla7d+/Wr7/+ep6O5vzIy8uTj4+P9aHEjE9ptfajBKrDzz//rKKiolIfRxAaGqpdu3bVUK/Or+LiYiUmJuryyy9Xly5dJEnZ2dny8/Mr9endoaGhys7OtmrKGreSbXb31ltv6fPPP9e2bdtKbavP4/Pdd99pwYIFmjBhgh555BFt27ZNDzzwgPz8/BQfH28dW1nHfurYhISEeG1v2LChmjVrZuuxkaTJkyfL4/GoU6dOatCggYqKivTUU09p6NChklTvx6dEVY1Ddna2IiMjS+2jZFvTpk2rpf/n2/HjxzVp0iQNGTLE+vRpxqe0ehVg8Nssw44dO/Txxx/XdFdqjf379+vBBx9USkqK/P39a7o7tUpxcbF69uypp59+WpJ0ySWXaMeOHVq4cKHi4+NruHc1b9myZXr99df1xhtv6KKLLlJGRoYSExMVHh7O+KBSTpw4oTvuuEPGGC1YsKCmu1Or1atTSC1atFCDBg1KrR7JyclRWFhYDfXq/Bk3bpxWrlyp9evXq3Xr1lZ7WFiYCgsLlZub61V/6riEhYWVOW4l2+wsPT1dBw8eVI8ePdSwYUM1bNhQGzdu1PPPP6+GDRsqNDS03o5Pq1atFBUV5dXWuXNnZWVlSfrfsZ3tdyosLEwHDx702n7y5EkdOnTI1mMjSRMnTtTkyZM1ePBgde3aVcOGDdP48eM1Y8YMSYxPiaoah7r6e1aiJLx8//33SklJsWZfJManLPUqwPj5+Sk6Olrr1q2z2oqLi7Vu3Tq53e4a7Fn1MsZo3Lhxevfdd5WamlpqijE6OlqNGjXyGpfdu3crKyvLGhe3263t27d7/QKV/IKd/gfObvr27avt27crIyPDevTs2VNDhw61vq6v43P55ZeXWnL/7bffqm3btpKkyMhIhYWFeY2Nx+PRli1bvMYmNzdX6enpVk1qaqqKi4vVq1ev83AU1efo0aPy9fX+Z7RBgwYqLi6WxPiUqKpxcLvd2rRpk06cOGHVpKSkqGPHjrY/PVISXvbs2aOPPvpIzZs399pe38enTDV9FfH59tZbbxmHw2GSkpLM119/bUaPHm2Cg4O9Vo/UNWPHjjUul8ts2LDB/Pjjj9bj6NGjVs2YMWNMmzZtTGpqqvnss8+M2+02brfb2l6yTLhfv34mIyPDrF692rRs2dL2y4TP5NRVSMbU3/HZunWradiwoXnqqafMnj17zOuvv24aN25sXnvtNatm5syZJjg42Lz33nvmq6++MrfcckuZy2MvueQSs2XLFvPxxx+bDh062G6ZcFni4+PNBRdcYC2jfuedd0yL
Fi3Mww8/bNXUl/E5fPiw+eKLL8wXX3xhJJnnnnvOfPHFF9YqmqoYh9zcXBMaGmqGDRtmduzYYd566y3TuHFjWywTPtv4FBYWmptvvtm0bt3aZGRkeP07feqKoro8PpVR7wKMMca88MILpk2bNsbPz89cdtll5tNPP63pLlUrSWU+Fi9ebNUcO3bM3HfffaZp06amcePG5rbbbjM//vij13727dtn+vfvbwICAkyLFi3MQw89ZE6cOHGej+b8OD3A1Ofx+eCDD0yXLl2Mw+EwnTp1Mq+88orX9uLiYvPXv/7VhIaGGofDYfr27Wt2797tVfPLL7+YIUOGmKCgION0Os2IESPM4cOHz+dhVAuPx2MefPBB06ZNG+Pv72/+8Ic/mEcffdTrj059GZ/169eX+e9MfHy8MabqxuHLL780V1xxhXE4HOaCCy4wM2fOPF+H+LucbXwyMzPP+O/0+vXrrX3U5fGpDB9jTrllJAAAgA3Uq2tgAABA3UCAAQAAtkOAAQAAtkOAAQAAtkOAAQAAtkOAAQAAtkOAAQAAtkOAAQAAtkOAAQAAtkOAAQAAtkOAAQAAtvP/AUEYNAunFIVYAAAAAElFTkSuQmCC",
|
+      "text/plain": [
+       "<Figure size 640x480 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "lens = [len(x[\"prompt\"][\"input_ids\"]) for x in data[\"train\"]]\n",
+    "plt.hist(lens, bins=100)\n",
+    "plt.title(\"Distribution of prompt lengths\")\n",
+    "plt.axvline(256, color=\"red\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<matplotlib.lines.Line2D at 0x7f6eef316ce0>"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "<base64 PNG omitted: 'Number of fully covered examples as a function of max length' plot with a red reference line at 256>",
+      "text/plain": [
+       "<Figure size 640x480 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "plt.plot([len([l for l in lens if l <= m]) for m in range(max(lens) + 1)])\n",
+    "plt.title(\"Number of fully covered examples as a function of max length\")\n",
+    "plt.axvline(x=256, color=\"red\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Percentage of tokens left out:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<matplotlib.lines.Line2D at 0x7f6eef392020>"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "<base64 PNG omitted: figure for 'Percentage of tokens left out'; the diff is truncated inside this payload>",
du2iUajsfmjpjKVfb8HDBgg7dq1k8LCQss0k8kkN998s7Ro0cIyzfz9SkpKsvr+PvXUU6LRaCQzM1NESn93eXl5WYU0EZGpU6cKAKv3wooVKyp9X4qUBouKP+Pp6emi0+lk8uTJ19xXssVTIfXYzz//DI1GgyeffNJq+uTJkyEiWL16tdV0EcHjjz+ODz74AF9++SXGjBljeW3dunXIzMzEyJEjcenSJctDo9GgW7du2LRpk832H3nkEavnvXr1wvHjx6uted26dcjJycELL7wAHx8fq9fMpwl27dqF9PR0PPbYY1bzDBkyBK1atcJ///tfy7T77rsPu3fvxrFjxyzTli9fDp1OhzvvvBMAsGfPHqSkpOAf//gHLl++bNm3vLw8DBgwAFu2bIHJZKp23wDA19fX8nVhYSEuXbqEm266CQDw559/AgCMRiPWr1+PYcOGISYmxjJ/8+bNMXjwYKv1fffddzCZTLj33nut2jw6OhotWrSotM2rqqe4uBiXL19G8+bNERwcbKkHAIKDg3Hw4EGkpKRUu76KdDod1Gq1Zb8uX76MgIAAtGzZ0mr9znat99l//vMfqFQqvPrqqzbLVnbq6VrWr1+PoqIiTJo0ybL/ADB+/HgEBQVZvf8AICAgwKrfk1arRdeuXa/5s/Dzzz8DAJ5++mmr6ZMnTwYAm+3YIyAgAPfff7/lecuWLREcHIzrr78e3bp1s0w3f12+1pq+rwDgX//6FwYOHIgnnngCo0ePRmJiYq2uWMnIyMDGjRtx7733Iicnx/KzcPnyZQwcOBApKSlWp0HN2y7//e3VqxeMRqPl1OiGDRtQUlKCxx57zGq5J554wu76WrdujV69elmeR0REoGXLltf8HlPlFAsWW7ZswdChQxETEwOVSlWry8REBO+88w6uu+466HQ6NG7cGNOnT3d8sQo5deoUYmJiEBgYaDXdfJVIxb4HX3zxBebNm4c5c+Zg5MiRVq+ZP3T69++PiIgIq8fatWutzqMCsJzHLi8kJMSqT0NlzAGgbdu21e4XUPrLsKJWrVpZ7deIESOgVquxfPlyAKXf8xUrVmDw4MEICgqy2rcxY8bY7Nsnn3wCg8GArKwsq+0kJCTYbDsjIwMTJ05EVFQUfH19ERERYZnPvHx6ejoKCgrQvHlzm+UrTktJSYGIoEWLFjZ1HT582KbNKyooKMArr7xi6V8THh6OiIgIZGZmWu3Pa6+9hszMTFx33XVo164dnn32Wezbt6/adQOl56dnz56NFi1aWK1/3759Nu3lLDV5nx07dgwxMTEIDQ11yDarev9ptVo0a9bM5ueqSZMmNgGmJj8Lp06dglqttnlfREdHIzg4uNK+QzVVWU16vR6xsbE20wBY1VrT95XZp59+ivz8fKSkpGDx4sVWwaSmjh49ChHByy+/bPOzYA6MFX8emjZtavU8JCTEal/M7VexfUNDQy3z1lTFbZm3d63vMVVOsatC8vLy0KFDBzz00EO4++67a7WOiRMnYu3atXjnnXfQrl07ZGRkICMjw8GVNhw9evTAnj17MHfuXNx7771Wv4jNf7EvWbIE0dHRNst6eVm/FepLL/qYmBj06tUL33zzDV588UVs27YNqampmDVrlmUe8769/fbbVQ4sFBAQYPW8sl+O9957L/744w88++yzuOGGGxAQEACTyYRBgwbZHPGoCZPJBJVKhdWrV1fanhVrquiJJ57AokWLMGnSJHTv3h16vR4qlQr333+/VT29e/fGsWPH8P3332Pt2rX45JNPMHv2bHz44Yd4+OGHq1z/m2++iZdffhkPPfQQXn/9dYSGhkKtVmPSpEm12l+g6iMIVXX6rS/vs+pUVaPUoAM1ULujKtdSVU01qbWm7yuzzZs3Wzqb7t+/H927d7e7XvN6n3nmGQwcOLDSeSoGhLq2uz1cuS1PoFiwGDx4sM2h4/IMBgNeeuklLF26FJmZmWjbti1mzZqFvn37AgAOHz6MBQsW4MCBA5a/PCr7K7Qhi4uLw/r165GTk2N11OLIkSOW18tr3rw53nrrLfTt2xeDBg3Chg0bLMslJiYCACIjI5GUlOS0ms3bOXDgQKV/1ZevOzk5Gf3797d6LTk52Wa/7rvvPjz22GNITk7G8uXL4efnh6FDh9psMygoqNb7duXKFWzYsAHTpk3DK6+8Yple8fRCZGQkfHx8cPToUZt1VJyWmJgIEUFCQgKuu+46u2v69ttvMWbMGLz77ruWaYWFhTZXLgClf6WNGzcO48aNQ25uLnr37o2pU6dWGyy+/fZb9OvXD59++qnV9MzMTISHh9tdL3D1r8qKNdblr/PExESsWbMGGRkZ1R61qOkHePn3X7NmzSzTi4qKcOLECYf9fMTFxcFkMiElJcVqLJoLFy4gMzPT5n3uKva8r86dO4cnnngCt956K7RarSUY2Fu7uZ29vb0d2r5A6c9d+d/9ly9ftjnS4IxwR1Wrt30sHn/8cWzduhXLli3Dvn37MGLECAwaNMjyi/7HH39Es2bN8NNPPyEhIQHx8fF4+OGH3eqIxW233Qaj0Yi5c+daTZ89ezZUKlWlwax9+/b4+eefcfjwYQwdOhQFBQUAgIEDByIoKAhvvvkmiouLbZareBlXbd16660IDAzEjBkzUFhYaPWaOf137twZkZGR+PDDD60uu1u9ejUOHz6MIUOGWC13zz33QKPRYOnSpVixYgVuv/12q3EnOnXqhMTERLzzzjvIzc2t1b6Z/2Kp+BfK+++/bzNfUlISVq1ahbNnz1qmHz161KbPy9133w2NRoNp06bZrFdEKr2MteK2Ki43Z84cm7/+K64nICAAzZs3t2rbmq5/xYoVNue67REXFweNRoMtW7ZYTZ8/f36t13nPPfdARDBt2jSb18rX7+/vX+mHY0VJSUnQarX497//bbX8p59+iqysLJv3X23ddtttAGzfQ++99x4AOGw79qrp+woo7XdiMpnw6aef4qOPPoKXlxf++c9/2v2XfGRkJPr27YuFCxfi3LlzNq/X5vfPgAED4OXlhQULFlhNr/j7EoDl90VN3h9Ud/VygKzU1FQsWrQIqamplg5yzzzzDH755RfLcLfHjx/HqVOnsGLFCnzxxRcwGo146qmnMHz4cGzcuFHhPXCMoUOHol+/fnjppZdw8uRJdOjQAWvXrsX333+PSZMmWf5Sr+imm27C999/j9tuuw3Dhw/HqlWrEBQUhAULFmD06NG48cYbcf/99yMiIgKpqan473//ix49elT6A2mvoKAgzJ49Gw8//DC6dOliGSti7969yM/Px+effw5vb2/MmjUL48aNQ58+fTBy5EhcuHABH3zwAeLj422Gho6MjES/fv3w3nvvIScnB/fdd5/V62q1Gp988gkGDx6MNm3aYNy4cWjcuDHOnDmDTZs2ISgoCD/++OM16+7duzfeeustFBcXo3Hjxli7di1OnDhhM+/UqVOxdu1a9OjRA48++qgl/LVt2xZ79uyxzJeYmIg33ngDU6ZMwcmTJzFs2DAEBgbixIkTWLlyJf71r3/hmWeeqbKm
22+/HUuWLIFer0fr1q2xdetWrF+/HmFhYVbztW7dGn379kWnTp0QGhqKXbt24dtvv73mvWNuv/12vPbaaxg3bhxuvvlm7N+/H1999ZXVX/H20uv1GDFiBObMmQOVSoXExET89NNP1+xPUp1+/fph9OjR+Pe//42UlBTLqan//e9/6Nevn2U/O3XqhPXr1+O9995DTEwMEhISrDoymkVERGDKlCmYNm0aBg0ahDvuuAPJycmYP38+unTp4rAB6jp06IAxY8bgo48+QmZmJvr06YMdO3bg888/x7Bhw9CvXz+HbMdeNX1fLVq0CP/973+xePFiNGnSBEBpAHnggQewYMECm06T1zJv3jz07NkT7dq1w/jx49GsWTNcuHABW7duxenTp685fkpFUVFRmDhxIt59913ccccdGDRoEPbu3YvVq1cjPDzc6ijFDTfcAI1Gg1mzZiErKws6nQ79+/dHZGSkXdukGnLlJShVASArV660PDdfdujv72/18PLyslzHPH78eAEgycnJluV2794tAOTIkSOu3gWHqHi5qUjpZaJPPfWUxMTEiLe3t7Ro0ULefvttq8uwRGzHsRAR+f7778XLy0vuu+8+y2V1mzZtkoEDB4perxcfHx9JTEyUsWPHyq5duyzLVXXpm/lSzZr44Ycf5OabbxZfX18JCgqSrl27ytKlS63mWb58uXTs2FF0Op2EhobKqFGj5PTp05Wu7+OPPxYAEhgYaHMZq9lff/0ld999t4SFhYlOp5O4uDi59957ZcOGDTb7cPHiRZvlT58+LXfddZcEBweLXq+XESNGyNmzZwWAvPrqq1bzbtiwQTp27CharVYSExPlk08+kcmTJ4uPj4/Nev/zn/9Iz549Le/jVq1ayYQJE6zeu5W5cuWKjBs3TsLDwyUgIEAGDhwoR44ckbi4OKtL6d544w3p2rWrBAcHi6+vr7Rq1UqmT59uNR5DZQoLC2Xy5MnSqFEj8fX1lR49esjWrVulT58+0qdPn2qXFan8clMRkYsXL8o999wjfn5+EhISIv/3f/8nBw4cqPRy05q+z0pKSuTtt9+WVq1aiVarlYiICBk8eLDs3r3bMs+RI0ekd+/e4uvra3W5YWXjWIiUXl7aqlUr8fb2lqioKHn00Udtxhapah/HjBkjcXFx1TeQiBQXF8u0adMkISFBvL29JTY2VqZMmWJ1yWV1bVGZqmqKi4ur9LLjir8bavK+SktLE71eL0OHDrVZ31133SX+/v5y/PjxKmus7HJTkdIxJR588EGJjo4Wb29vady4sdx+++3y7bffWuYxf7927txptWxllzKXlJTIyy+/LNHR0eLr6yv9+/eXw4cPS1hYmDzyyCNWy3/88cfSrFkz0Wg0Vuupqt1q+nNAtlQiyvdOUalUWLlypWWM/+XLl2PUqFE4ePCgTaeagIAAREdH49VXX7U5rF9QUAA/Pz+sXbsWt9xyiyt3gQjDhg2r1WWfROQ4mZmZCAkJwRtvvIGXXnpJ6XI8Ur08FdKxY0cYjUakp6dbXVtcXo8ePVBSUoJjx45ZTgn8/fffAGw7NRI5WkFBgdWVJSkpKfj555+txg4hIueq+HMIXO3TYu7oT66n2BGL3NxcSy/6jh074r333kO/fv0QGhqKpk2b4oEHHsDvv/+Od999Fx07dsTFixexYcMGtG/fHkOGDIHJZEKXLl0QEBCA999/HyaTCRMmTEBQUBDWrl2rxC6RB2nUqJHlviKnTp3CggULYDAY8Ndff6FFixZKl0fkERYvXozFixfjtttuQ0BAAH777TcsXboUt956K9asWaN0eZ5LqXMw5vNlFR/mc3xFRUXyyiuvSHx8vHh7e0ujRo3krrvukn379lnWcebMGbn77rslICBAoqKiZOzYsXL58mWF9og8ydixYyUuLk50Op0EBQXJwIEDrc73E5Hz7d69WwYMGCBhYWHi7e0tTZo0kYkTJ0pOTo7SpXm0etHHgoiIiNxDvR3HgoiIiBoeBgsiIiJyGJdfFWIymXD27FkEBgZymFUiIqIGQkSQk5ODmJgYqzsDV+TyYHH27FmbO/ARERFRw5CWlmYZjbUyLg8W5ptipaWlWW57TQ1AXh5QNrw6zp4Fyt2rg4iI3F92djZiY2OtbopZGZcHC/Ppj6CgIAaLhqT8CKhBQQwWREQe6lrdGNh5k4iIiByGwYKIiIgchsGCiIiIHIbBgoiIiByGwYKIiIgchsGCiIiIHIbBgoiIiByGwYKIiIgchsGCiIiIHIbBgoiIiByGwYKIiIgchsGCiIiIHMblNyEjouqJCEpMAqNJYBKBSQCTCMQEGMU8TSACyzzlvzZJ6TqMIjCZcPV1y3Kl85jnR+k/iHk7ZTUIUPZa6fJls1rWB/N0mF8r/1ysp5WfbvW67bIom8dkknKvly6DCvNb2symDcu/JlW+VlnbV72eqrdRcTu2r1W9YF1ql2pfq7r4apqgViq2W93X59DVOWF/Hbw+h1cIPH3LdQj08Xb4emuCwYI8lskkKCwxIs9gRH5RieV/Q4kJRSUmGEqMMJSYrj6KjSgymmAoNtnMY/66xFgaCkpMpnJfC0qM5uemsuelXxtNgmJj2eumq4GCiKguHu2byGBBZA8RQY6hBFn5xcgqsH5klpuWXVCM7MJi5BcZyx5XA0R+kVHp3ag1lQrQqFRQq1RQqQC1SgWN2vprtar09sal85V+rVbbLqdSASqU/a9SQVW2fqvpZRtVl31tNR9KJ5qfV1wnKsyvLvc1yq2//DJqVfl1lt/W1ecwb7tcm1i1UYX2sn7NekL5123vCF2zbdisp7pt2CxX9W2o7aq9unoqrKj6G1/XgoNXWHE/67w+h9fn4PU5eIV+WuU+3hksqF4REWTkFeFMZgHOXCnAxVwDLuYYcKns/4u5RbiUY8DFXAOKSkwO266/VgM/nRf8tBr4eGmg81ZDq1FD562GzktT7ms1tF6l0yr72lujgkZt/l8FL7UaXmoVvDRlX2tUZc8rTK9kHrW6NBRYBQbV1QBARFQfMViQyxUbTUjLyMexi3k4fjEXaVfycfpKaZA4faUABcU1P5Kg81JD7+sNva83gv1K/w8qe25+BPp4I0CngZ/WC/7m/7Ve8NVq4K8rDRJqNT+oiYgcgcGCnEZEkJZRgANns3DwbBZSLuTi2MVcnLqcj5Jr9COICtIhJtgXUYE+iAjUITxAV/a/1vI8PEAHX63GRXtDREQ1wWBBDnM2swA7T2Zg/+mssjCRjZzCkkrn9fXWIDHSH83CAxAX5ofGwb5oEuKHJiG+aBTsA50XAwMRUUPEYEG1IiI4cSkPW49fxq6TV7DjRAbOZBbYzKfVqNGqUSDaxAShZVQgEiMDkBgRgOggH55+ICJyQwwWZLfXfjyI9al5SM3It5quVgFtYvTo2DQYbRvr0TZGjxZRAfDWcBw2IiJPwWBB1co1lGDD4QtYu/M45pVNW7ojDQVaH3hrVLixaQi6JYSiS0IoOjYNQYCObykiIk/GTwGyUWI0YXPyRXy7+zQ2JafDUGKCb1Gh5fX7uzbBze3
jcXNiGPwZJIiIqBx+KpDF2cwCLNuZhm92puF89tUgkRDuj2HXxQCzS5+/OrQt4O+vUJVERFSfMVgQ/r6Qgw83H8P3e89ahpMO9dfinhsbY1jHxmjdKAiq/PxrrIWIiIjBwqP9fSEH76xJxtpDFyzTuiWE4oGb4nBrmyhe8klERHZjsPBA6dmFeG/d3/hmVxpMUjpG/aA20XikTyI6xAYrXR4RETVgDBYexGQSfLX9FGb9koxcQ+nAVYPaROOZgS3RPDJA4eqIiMgdMFh4iKPpOXju2334MzUTANAhNhgvD7keneNDlS2MiIjcCoOFmxMRfLMrDa/+cBCFxSYE6Lzw3KCWGNUtDhqOfElERA7GYOHGcg0lmPLdfvy49ywAoFeLcLw1vD0a6X0VroyIiNwVg4WbOpNZgH8u3okj53OgUavwzK0t8X+9m/H+HERE5FQMFm5ob1om/vn5LlzKNSA8QIeFo29Epzj2pSAiIudjsHAz245fxkOLdyK/yIhW0YH4dGwXNA7mqQ8iInINBgs38r+Uixj/xS4UFpvQs3k4PhzdiTcFIyIil+Knjpv449gl/PPzXSgqMaFfywgseKATfLw5ciYREbkWg4UbOHg2C//6YjeKSky4pXUU5v3jRmi91EqXRUREHoifPg1cWkY+xi7aiVxDCbomhGLOyI4MFUREpBh+AjVg+UUlePjzXbiYY0DLqEB8/GBnnv4gIiJFMVg0UCKC5/+zH8kXchARqMPih7pA7+utdFlEROThGCwaqM9+P4kf956Fl1qF+aNu5GiaRERULzBYNECHz2Vj1uojAID/N+R6dOGNxIiIqJ5gsGhgDCVGPLV8D4qMJiRdH4kxN8crXRIREZGF3cHizJkzeOCBBxAWFgZfX1+0a9cOu3btckZtVInZ61Jw5HwOwvy1mHF3e6hUvPcHERHVH3aNY3HlyhX06NED/fr1w+rVqxEREYGUlBSEhIQ4qz4qZ09aJhZuOQYAePPudogI1ClcERERkTW7gsWsWbMQGxuLRYsWWaYlJCQ4vCiyZTQJ/t+q/RABht0Qg4FtopUuiYiIyIZdp0J++OEHdO7cGSNGjEBkZCQ6duyIjz/+uNplDAYDsrOzrR5kv6+2n8KBM9kI8vHCS0NaK10OERFRpewKFsePH8eCBQvQokULrFmzBo8++iiefPJJfP7551UuM2PGDOj1essjNja2zkV7mos5Bry9JhkA8OygVjwFQkRE9ZZKRKSmM2u1WnTu3Bl//PGHZdqTTz6JnTt3YuvWrZUuYzAYYDAYLM+zs7MRGxuLrKwsBAUF1aF0z/H/Vu3Hl9tS0a6xHqsm9IBGrUCHzbw8ICCg9OvcXMDf3/U1EBGRYrKzs6HX66/5+W3XEYtGjRqhdWvrw/DXX389UlNTq1xGp9MhKCjI6kE1d+xiLpbuSAMAvDTkemVCBRERUQ3ZFSx69OiB5ORkq2l///034uLiHFoUXfX2L8kwmgQDWkXipmZhSpdDRERULbuCxVNPPYVt27bhzTffxNGjR/H111/jo48+woQJE5xVn0f7M/UKfjl4HmoV8PzgVkqXQ0REdE12BYsuXbpg5cqVWLp0Kdq2bYvXX38d77//PkaNGuWs+jzavzekAADuubEJrosKVLgaIiKia7NrHAsAuP3223H77bc7oxYqZ9/pTGxOvgi1CpjQr7nS5RAREdUI7xVST83ZeBQAcOcNjREfziswiIioYWCwqIcOn8vGukMXoOLRCiIiamAYLOqhT/53AgBwW7tGaB4ZoHA1RERENcdgUc9czDHgx71nAQAP9+R9WIiIqGFhsKhnvt6eiiKjCTfEBqNjU941loiIGhYGi3rEUGLEkm2nAADjesQrWwwREVEtMFjUI//ddw6Xcg2ICtLhtnaNlC6HiIjIbgwW9cjnf5wEAIy+KQ7eGn5riIio4eGnVz1x+Fw29p7OgrdGhfu7NlW6HCIiolphsKgnlu8svYNp0vVRCA/QKVwNERFR7TBY1AOGEiNW7TkDALi3S6zC1RAREdUeg0U9sPbgBWTmF6OR3ge9W0QoXQ4REVGtMVjUA9/sKj0NMrxTE2jUKoWrISIiqj0GC4WdvpKP345eAgCM6MTTIERE1LAxWCjsx73nIALc1CwUTcP8lC6HiIioThgsFPZD2X1B7ryhscKVEBER1R2DhYKOpufg8LlseKlVGNw2WulyiIiI6ozBQkE/7D0HAOh9XQSC/bQKV0NERFR3DBYKERHL7dHv6BCjcDVERESOwWChkINns3HiUh50XmoktY5SuhwiIiKHYLBQiPloRdL1UQjQeSlcDRERkWMwWChARPDLwfMAgCHteXt0IiJyHwwWCki+kINTl/Oh81Kjb0sO4U1ERO6DwUIBaw9eAAD0ahEOPy1PgxARkftgsFDA2kOlp0Fubc2xK4iIyL0wWLjYmcwCHDiTDbUKGHB9pNLlEBERORSDhYutK+u02TkuFGEBOoWrISIiciwGCxdbe6i0f8WtbTh2BRERuR8GCxfKyi/G9hMZAIBbOCgWERG5IQYLF/rf0YswmgQtIgMQF+avdDlEREQOx2DhQpuTLwIAx64gIiK3xWDhIiaT4Ne/S4NFn+t4NQgREbknBgsXOXQuGxdzDPDTatAlIUTpcoiIiJyCwcJFzEcrbk4Mg85Lo3A1REREzsFg4SKbk9MBAH1a8jQIERG5LwYLF8gqKMafqZkAgL7XseMmERG5LwYLF/gt5RKMJkFihD9iQ/2ULoeIiMhpGCxc4LejvBqEiIg8A4OFC/x+9DIAoGeLMIUrISIici4GCydLy8hHakY+NGoVusSHKl0OERGRU9kVLKZOnQqVSmX1aNWqlbNqcwtbj5UerejQRI9AH2+FqyEiInIuL3sXaNOmDdavX391BV52r8Kj/H7sEgCgR/NwhSshIiJyPrtTgZeXF6Kjo2s8v8FggMFgsDzPzs62d5MNlojgj7IjFt0T2b+CiIjcn919LFJSUhATE4NmzZph1KhRSE1NrXb+GTNmQK/XWx6xsbG1LrahOZqei4s5Bui81LixKYfxJiIi92dXsOjWrRsWL16MX375BQsWLMCJEyfQq1cv5OTkVLnMlClTkJWVZXmkpaXVueiG4vejpadBusSHwsebw3gTEZH7s+tUyODBgy1ft2/fHt26dUNcXBy++eYb/POf/6x0GZ1OB51OV7cqGyieBiEiIk9Tp8tNg4ODcd111+Ho0aOOqsdtmEyCbcdLg8XNDBZEROQh6hQscnNzcezYMTRq1MhR9biN5As5yC4sgZ9Wg3aN9UqXQ0RE5BJ2BYtnnnkGv/76K06ePIk//vgDd911FzQaDUaOHOms+hqsnSczAAA3Ng2Bl4bjkBERkWewq4/F6dOnMXLkSFy+fBkRERHo2bMntm3bhogI3rGzoh0nSoMFR9skIiJPYlewWLZsmbPqcCsiYjli0SWBl5kSEZHn4DF6Jzh9pQAXsg3w1qjQMZbBgoiIPAeDhROYT4O0bayHr5bjVxARkedgsHAC82mQruxfQUREHobBwgl2nGTHTSIi8k
wMFg52KdeA4xfzAACd49m/goiIPAuDhYPtOnkFAHBdVACC/bQKV0NERORaDBYO9ldaabDoFMejFURE5HkYLBxsT2omAPAyUyIi8kgMFg5kNAn2n8kCANzQNFjZYoiIiBTAYOFAf1/IQX6REQE6LyRGBChdDhERkcsxWDjQX2WnQdo30UOjVilbDBERkQIYLBxoT1nHzRtig5UthIiISCEMFg60Jy0TAIMFERF5LgYLB8kpLEZKei4AdtwkIiLPxWDhIPtPZ0EEaBzsi8hAH6XLISIiUgSDhYP8ZT4NwqMVRETkwRgsHOQvy8BYwYrWQUREpCQGCwc5UDYwVvsmwcoWQkREpCAGCwe4lGvA+exCAEDrmCCFqyEiIlIOg4UDHDybDQBoFu6PAJ2XwtUQEREph8HCAQ6eLT0NwqMVRETk6RgsHODgmdIjFm0b6xWuhIiISFkMFg5gPmLRhkcsiIjIwzFY1FF2YTFOXs4HALSJ4RELIiLybAwWdXSorONmjN4Hof5ahashIiJSFoNFHZmvCGnD/hVEREQMFnV1sGxgrLY8DUJERMRgUVeWIxbsuElERMRgUReFxUYcvVh6q3ReakpERMRgUSfJ53NgNAnC/LWICtIpXQ4REZHiGCzqIPl8DgCgVaNAqFQqhashIiJSHoNFHSRfKA0W10UFKlwJERFR/cBgUQd/lwWLlgwWREREABgs6uRI2amQltEMFkRERACDRa1l5BXhYo4BANCCRyyIiIgAMFjUmrnjZpMQXwTovBSuhoiIqH5gsKglc/+KVjwNQkREZMFgUUu8IoSIiMgWg0UtJbPjJhERkY06BYuZM2dCpVJh0qRJDiqnYRAR/M1gQUREZKPWwWLnzp1YuHAh2rdv78h6GoRzWYXIMZTAS61Cs/AApcshIiKqN2oVLHJzczFq1Ch8/PHHCAkJcXRN9Z75NEhCuD+0XjybREREZFarT8UJEyZgyJAhSEpKuua8BoMB2dnZVo+Gztxxk6dBiIiIrNk9AMOyZcvw559/YufOnTWaf8aMGZg2bZrdhdVnf/OKECIiokrZdcQiLS0NEydOxFdffQUfH58aLTNlyhRkZWVZHmlpabUqtD45djEPANAikv0riIiIyrPriMXu3buRnp6OG2+80TLNaDRiy5YtmDt3LgwGAzQajdUyOp0OOp3OMdXWAyKCY+m5AIBEBgsiIiIrdgWLAQMGYP/+/VbTxo0bh1atWuH555+3CRXuKD3HgFxDCdQqIC7MT+lyiIiI6hW7gkVgYCDatm1rNc3f3x9hYWE2092V+WhF01A/6LzcP0gRERHZg9dK2unYxbLTIBE8DUJERFRRnW/LuXnzZgeU0XCYO242Z/8KIiIiGzxiYScesSAiIqoag4Wdrl4R4q9wJURERPUPg4Ud8gwlOJtVCAC8RwgREVElGCzscOJSaf+KMH8tQvy1CldDRERU/zBY2IH9K4iIiKrHYGEH9q8gIiKqHoOFHcyXmvKIBRERUeUYLOxgORXCMSyIiIgqxWBRQ0aT4HhZ583mPGJBRERUKQaLGjp9JR9FJSbovNSICfZVuhwiIqJ6icGihsynQRLC/aFRqxSuhoiIqH5isKih4+y4SUREdE0MFjV08nJpsIgP91O4EiIiovqLwaKGTl7KBwDEh3EMCyIioqowWNSQeTjv+HAGCyIioqowWNRAYbERZ7MKAPCIBRERUXUYLGrg9JV8iAABOi+EB/DmY0RERFVhsKiBE+b+FeF+UKl4qSkREVFVGCxq4KS5fwVPgxAREVWLwaIGTpRdaprAjptERETVYrCoAfMRizgesSAiIqoWg0UNnLpc2scigYNjERERVYvB4hp4qSkREVHNMVhcQ2pG6aWmgT5eCPXnpaZERETVYbC4BvOImwnh/rzUlIiI6BoYLK6Bl5oSERHVHIPFNVjuahrGjptERETXwmBxDZa7mnIMCyIiomtisLgGyxELBgsiIqJrYrCoRkGREeeyCgEACexjQUREdE0MFtU4lVF6tELv640QXmpKRER0TQwW1bh6RQg7bhIREdUEg0U1Tl5mx00iIiJ7MFhUIzWjNFjEhfKIBRERUU0wWFQjrSxYxDJYEBER1QiDRTXMRyyaMlgQERHVCINFFUqMJpy5UnpX06bsvElERFQjDBZVOJdViBKTQKtRIyrQR+lyiIiIGgQGiyqY+1c0CfWFWs27mhIREdWEXcFiwYIFaN++PYKCghAUFITu3btj9erVzqpNUexfQUREZD+7gkWTJk0wc+ZM7N69G7t27UL//v1x55134uDBg86qTzEMFkRERPbzsmfmoUOHWj2fPn06FixYgG3btqFNmzYOLUxpaeaOmwwWRERENWZXsCjPaDRixYoVyMvLQ/fu3aucz2AwwGAwWJ5nZ2fXdpMulcoxLIiIiOxmd+fN/fv3IyAgADqdDo888ghWrlyJ1q1bVzn/jBkzoNfrLY/Y2Ng6FewqlsGxQhgsiIiIasruYNGyZUvs2bMH27dvx6OPPooxY8bg0KFDVc4/ZcoUZGVlWR5paWl1KtgVcgqLkZFXBACIDfVVuBoiIqKGw+5TIVqtFs2bNwcAdOrUCTt37sQHH3yAhQsXVjq/TqeDTqerW5UulpZR2r8i1F+LQB9vhashIiJqOOo8joXJZLLqQ+EO2L+CiIioduw6YjFlyhQMHjwYTZs2RU5ODr7++mts3rwZa9ascVZ9ikjjpaZERES1YlewSE9Px4MPPohz585Br9ejffv2WLNmDW655RZn1aeIq2NYsH8FERGRPewKFp9++qmz6qhXODgWERFR7fBeIZVIYx8LIiKiWmGwqMBkEpzmqJtERES1wmBRwYWcQhQZTfBSq9BIzz4WRERE9mCwqCD1culpkMYhvtDwdulERER2YbCogB03iYiIao/BogJ23CQiIqo9BosKeMSCiIio9hgsKmCwICIiqj0GiwpSM3ipKRERUW0xWJSTX1SCS7mlN1RjHwsiIiL7MViUY75dut7XG3pf3i6diIjIXgwW5fCupkRERHXDYFFOquVSU464SUREVBsMFuWkcgwLIiKiOmGwKIenQoiIiOqGwaIcjmFBRERUNwwWZUSEwYKIiKiOGCzKXMwxwFBigloFxASz8yYREVFtMFiUMR+tiAn2hbeGzUJERFQb/AQtw9MgREREdcdgUSaN9wghIiKqMwaLMhzDgoiIqO4YLMqkMVgQERHVGYNFGfaxICIiqjsGCwCFxUaczy4EwGBBRERUFwwWAE5fKe24GaDzQogfb5dORERUWwwWsO5foVKpFK6GiIio4WKwQPn+FRxxk4iIqC4YLMCOm0RERI7CYAEGCyIiIkdhsADHsCAiInIUjw8WIsJgQURE5CAeHywy8oqQV2SESgU05u3SiYiI6sTjg4W5f0V0kA98vDUKV0NERNSwMVjwNAgREZHDeHywSOMVIURERA7j8cGCl5oSERE5DoMFgwUREZHDeHywSMsovQEZ+1gQERHVnUcHi6ISE85mlQYLHrEgIiKqO7uCxYwZM9ClSxcEBgYiMjISw4YNQ
3JysrNqc7qzmQUQAXy9NQgP0CpdDhERUYNnV7D49ddfMWHCBGzbtg3r1q1DcXExbr31VuTl5TmrPqe6eqmpL2+XTkRE5ABe9sz8yy+/WD1fvHgxIiMjsXv3bvTu3duhhbkCO24SERE5ll3BoqKsrCwAQGhoaJXzGAwGGAwGy/Ps7Oy6bNKheI8QIiIix6p1502TyYRJkyahR48eaNu2bZXzzZgxA3q93vKIjY2t7SYdjkcsiIiIHKvWwWLChAk4cOAAli1bVu18U6ZMQVZWluWRlpZW2006HIMFERGRY9XqVMjjjz+On376CVu2bEGTJk2qnVen00Gn09WqOGcSEaReZrAgIiJyJLuChYjgiSeewMqVK7F582YkJCQ4qy6nyyooRo6hBADQJITBgoiIyBHsChYTJkzA119/je+//x6BgYE4f/48AECv18PX19cpBTqL+TRIZKAOvlreLp2IiMgR7OpjsWDBAmRlZaFv375o1KiR5bF8+XJn1ec07F9BRETkeHafCnEXvEcIERGR43nsvUJSOYYFERGRw3lssEjjqRAiIiKH89hgwT4WREREjueRwaLEaMKZTN4unYiIyNE8MlicyyqE0STQeqkRGVj/Bu8iIiJqqDwyWFg6bob4Qq3m7dKJiIgcxaODBU+DEBERORaDBRERETmMRwcLjmFBRETkWB4ZLE4zWBARETmFRwYLngohIiJyDo8LFtmFxbiSXwyARyyIiIgczeOChXko7zB/LQJ0dt2DjYiIiK7BY4MFj1YQERE5nscFC/avICIich4GCyIiInIYDwwWvPkYERGRs3hcsGAfCyIiIufxqGBhNAlOXzEHC1+FqyEiInI/HhUsLmQXotgo8FKr0EjPYEFERORoHhUszB03m4T4QsPbpRMRETmcRwYL9q8gIiJyDo8KFmm81JSIiMipPCpYcAwLIiIi52KwICIiIofxqGDBMSyIiIicy2OCRZ6hBJdyiwAATcMYLIiIiJzBY4JFWtnAWHpfbwT5eCtcDRERkXvynGDBe4QQERE5nccEC3bcJCIicj6PCRbsuElEROR8HhMseMSCiIjI+RgsiIiIyGE8IliYTMLhvImIiFzAI4LFxVwDDCUmaNQqNAr2UbocIiIit+URwcJ8GiQm2AfeGo/YZSIiIkV4xKds6mWeBiEiInIFzwgW5ktNQxgsiIiInMkjgoV5OG+OYUFERORcdgeLLVu2YOjQoYiJiYFKpcKqVaucUJZj8YoQIiIi17A7WOTl5aFDhw6YN2+eM+pxCo5hQURE5Bpe9i4wePBgDB482Bm1OEVhsREXsg0AGCyIiIicze5gYS+DwQCDwWB5np2d7exNWjld1r8iUOeFYD/eLp2IiMiZnN55c8aMGdDr9ZZHbGysszdpJbXczcdUKpVLt01ERORpnB4spkyZgqysLMsjLS3N2Zu0wjEsiIiIXMfpp0J0Oh10Op2zN1Ol1IwCAEDTMAYLIiIiZ3P7cSyuDo7lq3AlRERE7s/uIxa5ubk4evSo5fmJEyewZ88ehIaGomnTpg4tzhHSMjg4FhERkavYHSx27dqFfv36WZ4//fTTAIAxY8Zg8eLFDivMEUTEMuom+1gQERE5n93Bom/fvhARZ9TicJfzipBfZIRKBTTmqRAiIiKnc+s+Fub+FY2CfKDz0ihcDRERkftz62DB/hVERESu5dbBgmNYEBERuZZ7BwvefIyIiMilPCNYcHAsIiIil3DrYME+FkRERK7ltsHCUGLEuexCAEBsCIMFERGRK7htsDhzpQAigK+3BuEBWqXLISIi8ghuGyzSrpTdfIy3SyciInIZtw0WqexfQURE5HJuGyzSeKkpERGRy7ltsLg6OBbvEUJEROQq7hssOIYFERGRy7llsBARngohIiJSgFsGi8z8YuQYSgAATTiGBRERkcu4ZbAwnwaJDNTBx5u3SyciInIVtw4WPA1CRETkWgwWRERE5DBuGSxOX+HgWEREREpwy2DBIxZERETKcO9gwTEsiIiIXMrtgkWx0YSzmaW3S+cRCyIiItdyu2BxLrMQRpNA56VGRIBO6XKIiIg8itsFi/J3NVWrebt0IiIiV3LbYMHTIERERK7ntsEiNoR3NSUiInI1twsWaRkcw4KIiEgpbhcseCqEiIhIOW4XLNKucAwLIiIipbhVsMgqKEZmfjEAIJa3SyciInI5twoW5v4V4QFa+Ou8FK6GiIjI87hlsGDHTSIiImW4VbBgx00iIiJlMVgQERGRw7hlsGDHTSIiImW4VbBgHwsiIiJluU2wMJoEZzILAHAMCyIiIqW4TbA4n12IYqPAW6NCdJCP0uUQERF5JLcJFqmXS0+DNAnxg4a3SyciIlKE2wQL9q8gIiJSXq2Cxbx58xAfHw8fHx9069YNO3bscHRddrt6qSlvl05ERKQUu4PF8uXL8fTTT+PVV1/Fn3/+iQ4dOmDgwIFIT093Rn01xjEsiIiIlGd3sHjvvfcwfvx4jBs3Dq1bt8aHH34IPz8/fPbZZ5XObzAYkJ2dbfVwBgYLIiIi5dkVLIqKirB7924kJSVdXYFajaSkJGzdurXSZWbMmAG9Xm95xMbG1q3iKrCPBRERkfLsChaXLl2C0WhEVFSU1fSoqCicP3++0mWmTJmCrKwsyyMtLa321VZBRPBo30SM6R7HIxZEREQKcvq9xXU6HXQ6nVO3oVKp8HCvZk7dBhEREV2bXUcswsPDodFocOHCBavpFy5cQHR0tEMLIyIioobHrmCh1WrRqVMnbNiwwTLNZDJhw4YN6N69u8OLIyIioobF7lMhTz/9NMaMGYPOnTuja9eueP/995GXl4dx48Y5oz4iIiJqQOwOFvfddx8uXryIV155BefPn8cNN9yAX375xaZDJxEREXkelYiIKzeYnZ0NvV6PrKwsBAUFuXLTVBd5eUBAQOnXubmAv7+y9RARkUvV9PPbbe4VQkRERMpjsCAiIiKHYbAgIiIih2GwICIiIodhsCAiIiKHYbAgIiIih2GwICIiIodhsCAiIiKHcfrdTSsyj8eVnZ3t6k1TXeTlXf06OxswGpWrhYiIXM78uX2tcTVdHixycnIAALGxsa7eNDlKTIzSFRARkUJycnKg1+urfN3lQ3qbTCacPXsWgYGBUKlUDltvdnY2YmNjkZaWxqHCy7BNbLFNbLFNKsd2scU2seVJbSIiyMnJQUxMDNTqqntSuPyIhVqtRpMmTZy2/qCgILf/5tqLbWKLbWKLbVI5tosttoktT2mT6o5UmLHzJhERETkMgwURERE5jNsEC51Oh1dffRU6nU7pUuoNtokttokttknl2C622Ca22Ca2XN55k4iIiNyX2xyxICIiIuUxWBAREZHDMFgQERGRwzBYEBERkcMwWBAREZHDuE2wmDdvHuLj4+Hj44Nu3bphx44dSpfkFDNmzECXLl0QGBiIyMhIDBs2DMnJyVbzFBYWYsKECQgLC0NAQADuueceXLhwwWqe1NRUDBkyBH5+foiMjMSzzz6LkpISV+6K08ycORMqlQqTJk2yTPPENjlz5gwe
eOABhIWFwdfXF+3atcOuXbssr4sIXnnlFTRq1Ai+vr5ISkpCSkqK1ToyMjIwatQoBAUFITg4GP/85z+Rm5vr6l1xCKPRiJdffhkJCQnw9fVFYmIiXn/9dasbKnlCm2zZsgVDhw5FTEwMVCoVVq1aZfW6o9pg37596NWrF3x8fBAbG4u33nrL2btWa9W1SXFxMZ5//nm0a9cO/v7+iImJwYMPPoizZ89arcPd2qROxA0sW7ZMtFqtfPbZZ3Lw4EEZP368BAcHy4ULF5QuzeEGDhwoixYtkgMHDsiePXvktttuk6ZNm0pubq5lnkceeURiY2Nlw4YNsmvXLrnpppvk5ptvtrxeUlIibdu2laSkJPnrr7/k559/lvDwcJkyZYoSu+RQO3bskPj4eGnfvr1MnDjRMt3T2iQjI0Pi4uJk7Nixsn37djl+/LisWbNGjh49apln5syZotfrZdWqVbJ371654447JCEhQQoKCizzDBo0SDp06CDbtm2T//3vf9K8eXMZOXKkErtUZ9OnT5ewsDD56aef5MSJE7JixQoJCAiQDz74wDKPJ7TJzz//LC+99JJ89913AkBWrlxp9boj2iArK0uioqJk1KhRcuDAAVm6dKn4+vrKwoULXbWbdqmuTTIzMyUpKUmWL18uR44cka1bt0rXrl2lU6dOVutwtzapC7cIFl27dpUJEyZYnhuNRomJiZEZM2YoWJVrpKenCwD59ddfRaT0h8Db21tWrFhhmefw4cMCQLZu3SoipT9EarVazp8/b5lnwYIFEhQUJAaDwbU74EA5OTnSokULWbdunfTp08cSLDyxTZ5//nnp2bNnla+bTCaJjo6Wt99+2zItMzNTdDqdLF26VEREDh06JABk586dlnlWr14tKpVKzpw547zinWTIkCHy0EMPWU27++67ZdSoUSLimW1S8UPUUW0wf/58CQkJsfrZef7556Vly5ZO3qO6qyxsVbRjxw4BIKdOnRIR928TezX4UyFFRUXYvXs3kpKSLNPUajWSkpKwdetWBStzjaysLABAaGgoAGD37t0oLi62ao9WrVqhadOmlvbYunUr2rVrh6ioKMs8AwcORHZ2Ng4ePOjC6h1rwoQJGDJkiNW+A57ZJj/88AM6d+6MESNGIDIyEh07dsTHH39sef3EiRM4f/68VZvo9Xp069bNqk2Cg4PRuXNnyzxJSUlQq9XYvn2763bGQW6++WZs2LABf//9NwBg7969+O233zB48GAAntkmFTmqDbZu3YrevXtDq9Va5hk4cCCSk5Nx5coVF+2N82RlZUGlUiE4OBgA26Qil9/d1NEuXboEo9Fo9YEAAFFRUThy5IhCVbmGyWTCpEmT0KNHD7Rt2xYAcP78eWi1Wssb3iwqKgrnz5+3zFNZe5lfa4iWLVuGP//8Ezt37rR5zRPb5Pjx41iwYAGefvppvPjii9i5cyeefPJJaLVajBkzxrJPle1z+TaJjIy0et3LywuhoaENsk1eeOEFZGdno1WrVtBoNDAajZg+fTpGjRoFAB7ZJhU5qg3Onz+PhIQEm3WYXwsJCXFK/a5QWFiI559/HiNHjrTczdTT26SiBh8sPNmECRNw4MAB/Pbbb0qXoqi0tDRMnDgR69atg4+Pj9Ll1AsmkwmdO3fGm2++CQDo2LEjDhw4gA8//BBjxoxRuDplfPPNN/jqq6/w9ddfo02bNtizZw8mTZqEmJgYj20Tsk9xcTHuvfdeiAgWLFigdDn1VoM/FRIeHg6NRmPTw//ChQuIjo5WqCrne/zxx/HTTz9h06ZNaNKkiWV6dHQ0ioqKkJmZaTV/+faIjo6utL3MrzU0u3fvRnp6Om688UZ4eXnBy8sLv/76K/7973/Dy8sLUVFRHtcmjRo1QuvWra2mXX/99UhNTQVwdZ+q+7mJjo5Genq61eslJSXIyMhokG3y7LPP4oUXXsD999+Pdu3aYfTo0XjqqacwY8YMAJ7ZJhU5qg3c7ecJuBoqTp06hXXr1lmOVgCe2yZVafDBQqvVolOnTtiwYYNlmslkwoYNG9C9e3cFK3MOEcHjjz+OlStXYuPGjTaH1jp16gRvb2+r9khOTkZqaqqlPbp37479+/db/SCYf1Aqfhg1BAMGDMD+/fuxZ88ey6Nz584YNWqU5WtPa5MePXrYXIb8999/Iy4uDgCQkJCA6OhoqzbJzs7G9u3brdokMzMTu3fvtsyzceNGmEwmdOvWzQV74Vj5+flQq61/5Wk0GphMJgCe2SYVOaoNunfvji1btqC4uNgyz7p169CyZcsGecjfHCpSUlKwfv16hIWFWb3uiW1SLaV7jzrCsmXLRKfTyeLFi+XQoUPyr3/9S4KDg616+LuLRx99VPR6vWzevFnOnTtneeTn51vmeeSRR6Rp06ayceNG2bVrl3Tv3l26d+9ued18aeWtt94qe/bskV9++UUiIiIa7KWVlSl/VYiI57XJjh07xMvLS6ZPny4pKSny1VdfiZ+fn3z55ZeWeWbOnCnBwcHy/fffy759++TOO++s9LLCjh07yvbt2+W3336TFi1aNKhLK8sbM2aMNG7c2HK56XfffSfh4eHy3HPPWebxhDbJycmRv/76S/766y8BIO+995789ddfliscHNEGmZmZEhUVJaNHj5YDBw7IsmXLxM/Pr95eWlldmxQVFckdd9whTZo0kT179lj93i1/hYe7tUlduEWwEBGZM2eONG3aVLRarXTt2lW2bdumdElOAaDSx6JFiyzzFBQUyGOPPSYhISHi5+cnd911l5w7d85qPSdPnpTBgweLr6+vhIeHy+TJk6W4uNjFe+M8FYOFJ7bJjz/+KG3bthWdTietWrWSjz76yOp1k8kkL7/8skRFRYlOp5MBAwZIcnKy1TyXL1+WkSNHSkBAgAQFBcm4ceMkJyfHlbvhMNnZ2TJx4kRp2rSp+Pj4SLNmzeSll16y+nDwhDbZtGlTpb9DxowZIyKOa4O9e/dKz549RafTSePGjWXmzJmu2kW7VdcmJ06cqPL37qZNmyzrcLc2qQuVSLlh54iIiIjqoMH3sSAiIqL6g8GCiIiIHIbBgoiIiByGwYKIiIgchsGCiIiIHIbBgoiIiByGwYKIiIgchsGCiIiIHIbBgoiIiByGwYKIiIgchsGCiIiIHOb/A8ivILzRL/4NAAAAAElFTkSuQmCC",
|
155 |
+
"text/plain": [
|
156 |
+
"<Figure size 640x480 with 1 Axes>"
|
157 |
+
]
|
158 |
+
},
|
159 |
+
"metadata": {},
|
160 |
+
"output_type": "display_data"
|
161 |
+
}
|
162 |
+
],
|
163 |
+
"source": [
|
164 |
+
"plt.plot([sum(min(l, m) for l in lens) for m in range(max(lens) + 1)])\n",
|
165 |
+
"plt.title(\"Token coverage as a function of max length\")\n",
|
166 |
+
"plt.axvline(x=256, color=\"red\")"
|
167 |
+
]
|
168 |
+
},
|
169 |
+
{
|
170 |
+
"cell_type": "code",
|
171 |
+
"execution_count": null,
|
172 |
+
"metadata": {},
|
173 |
+
"outputs": [],
|
174 |
+
"source": []
|
175 |
+
}
|
176 |
+
],
|
177 |
+
"metadata": {
|
178 |
+
"kernelspec": {
|
179 |
+
"display_name": "dl3",
|
180 |
+
"language": "python",
|
181 |
+
"name": "python3"
|
182 |
+
},
|
183 |
+
"language_info": {
|
184 |
+
"codemirror_mode": {
|
185 |
+
"name": "ipython",
|
186 |
+
"version": 3
|
187 |
+
},
|
188 |
+
"file_extension": ".py",
|
189 |
+
"mimetype": "text/x-python",
|
190 |
+
"name": "python",
|
191 |
+
"nbconvert_exporter": "python",
|
192 |
+
"pygments_lexer": "ipython3",
|
193 |
+
"version": "3.10.8"
|
194 |
+
},
|
195 |
+
"orig_nbformat": 4,
|
196 |
+
"vscode": {
|
197 |
+
"interpreter": {
|
198 |
+
"hash": "90bfda469df5ac7fed8d7e225d563f60a7a7aa420ccfadb091c914debf775e49"
|
199 |
+
}
|
200 |
+
}
|
201 |
+
},
|
202 |
+
"nbformat": 4,
|
203 |
+
"nbformat_minor": 2
|
204 |
+
}
|
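The two plots in lengths.ipynb answer the same question from different angles: how much of the training data a given maximum sequence length covers. A standalone sketch of the same computation at the cutoff the notebook marks with the red line (this assumes `lens` is the list of per-example token lengths built earlier in the notebook; it is not part of the repo):

    # Assumes `lens` holds the token length of each training example.
    max_len = 256
    fully_covered = sum(1 for l in lens if l <= max_len)
    tokens_kept = sum(min(l, max_len) for l in lens)
    total_tokens = sum(lens)
    print(f"{fully_covered}/{len(lens)} examples fit entirely in {max_len} tokens")
    print(f"{100 * (1 - tokens_kept / total_tokens):.2f}% of tokens left out")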
SVFT-main/LLM-Adapters/mathqa.py
ADDED
@@ -0,0 +1,27 @@
import os
import json
from datasets import load_dataset

dataset = load_dataset("math_qa")
save_path = "dataset/mathqa/test.json"

if not os.path.exists("dataset/mathqa/"):
    os.makedirs("dataset/mathqa/")


def writer(data, save_path):
    with open(save_path, "w") as f:
        json.dump(data, f, indent=4)

test_data = []
for sample in dataset["test"]:
    # Upper-case the option labels, e.g. "a ) 38 , b ) 27" -> "A ) 38 , B ) 27".
    # Note this replaces every occurrence of the letters a-f, not just the labels.
    options = sample["options"].replace("a", "A").replace("b", "B").replace("c", "C").replace("d", "D").replace("e", "E").replace("f", "F")
    test_data.append({
        "instruction": f"{sample['Problem']} The options: {options}",
        "input": "",
        "output": "",
        "answer": sample["correct"].upper(),
    })

writer(test_data, save_path)
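For reference, mathqa.py emits the same instruction/input/output schema the other LLM-Adapters evaluation sets use, with the gold option letter in "answer". A minimal sketch of reading the converted file back (the path is the one hard-coded in the script above):

    import json

    with open("dataset/mathqa/test.json") as f:
        test_data = json.load(f)

    # Each entry looks like:
    # {"instruction": "<problem> The options: A ) ... , B ) ...",
    #  "input": "", "output": "", "answer": "A"}
    print(test_data[0]["instruction"])
    print(test_data[0]["answer"])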
SVFT-main/LLM-Adapters/multi_dataset_eval.py
ADDED
@@ -0,0 +1,49 @@
from concurrent.futures import ProcessPoolExecutor
import queue
import subprocess
import os

def evaluate(dataset, gpu):
    print('*******dataset:', dataset)
    model_name = "Pythia_2B_143000_SVFT_CR15K"
    save_dir = "results/" + model_name

    if not os.path.exists(save_dir):
        try:
            os.makedirs(save_dir)
        except OSError:
            # Another worker may have created the directory first.
            pass

    save_path = os.path.join(save_dir, dataset + ".txt")
    command = f"CUDA_VISIBLE_DEVICES={gpu} python commonsense_evaluate_latest.py \
        --model LLaMA-7B \
        --adapter LoRA \
        --dataset {dataset} \
        --base_model './{model_name}' \
        --batch_size 1 | tee -a {save_path}"

    # With capture_output=False, result.stdout is None; the output streams to
    # the console and is appended to save_path by tee instead.
    result = subprocess.run(command, shell=True, text=True, capture_output=False)
    print(f"Evaluation results for dataset {dataset} on GPU {gpu}:\n{result.stdout}")
    return gpu


datasets = ["boolq", "social_i_qa", "piqa", "ARC-Easy", "ARC-Challenge", "winogrande", "openbookqa", "hellaswag"]

gpus = [0, 0, 0, 0]
tasks_queue = queue.Queue()
gpu_queue = queue.Queue()

for gpu in gpus:
    gpu_queue.put(gpu)
for task in datasets:
    tasks_queue.put(task)

num_processes = min(len(datasets), len(gpus))  # number of processes to run in parallel

with ProcessPoolExecutor(max_workers=num_processes) as executor:
    # Seed one task per available GPU slot; as each finishes, return its GPU
    # to the pool and launch the next queued dataset on it.
    futures = [executor.submit(evaluate, tasks_queue.get(), gpu_queue.get()) for i in range(num_processes)]
    for future in futures:
        gpu_id = future.result()
        gpu_queue.put(gpu_id)
        if tasks_queue.qsize() > 0:
            futures.append(executor.submit(evaluate, tasks_queue.get(), gpu_queue.get()))
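The scheduling idiom above (a GPU queue recycled as futures complete, with the futures list growing while it is iterated) generalizes to any batch of GPU-bound shell jobs. A stripped-down sketch with a stubbed worker, so the control flow is visible without the subprocess call (all names here are illustrative, not part of the repo):

    import queue
    from concurrent.futures import ProcessPoolExecutor

    def run_job(job, gpu):
        print(f"job {job} on GPU {gpu}")  # stand-in for the real evaluation subprocess
        return gpu

    jobs, gpus = queue.Queue(), queue.Queue()
    for j in ["a", "b", "c", "d", "e"]:
        jobs.put(j)
    for g in [0, 1]:
        gpus.put(g)

    with ProcessPoolExecutor(max_workers=2) as pool:
        futures = [pool.submit(run_job, jobs.get(), gpus.get()) for _ in range(2)]
        for fut in futures:  # the list grows during iteration, draining the job queue
            gpus.put(fut.result())
            if not jobs.empty():
                futures.append(pool.submit(run_job, jobs.get(), gpus.get()))

Note that listing GPU 0 four times in multi_dataset_eval.py simply runs four evaluations concurrently on the same device.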
SVFT-main/LLM-Adapters/peft/LICENSE
ADDED
@@ -0,0 +1,201 @@
(Standard Apache License, Version 2.0 text, 201 lines — identical to the SVFT-main/LLM-Adapters/LICENSE file added earlier in this diff.)
SVFT-main/LLM-Adapters/peft/Makefile
ADDED
@@ -0,0 +1,20 @@
.PHONY: quality style test docs

check_dirs := src tests examples

# Check that source code meets quality standards

# this target runs checks on all files
quality:
	black --check $(check_dirs)
	ruff $(check_dirs)
	doc-builder style src tests --max_len 119 --check_only

# Format source code automatically and check if there are any problems left that need manual fixing
style:
	black $(check_dirs)
	ruff $(check_dirs) --fix
	doc-builder style src tests --max_len 119

test:
	pytest tests/
SVFT-main/LLM-Adapters/peft/pyproject.toml
ADDED
@@ -0,0 +1,36 @@
[tool.black]
line-length = 119
target-version = ['py36']

[tool.ruff]
ignore = ["C901", "E501", "E741", "W605"]
select = ["C", "E", "F", "I", "W"]
line-length = 119

[tool.ruff.isort]
lines-after-imports = 2
known-first-party = ["peft"]

[isort]
default_section = "FIRSTPARTY"
known_first_party = "peft"
known_third_party = [
    "numpy",
    "torch",
    "accelerate",
    "transformers",
]
line_length = 119
lines_after_imports = 2
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
ensure_newline_before_comments = true

[tool.pytest]
doctest_optionflags = [
    "NORMALIZE_WHITESPACE",
    "ELLIPSIS",
    "NUMBER",
]
SVFT-main/LLM-Adapters/peft/setup.py
ADDED
@@ -0,0 +1,76 @@
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from setuptools import find_packages, setup

extras = {}
extras["quality"] = ["black ~= 22.0", "ruff>=0.0.241"]
extras["docs_specific"] = ["hf-doc-builder"]
extras["dev"] = extras["quality"] + extras["docs_specific"]
extras["test"] = extras["dev"] + ["pytest", "pytest-xdist", "parameterized"]

setup(
    name="peft",
    version="0.3.0.dev0",
    description="Parameter-Efficient Fine-Tuning (PEFT)",
    license_files=["LICENSE"],
    keywords="deep learning",
    license="Apache",
    author="The AGI-Edgerunners team",
    author_email="[email protected]",
    url="https://github.com/AGI-Edgerunners/LLM-Adapters",
    package_dir={"": "src"},
    packages=find_packages("src"),
    entry_points={},
    python_requires=">=3.7.0",
    install_requires=[
        "numpy>=1.17",
        "packaging>=20.0",
        "psutil",
        "pyyaml",
        "torch>=1.13.0",
        "transformers",
        "accelerate",
    ],
    extras_require=extras,
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "Intended Audience :: Education",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.7",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
)

# Release checklist
# 1. Change the version in __init__.py and setup.py.
# 2. Commit these changes with the message: "Release: VERSION"
# 3. Add a tag in git to mark the release: "git tag VERSION -m 'Adds tag VERSION for pypi' "
#    Push the tag to git: git push --tags origin main
# 4. Run the following commands in the top-level directory:
#    python setup.py bdist_wheel
#    python setup.py sdist
# 5. Upload the package to the pypi test server first:
#    twine upload dist/* -r pypitest
#    twine upload dist/* -r pypitest --repository-url=https://test.pypi.org/legacy/
# 6. Check that you can install it in a virtualenv by running:
#    pip install -i https://testpypi.python.org/pypi peft
# 7. Upload the final version to actual pypi:
#    twine upload dist/* -r pypi
# 8. Add release notes to the tag in github once everything is looking hunky-dory.
# 9. Update the version in __init__.py, setup.py to the new version "-dev" and push to master
SVFT-main/LLM-Adapters/peft/src/peft/__init__.py
ADDED
@@ -0,0 +1,55 @@
# flake8: noqa
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all.

# coding=utf-8
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "0.3.0.dev0"

from .mapping import MODEL_TYPE_TO_PEFT_MODEL_MAPPING, PEFT_TYPE_TO_CONFIG_MAPPING, get_peft_config, get_peft_model
from .peft_model import (
    PeftModel,
    PeftModelForCausalLM,
    PeftModelForSeq2SeqLM,
    PeftModelForSequenceClassification,
    PeftModelForTokenClassification,
)
from .tuners import (
    LoraConfig,
    LoraModel,
    BottleneckConfig,
    BottleneckModel,
    PrefixEncoder,
    PrefixTuningConfig,
    PromptEmbedding,
    PromptEncoder,
    PromptEncoderConfig,
    PromptEncoderReparameterizationType,
    PromptTuningConfig,
    PromptTuningInit,
)
from .utils import (
    TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING,
    PeftConfig,
    PeftType,
    PromptLearningConfig,
    TaskType,
    bloom_model_postprocess_past_key_value,
    get_peft_model_state_dict,
    prepare_model_for_int8_training,
    set_peft_model_state_dict,
    shift_tokens_right,
)
SVFT-main/LLM-Adapters/peft/src/peft/mapping.py
ADDED
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2023-present the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
|
16 |
+
from .peft_model import (
|
17 |
+
PeftModel,
|
18 |
+
PeftModelForCausalLM,
|
19 |
+
PeftModelForSeq2SeqLM,
|
20 |
+
PeftModelForSequenceClassification,
|
21 |
+
PeftModelForTokenClassification,
|
22 |
+
)
|
23 |
+
from .tuners import LoraConfig, PrefixTuningConfig, PromptEncoderConfig, PromptTuningConfig, BottleneckConfig
|
24 |
+
from .utils import PromptLearningConfig
|
25 |
+
|
26 |
+
|
27 |
+
MODEL_TYPE_TO_PEFT_MODEL_MAPPING = {
|
28 |
+
"SEQ_CLS": PeftModelForSequenceClassification,
|
29 |
+
"SEQ_2_SEQ_LM": PeftModelForSeq2SeqLM,
|
30 |
+
"CAUSAL_LM": PeftModelForCausalLM,
|
31 |
+
"TOKEN_CLS": PeftModelForTokenClassification,
|
32 |
+
}
|
33 |
+
|
34 |
+
PEFT_TYPE_TO_CONFIG_MAPPING = {
|
35 |
+
"PROMPT_TUNING": PromptTuningConfig,
|
36 |
+
"PREFIX_TUNING": PrefixTuningConfig,
|
37 |
+
"P_TUNING": PromptEncoderConfig,
|
38 |
+
"LORA": LoraConfig,
|
39 |
+
"BOTTLENECK": BottleneckConfig,
|
40 |
+
}
|
41 |
+
|
42 |
+
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING = {
|
43 |
+
"t5": ["q", "v"],
|
44 |
+
"mt5": ["q", "v"],
|
45 |
+
"bart": ["q_proj", "v_proj"],
|
46 |
+
"gpt2": ["c_attn"],
|
47 |
+
"bloom": ["query_key_value"],
|
48 |
+
"opt": ["q_proj", "v_proj"],
|
49 |
+
"gptj": ["q_proj", "v_proj"],
|
50 |
+
"gpt_neox": ["query_key_value"],
|
51 |
+
"gpt_neo": ["q_proj", "v_proj"],
|
52 |
+
"bert": ["query", "value"],
|
53 |
+
"roberta": ["query", "value"],
|
54 |
+
"xlm-roberta": ["query", "value"],
|
55 |
+
"electra": ["query", "value"],
|
56 |
+
"deberta-v2": ["query_proj", "value_proj"],
|
57 |
+
"deberta": ["in_proj"],
|
58 |
+
"layoutlm": ["query", "value"],
|
59 |
+
"llama": ["q_proj", "v_proj"],
|
60 |
+
"chatglm": ["query_key_value"],
|
61 |
+
}
|
62 |
+
|
63 |
+
TRANSFORMERS_MODELS_TO_BOTTLENECK_TARGET_MODULES_MAPPING = {
|
64 |
+
"bloom": ["dense_h_to_4h", "dense_4h_to_h"],
|
65 |
+
"gptj": ["fc_in", "fc_out"],
|
66 |
+
"gpt_neo": ["c_fc", "c_proj"],
|
67 |
+
"llama": ["gate_proj", "up_proj", "down_proj"],
|
68 |
+
"opt": ["fc1", "fc2"],
|
69 |
+
"chatglm": ["dense_h_to_4h", "dense_4h_to_h"],
|
70 |
+
}
|
71 |
+
|
72 |
+
TRANSFORMERS_MODELS_TO_ADAPTERP_TARGET_MODULES_MAPPING = {
|
73 |
+
"bloom": ["dense_4h_to_h"],
|
74 |
+
"gptj": ["fc_out"],
|
75 |
+
"gpt_neo": ["c_proj"],
|
76 |
+
"llama": ["down_proj"],
|
77 |
+
"opt": ["fc2"],
|
78 |
+
"chatglm": ["dense_4h_to_h"],
|
79 |
+
}
|
80 |
+
|
81 |
+
TRANSFORMERS_MODELS_TO_PARALLEL_TARGET_MODULES_MAPPING = {
|
82 |
+
"bloom": ["query_key_value"],
|
83 |
+
"gptj": ["q_proj", "v_proj", "k_proj"],
|
84 |
+
"gpt_neo": ["q_proj", "v_proj", "k_proj"],
|
85 |
+
"llama": ["q_proj", "v_proj", "k_proj"],
|
86 |
+
"opt": ["q_proj", "v_proj", "k_proj"],
|
87 |
+
"chatglm": ["query_key_value"],
|
88 |
+
}
|
89 |
+
|
90 |
+
|
91 |
+
|
92 |
+
def get_peft_config(config_dict):
|
93 |
+
"""
|
94 |
+
Returns a Peft config object from a dictionary.
|
95 |
+
|
96 |
+
Args:
|
97 |
+
config_dict (`Dict[str, Any]`): Dictionary containing the configuration parameters.
|
98 |
+
"""
|
99 |
+
|
100 |
+
return PEFT_TYPE_TO_CONFIG_MAPPING[config_dict["peft_type"]](**config_dict)
|
101 |
+
|
102 |
+
|
103 |
+
def _prepare_prompt_learning_config(peft_config, model_config):
|
104 |
+
if peft_config.num_layers is None:
|
105 |
+
if "num_hidden_layers" in model_config:
|
106 |
+
num_layers = model_config["num_hidden_layers"]
|
107 |
+
elif "num_layers" in model_config:
|
108 |
+
num_layers = model_config["num_layers"]
|
109 |
+
elif "n_layer" in model_config:
|
110 |
+
num_layers = model_config["n_layer"]
|
111 |
+
else:
|
112 |
+
raise ValueError("Please specify `num_layers` in `peft_config`")
|
113 |
+
peft_config.num_layers = num_layers
|
114 |
+
|
115 |
+
if peft_config.token_dim is None:
|
116 |
+
if "hidden_size" in model_config:
|
117 |
+
token_dim = model_config["hidden_size"]
|
118 |
+
elif "n_embd" in model_config:
|
119 |
+
token_dim = model_config["n_embd"]
|
120 |
+
elif "d_model" in model_config:
|
121 |
+
token_dim = model_config["d_model"]
|
122 |
+
else:
|
123 |
+
raise ValueError("Please specify `token_dim` in `peft_config`")
|
124 |
+
peft_config.token_dim = token_dim
|
125 |
+
|
126 |
+
if peft_config.num_attention_heads is None:
|
127 |
+
if "num_attention_heads" in model_config:
|
128 |
+
num_attention_heads = model_config["num_attention_heads"]
|
129 |
+
elif "n_head" in model_config:
|
130 |
+
num_attention_heads = model_config["n_head"]
|
131 |
+
elif "num_heads" in model_config:
|
132 |
+
num_attention_heads = model_config["num_heads"]
|
133 |
+
elif "encoder_attention_heads" in model_config:
|
134 |
+
num_attention_heads = model_config["encoder_attention_heads"]
|
135 |
+
else:
|
136 |
+
raise ValueError("Please specify `num_attention_heads` in `peft_config`")
|
137 |
+
peft_config.num_attention_heads = num_attention_heads
|
138 |
+
|
139 |
+
if getattr(peft_config, "encoder_hidden_size", None) is None:
|
140 |
+
setattr(peft_config, "encoder_hidden_size", token_dim)
|
141 |
+
|
142 |
+
return peft_config
|
143 |
+
|
144 |
+
|
145 |
+
def _prepare_lora_config(peft_config, model_config):
|
146 |
+
if peft_config.target_modules is None:
|
147 |
+
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING:
|
148 |
+
raise ValueError("Please specify `target_modules` in `peft_config`")
|
149 |
+
peft_config.target_modules = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
150 |
+
if len(peft_config.target_modules) == 1:
|
151 |
+
peft_config.fan_in_fan_out = True
|
152 |
+
peft_config.enable_lora = [True, False, True]
|
153 |
+
if peft_config.inference_mode:
|
154 |
+
peft_config.merge_weights = True
|
155 |
+
return peft_config
|
156 |
+
|
157 |
+
|
158 |
+
def _prepare_bottleneck_config(peft_config, model_config):
|
159 |
+
if peft_config.target_modules is None:
|
160 |
+
if peft_config.use_parallel_adapter:
|
161 |
+
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_PARALLEL_TARGET_MODULES_MAPPING:
|
162 |
+
raise ValueError("Please specify `target_modules` in `peft_config`")
|
163 |
+
peft_config.target_modules = TRANSFORMERS_MODELS_TO_PARALLEL_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
164 |
+
elif peft_config.use_adapterp:
|
165 |
+
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_ADAPTERP_TARGET_MODULES_MAPPING:
|
166 |
+
raise ValueError("Please specify `target_modules` in `peft_config`")
|
167 |
+
peft_config.target_modules = TRANSFORMERS_MODELS_TO_ADAPTERP_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
168 |
+
else:
|
169 |
+
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_BOTTLENECK_TARGET_MODULES_MAPPING:
|
170 |
+
raise ValueError("Please specify `target_modules` in `peft_config`")
|
171 |
+
peft_config.target_modules = TRANSFORMERS_MODELS_TO_BOTTLENECK_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
172 |
+
|
173 |
+
return peft_config
|
174 |
+
|
175 |
+
|
176 |
+
|
177 |
+
def get_peft_model(model, peft_config):
|
178 |
+
"""
|
179 |
+
Returns a Peft model object from a model and a config.
|
180 |
+
|
181 |
+
Args:
|
182 |
+
model ([`transformers.PreTrainedModel`]): Model to be wrapped.
|
183 |
+
peft_config ([`PeftConfig`]): Configuration object containing the parameters of the Peft model.
|
184 |
+
"""
|
185 |
+
|
186 |
+
model_config = model.config.to_dict()
|
187 |
+
peft_config.base_model_name_or_path = model.__dict__.get("name_or_path", None)
|
188 |
+
if peft_config.task_type not in MODEL_TYPE_TO_PEFT_MODEL_MAPPING.keys():
|
189 |
+
if peft_config.peft_type == "LORA":
|
190 |
+
peft_config = _prepare_lora_config(peft_config, model_config)
|
191 |
+
return PeftModel(model, peft_config)
|
192 |
+
elif peft_config.peft_type == "BOTTLENECK":
|
193 |
+
peft_config = _prepare_bottleneck_config(peft_config, model_config)
|
194 |
+
return PeftModel(model, peft_config)
|
195 |
+
if not isinstance(peft_config, PromptLearningConfig):
|
196 |
+
if peft_config.peft_type == "BOTTLENECK":
|
197 |
+
peft_config = _prepare_bottleneck_config(peft_config, model_config)
|
198 |
+
elif peft_config.peft_type == "LORA":
|
199 |
+
peft_config = _prepare_lora_config(peft_config, model_config)
|
200 |
+
else:
|
201 |
+
peft_config = _prepare_prompt_learning_config(peft_config, model_config)
|
202 |
+
return MODEL_TYPE_TO_PEFT_MODEL_MAPPING[peft_config.task_type](model, peft_config)
|
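As a usage sketch (not part of the uploaded files): with the mappings above, `get_peft_model` can infer `target_modules` when the config leaves it unset. The model id and hyperparameters below are illustrative placeholders, and the `LoraConfig` fields assume the vendored `tuners/lora.py`:

from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

# For a "llama" model_type, _prepare_lora_config fills target_modules from
# TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING -> ["q_proj", "v_proj"].
base_model = AutoModelForCausalLM.from_pretrained("yahma/llama-7b-hf")  # placeholder base model
lora_config = LoraConfig(task_type="CAUSAL_LM", r=8, lora_alpha=16, lora_dropout=0.05)
peft_model = get_peft_model(base_model, lora_config)
peft_model.print_trainable_parameters()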
SVFT-main/LLM-Adapters/peft/src/peft/peft_model.py
ADDED
@@ -0,0 +1,974 @@
# coding=utf-8
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import inspect
import os
import warnings
from contextlib import contextmanager

import torch
from accelerate import dispatch_model, infer_auto_device_map
from accelerate.hooks import AlignDevicesHook, add_hook_to_module, remove_hook_from_submodules
from accelerate.utils import get_balanced_memory
from huggingface_hub import hf_hub_download
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
from transformers import PreTrainedModel
from transformers.modeling_outputs import SequenceClassifierOutput, TokenClassifierOutput
from transformers.utils import PushToHubMixin

from .tuners import LoraModel, BottleneckModel, PrefixEncoder, PromptEmbedding, PromptEncoder
from .utils import (
    TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING,
    WEIGHTS_NAME,
    PeftConfig,
    PeftType,
    PromptLearningConfig,
    TaskType,
    _set_trainable,
    get_peft_model_state_dict,
    set_peft_model_state_dict,
    shift_tokens_right,
)


class PeftModel(PushToHubMixin, torch.nn.Module):
    """
    Parameter-Efficient Fine-Tuning Model. Base model encompassing various Peft methods.

    Args:
        model ([`PreTrainedModel`]): The base transformer model used for Peft.
        peft_config ([`PeftConfig`]): The configuration of the Peft model.

    **Attributes**:
        - **base_model** ([`PreTrainedModel`]) -- The base transformer model used for Peft.
        - **peft_config** ([`PeftConfig`]) -- The configuration of the Peft model.
        - **modules_to_save** (`list` of `str`) -- The list of sub-module names to save when
          saving the model.
        - **prompt_encoder** ([`PromptEncoder`]) -- The prompt encoder used for Peft if
          `isinstance(self.peft_config, PromptLearningConfig)`.
        - **prompt_tokens** (`torch.Tensor`) -- The virtual prompt tokens used for Peft if
          `isinstance(self.peft_config, PromptLearningConfig)`.
        - **transformer_backbone_name** (`str`) -- The name of the transformer
          backbone in the base model if `isinstance(self.peft_config, PromptLearningConfig)`.
        - **word_embeddings** (`torch.nn.Embedding`) -- The word embeddings of the transformer backbone
          in the base model if `isinstance(self.peft_config, PromptLearningConfig)`.
    """

    def __init__(self, model, peft_config: PeftConfig):
        super().__init__()
        self.peft_config = peft_config
        self.base_model = model
        self.config = self.base_model.config
        self.modules_to_save = None
        if isinstance(self.peft_config, PromptLearningConfig):
            self._setup_prompt_encoder()
        else:
            if self.peft_config.peft_type == PeftType.LORA:
                self.base_model = LoraModel(peft_config, model)
            elif self.peft_config.peft_type == PeftType.BOTTLENECK:
                self.base_model = BottleneckModel(peft_config, model)
        if getattr(self.peft_config, "modules_to_save", None) is not None:
            self.modules_to_save = self.peft_config.modules_to_save
            _set_trainable(self)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.base_model_torch_dtype = getattr(model, "dtype", None)

    def save_pretrained(self, save_directory, **kwargs):
        r"""
        Saves the adapter model and the adapter configuration files to a directory, so that the model can be
        re-loaded using the `PeftModel.from_pretrained` class method, and also used by the `PeftModel.push_to_hub`
        method.

        Args:
            save_directory (`str`):
                Directory where the adapter model and configuration files will be saved (will be created if it
                does not exist).
            **kwargs:
                Additional keyword arguments passed along to the `push_to_hub` method.
        """
        if os.path.isfile(save_directory):
            raise ValueError(f"Provided path ({save_directory}) should be a directory, not a file")
        os.makedirs(save_directory, exist_ok=True)

        # save only the trainable weights
        output_state_dict = get_peft_model_state_dict(self, kwargs.get("state_dict", None))
        torch.save(output_state_dict, os.path.join(save_directory, WEIGHTS_NAME))

        # save the config and change the inference mode to `True`
        if self.peft_config.base_model_name_or_path is None:
            self.peft_config.base_model_name_or_path = (
                self.base_model.__dict__.get("name_or_path", None)
                if isinstance(self.peft_config, PromptLearningConfig)
                else self.base_model.model.__dict__.get("name_or_path", None)
            )
        inference_mode = self.peft_config.inference_mode
        self.peft_config.inference_mode = True
        self.peft_config.save_pretrained(save_directory)
        self.peft_config.inference_mode = inference_mode

    @classmethod
    def from_pretrained(cls, model, model_id, **kwargs):
        r"""
        Instantiates a `PeftModel` from a pretrained Peft configuration and weights.

        Args:
            model (`transformers.PreTrainedModel`):
                The model to be adapted. The model should be initialized with the `from_pretrained` method of the
                `transformers` library.
            model_id (`str`):
                The name of the Peft configuration to use. Can be either:
                    - A string, the `model id` of a Peft configuration hosted inside a model repo on the
                      Hugging Face Hub.
                    - A path to a directory containing a configuration file saved using the
                      `save_pretrained` method, e.g., ``./my_lora_config_directory/``.
        """
        from .mapping import MODEL_TYPE_TO_PEFT_MODEL_MAPPING, PEFT_TYPE_TO_CONFIG_MAPPING

        # load the config
        config = PEFT_TYPE_TO_CONFIG_MAPPING[PeftConfig.from_pretrained(model_id).peft_type].from_pretrained(model_id)

        if getattr(model, "hf_device_map", None) is not None:
            remove_hook_from_submodules(model)

        if config.task_type not in MODEL_TYPE_TO_PEFT_MODEL_MAPPING.keys():
            model = cls(model, config)
        else:
            model = MODEL_TYPE_TO_PEFT_MODEL_MAPPING[config.task_type](model, config)

        # load weights if any
        if os.path.exists(os.path.join(model_id, WEIGHTS_NAME)):
            filename = os.path.join(model_id, WEIGHTS_NAME)
        else:
            try:
                filename = hf_hub_download(model_id, WEIGHTS_NAME)
            except Exception:  # noqa
                raise ValueError(
                    f"Can't find weights for {model_id} in {model_id} or in the Hugging Face Hub. "
                    f"Please check that the file {WEIGHTS_NAME} is present at {model_id}."
                )

        adapters_weights = torch.load(
            filename, map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu")
        )
        # load the weights into the model
        model = set_peft_model_state_dict(model, adapters_weights)
        if getattr(model, "hf_device_map", None) is not None:
            device_map = kwargs.get("device_map", "auto")
            max_memory = kwargs.get("max_memory", None)
            no_split_module_classes = model._no_split_modules
            if device_map != "sequential":
                max_memory = get_balanced_memory(
                    model,
                    max_memory=max_memory,
                    no_split_module_classes=no_split_module_classes,
                    low_zero=(device_map == "balanced_low_0"),
                )
            if isinstance(device_map, str):
                device_map = infer_auto_device_map(
                    model, max_memory=max_memory, no_split_module_classes=no_split_module_classes
                )
            model = dispatch_model(model, device_map=device_map)
            hook = AlignDevicesHook(io_same_device=True)
            if model.peft_config.peft_type in (PeftType.LORA, PeftType.BOTTLENECK):
                add_hook_to_module(model.base_model.model, hook)
            else:
                remove_hook_from_submodules(model.prompt_encoder)
                add_hook_to_module(model.base_model, hook)
        return model

    def _setup_prompt_encoder(self):
        transformer_backbone = None
        for name, module in self.base_model.named_children():
            for param in module.parameters():
                param.requires_grad = False
            if isinstance(module, PreTrainedModel):
                # make sure the Transformers backbone is frozen and remember its name
                if transformer_backbone is None:
                    transformer_backbone = module
                    self.transformer_backbone_name = name

        if self.peft_config.num_transformer_submodules is None:
            self.peft_config.num_transformer_submodules = (
                2 if self.peft_config.task_type == TaskType.SEQ_2_SEQ_LM else 1
            )

        for named_param, value in list(transformer_backbone.named_parameters()):
            if value.shape[0] == self.base_model.config.vocab_size:
                self.word_embeddings = transformer_backbone.get_submodule(named_param.replace(".weight", ""))
                break

        if self.peft_config.peft_type == PeftType.PROMPT_TUNING:
            prompt_encoder = PromptEmbedding(self.peft_config, self.word_embeddings)
        elif self.peft_config.peft_type == PeftType.P_TUNING:
            prompt_encoder = PromptEncoder(self.peft_config)
        elif self.peft_config.peft_type == PeftType.PREFIX_TUNING:
            prompt_encoder = PrefixEncoder(self.peft_config)
        else:
            raise ValueError("Not supported")
        self.prompt_encoder = prompt_encoder
        self.prompt_tokens = torch.arange(
            self.peft_config.num_virtual_tokens * self.peft_config.num_transformer_submodules
        ).long()

    def get_prompt_embedding_to_save(self):
        """
        Returns the prompt embedding to save when saving the model. Only applicable when `peft_config.peft_type !=
        PeftType.LORA`.
        """
        prompt_tokens = self.prompt_tokens.unsqueeze(0).expand(1, -1).to(self.device)
        if self.peft_config.peft_type == PeftType.PREFIX_TUNING:
            prompt_tokens = prompt_tokens[:, : self.peft_config.num_virtual_tokens]
        prompt_embeddings = self.prompt_encoder(prompt_tokens)
        return prompt_embeddings[0].detach().cpu()

    def get_prompt(self, batch_size):
        """
        Returns the virtual prompts to use for Peft. Only applicable when `peft_config.peft_type != PeftType.LORA`.
        """
        prompt_tokens = self.prompt_tokens.unsqueeze(0).expand(batch_size, -1).to(self.device)
        if self.peft_config.peft_type == PeftType.PREFIX_TUNING:
            prompt_tokens = prompt_tokens[:, : self.peft_config.num_virtual_tokens]
            if self.peft_config.inference_mode:
                past_key_values = self.prompt_encoder.embedding.weight.repeat(batch_size, 1, 1)
            else:
                past_key_values = self.prompt_encoder(prompt_tokens)
            # reshape to (batch, virtual_tokens, num_layers * 2, heads, head_dim)
            past_key_values = past_key_values.view(
                batch_size,
                self.peft_config.num_virtual_tokens,
                self.peft_config.num_layers * 2,
                self.peft_config.num_attention_heads,
                self.peft_config.token_dim // self.peft_config.num_attention_heads,
            )
            if self.peft_config.num_transformer_submodules == 2:
                past_key_values = torch.cat([past_key_values, past_key_values], dim=2)
            past_key_values = past_key_values.permute([2, 0, 3, 1, 4]).split(
                self.peft_config.num_transformer_submodules * 2
            )
            if TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING.get(self.config.model_type, None) is not None:
                post_process_fn = TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING[self.config.model_type]
                past_key_values = post_process_fn(past_key_values)
            return past_key_values
        else:
            if self.peft_config.inference_mode:
                prompts = self.prompt_encoder.embedding.weight.repeat(batch_size, 1, 1)
            else:
                prompts = self.prompt_encoder(prompt_tokens)
            return prompts

    def print_trainable_parameters(self):
        """
        Prints the number of trainable parameters in the model.
        """
        trainable_params = 0
        all_param = 0
        for _, param in self.named_parameters():
            num_params = param.numel()
            # if using DS Zero 3 and the weights are initialized empty
            if num_params == 0 and hasattr(param, "ds_numel"):
                num_params = param.ds_numel

            all_param += num_params
            if param.requires_grad:
                trainable_params += num_params
        print(
            f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
        )

    def __getattr__(self, name: str):
        """Forward missing attributes to the wrapped module."""
        try:
            return super().__getattr__(name)  # defer to nn.Module's logic
        except AttributeError:
            return getattr(self.base_model, name)

    def forward(self, *args, **kwargs):
        """
        Forward pass of the model.
        """
        return self.get_base_model()(*args, **kwargs)

    @contextmanager
    def disable_adapter(self):
        """
        Disables the adapter module.
        """
        if isinstance(self.peft_config, PromptLearningConfig):
            old_forward = self.forward
            self.forward = self.base_model.forward
        else:
            self.base_model.disable_adapter_layers()
        yield
        if isinstance(self.peft_config, PromptLearningConfig):
            self.forward = old_forward
        else:
            self.base_model.enable_adapter_layers()

    def get_base_model(self):
        """
        Returns the base model.
        """
        return self.base_model if isinstance(self.peft_config, PromptLearningConfig) else self.base_model.model


class PeftModelForSequenceClassification(PeftModel):
    """
    Peft model for sequence classification tasks.

    Args:
        model ([`PreTrainedModel`]): Base transformer model.
        peft_config ([`PeftConfig`]): Peft config.

    **Attributes**:
        - **config** ([`PretrainedConfig`]) -- The configuration object of the base model.
        - **cls_layer_name** (`str`) -- The name of the classification layer.

    Example::

        >>> from transformers import AutoModelForSequenceClassification
        >>> from peft import PeftModelForSequenceClassification, get_peft_config
        >>> config = {
        ...     "peft_type": "PREFIX_TUNING", "task_type": "SEQ_CLS", "inference_mode": False,
        ...     "num_virtual_tokens": 20, "token_dim": 768, "num_transformer_submodules": 1,
        ...     "num_attention_heads": 12, "num_layers": 12, "encoder_hidden_size": 768,
        ...     "prefix_projection": False, "postprocess_past_key_value_function": None,
        ... }
        >>> peft_config = get_peft_config(config)
        >>> model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased")
        >>> peft_model = PeftModelForSequenceClassification(model, peft_config)
        >>> peft_model.print_trainable_parameters()
        trainable params: 370178 || all params: 108680450 || trainable%: 0.3406113979101117
    """

    def __init__(self, model, peft_config: PeftConfig):
        super().__init__(model, peft_config)
        self.modules_to_save = ["classifier", "score"]

        for name, _ in self.base_model.named_children():
            if any(module_name in name for module_name in self.modules_to_save):
                self.cls_layer_name = name
                break

        # to make sure the classifier layer is trainable
        _set_trainable(self)

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        **kwargs,
    ):
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if not isinstance(self.peft_config, PromptLearningConfig):
            return self.base_model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                inputs_embeds=inputs_embeds,
                labels=labels,
                output_attentions=output_attentions,
                output_hidden_states=output_hidden_states,
                return_dict=return_dict,
                **kwargs,
            )

        batch_size = input_ids.shape[0]
        if attention_mask is not None:
            # concat prompt attention mask
            prefix_attention_mask = torch.ones(batch_size, self.peft_config.num_virtual_tokens).to(self.device)
            attention_mask = torch.cat((prefix_attention_mask, attention_mask), dim=1)
        if kwargs.get("position_ids", None) is not None:
            warnings.warn("Position ids are not supported for parameter efficient tuning. Ignoring position ids.")
            kwargs["position_ids"] = None
        kwargs.update(
            {
                "attention_mask": attention_mask,
                "labels": labels,
                "output_attentions": output_attentions,
                "output_hidden_states": output_hidden_states,
                "return_dict": return_dict,
            }
        )

        if self.peft_config.peft_type == PeftType.PREFIX_TUNING:
            return self._prefix_tuning_forward(input_ids=input_ids, **kwargs)
        else:
            if kwargs.get("token_type_ids", None) is not None:
                kwargs["token_type_ids"] = torch.cat(
                    (
                        torch.zeros(batch_size, self.peft_config.num_virtual_tokens).to(self.device),
                        kwargs["token_type_ids"],
                    ),
                    dim=1,
                ).long()
            if inputs_embeds is None:
                inputs_embeds = self.word_embeddings(input_ids)
            prompts = self.get_prompt(batch_size=batch_size)
            prompts = prompts.to(inputs_embeds.dtype)
            inputs_embeds = torch.cat((prompts, inputs_embeds), dim=1)
            return self.base_model(inputs_embeds=inputs_embeds, **kwargs)

    def _prefix_tuning_forward(
        self,
        input_ids=None,
        attention_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        **kwargs,
    ):
        batch_size = input_ids.shape[0]
        past_key_values = self.get_prompt(batch_size)
        fwd_params = list(inspect.signature(self.base_model.forward).parameters.keys())
        kwargs.update(
            {
                "input_ids": input_ids,
                "attention_mask": attention_mask,
                "inputs_embeds": inputs_embeds,
                "output_attentions": output_attentions,
                "output_hidden_states": output_hidden_states,
                "return_dict": return_dict,
                "past_key_values": past_key_values,
            }
        )
        if "past_key_values" in fwd_params:
            return self.base_model(labels=labels, **kwargs)
        else:
            transformer_backbone_name = self.base_model.get_submodule(self.transformer_backbone_name)
            fwd_params = list(inspect.signature(transformer_backbone_name.forward).parameters.keys())
            if "past_key_values" not in fwd_params:
                raise ValueError("Model does not support past key values which are required for prefix tuning.")
            outputs = transformer_backbone_name(**kwargs)
            pooled_output = outputs[1] if len(outputs) > 1 else outputs[0]
            if "dropout" in [name for name, _ in list(self.base_model.named_children())]:
                pooled_output = self.base_model.dropout(pooled_output)
            logits = self.base_model.get_submodule(self.cls_layer_name)(pooled_output)

            loss = None
            if labels is not None:
                if self.config.problem_type is None:
                    if self.base_model.num_labels == 1:
                        self.config.problem_type = "regression"
                    elif self.base_model.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                        self.config.problem_type = "single_label_classification"
                    else:
                        self.config.problem_type = "multi_label_classification"

                if self.config.problem_type == "regression":
                    loss_fct = MSELoss()
                    if self.base_model.num_labels == 1:
                        loss = loss_fct(logits.squeeze(), labels.squeeze())
                    else:
                        loss = loss_fct(logits, labels)
                elif self.config.problem_type == "single_label_classification":
                    loss_fct = CrossEntropyLoss()
                    loss = loss_fct(logits.view(-1, self.base_model.num_labels), labels.view(-1))
                elif self.config.problem_type == "multi_label_classification":
                    loss_fct = BCEWithLogitsLoss()
                    loss = loss_fct(logits, labels)
            if not return_dict:
                output = (logits,) + outputs[2:]
                return ((loss,) + output) if loss is not None else output

            return SequenceClassifierOutput(
                loss=loss,
                logits=logits,
                hidden_states=outputs.hidden_states,
                attentions=outputs.attentions,
            )


class PeftModelForCausalLM(PeftModel):
    """
    Peft model for causal language modeling.

    Args:
        model ([`PreTrainedModel`]): Base transformer model.
        peft_config ([`PeftConfig`]): Peft config.

    Example::

        >>> from transformers import AutoModelForCausalLM
        >>> from peft import PeftModelForCausalLM, get_peft_config
        >>> config = {
        ...     "peft_type": "PREFIX_TUNING", "task_type": "CAUSAL_LM", "inference_mode": False,
        ...     "num_virtual_tokens": 20, "token_dim": 1280, "num_transformer_submodules": 1,
        ...     "num_attention_heads": 20, "num_layers": 36, "encoder_hidden_size": 1280,
        ...     "prefix_projection": False, "postprocess_past_key_value_function": None,
        ... }
        >>> peft_config = get_peft_config(config)
        >>> model = AutoModelForCausalLM.from_pretrained("gpt2-large")
        >>> peft_model = PeftModelForCausalLM(model, peft_config)
        >>> peft_model.print_trainable_parameters()
        trainable params: 1843200 || all params: 775873280 || trainable%: 0.23756456724479544
    """

    def __init__(self, model, peft_config: PeftConfig):
        super().__init__(model, peft_config)
        self.base_model_prepare_inputs_for_generation = self.base_model.prepare_inputs_for_generation

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        **kwargs,
    ):
        if not isinstance(self.peft_config, PromptLearningConfig):
            return self.base_model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                inputs_embeds=inputs_embeds,
                labels=labels,
                output_attentions=output_attentions,
                output_hidden_states=output_hidden_states,
                return_dict=return_dict,
                **kwargs,
            )

        batch_size = input_ids.shape[0]
        if attention_mask is not None:
            # concat prompt attention mask
            prefix_attention_mask = torch.ones(batch_size, self.peft_config.num_virtual_tokens).to(self.device)
            attention_mask = torch.cat((prefix_attention_mask, attention_mask), dim=1)

        if kwargs.get("position_ids", None) is not None:
            warnings.warn("Position ids are not supported for parameter efficient tuning. Ignoring position ids.")
            kwargs["position_ids"] = None
        if kwargs.get("token_type_ids", None) is not None:
            warnings.warn("Token type ids are not supported for parameter efficient tuning. Ignoring token type ids.")
            kwargs["token_type_ids"] = None
        kwargs.update(
            {
                "attention_mask": attention_mask,
                "labels": labels,
                "output_attentions": output_attentions,
                "output_hidden_states": output_hidden_states,
                "return_dict": return_dict,
            }
        )

        if self.peft_config.peft_type == PeftType.PREFIX_TUNING:
            past_key_values = self.get_prompt(batch_size)
            return self.base_model(input_ids=input_ids, past_key_values=past_key_values, **kwargs)
        else:
            if inputs_embeds is None:
                inputs_embeds = self.word_embeddings(input_ids)
            # concat prompt labels
            if labels is not None:
                prefix_labels = torch.full((batch_size, self.peft_config.num_virtual_tokens), -100).to(self.device)
                kwargs["labels"] = torch.cat((prefix_labels, labels), dim=1)
            prompts = self.get_prompt(batch_size=batch_size)
            prompts = prompts.to(inputs_embeds.dtype)
            inputs_embeds = torch.cat((prompts, inputs_embeds), dim=1)
            return self.base_model(inputs_embeds=inputs_embeds, **kwargs)

    def generate(self, **kwargs):
        self.base_model.prepare_inputs_for_generation = self.prepare_inputs_for_generation
        try:
            if not isinstance(self.peft_config, PromptLearningConfig):
                outputs = self.base_model.generate(**kwargs)
            else:
                if "input_ids" not in kwargs:
                    raise ValueError("input_ids must be provided for Peft model generation")
                if kwargs.get("attention_mask", None) is not None:
                    # concat prompt attention mask
                    prefix_attention_mask = torch.ones(
                        kwargs["input_ids"].shape[0], self.peft_config.num_virtual_tokens
                    ).to(kwargs["input_ids"].device)
                    kwargs["attention_mask"] = torch.cat((prefix_attention_mask, kwargs["attention_mask"]), dim=1)

                if kwargs.get("position_ids", None) is not None:
                    warnings.warn(
                        "Position ids are not supported for parameter efficient tuning. Ignoring position ids."
                    )
                    kwargs["position_ids"] = None
                if kwargs.get("token_type_ids", None) is not None:
                    warnings.warn(
                        "Token type ids are not supported for parameter efficient tuning. Ignoring token type ids."
                    )
                    kwargs["token_type_ids"] = None

                outputs = self.base_model.generate(**kwargs)
        except:
            # restore the original hook before propagating the error
            self.base_model.prepare_inputs_for_generation = self.base_model_prepare_inputs_for_generation
            raise
        else:
            self.base_model.prepare_inputs_for_generation = self.base_model_prepare_inputs_for_generation
            return outputs

    def prepare_inputs_for_generation(self, *args, **kwargs):
        model_kwargs = self.base_model_prepare_inputs_for_generation(*args, **kwargs)
        if isinstance(self.peft_config, PromptLearningConfig):
            if self.peft_config.peft_type == PeftType.PREFIX_TUNING:
                prefix_attention_mask = torch.ones(
                    model_kwargs["input_ids"].shape[0], self.peft_config.num_virtual_tokens
                ).to(model_kwargs["input_ids"].device)
                model_kwargs["attention_mask"] = torch.cat(
                    (prefix_attention_mask, model_kwargs["attention_mask"]), dim=1
                )

            if model_kwargs["past_key_values"] is None and self.peft_config.peft_type == PeftType.PREFIX_TUNING:
                past_key_values = self.get_prompt(batch_size=model_kwargs["input_ids"].shape[0])
                if self.base_model_torch_dtype is not None:
                    # handle the case for Bloom where it outputs tuple of tuples
                    if isinstance(past_key_values[0], tuple):
                        past_key_values = tuple(
                            tuple(
                                past_key_value.to(self.base_model_torch_dtype)
                                for past_key_value in past_key_value_tuple
                            )
                            for past_key_value_tuple in past_key_values
                        )
                    else:
                        past_key_values = tuple(
                            past_key_value.to(self.base_model_torch_dtype) for past_key_value in past_key_values
                        )

                model_kwargs["past_key_values"] = past_key_values
            else:
                if model_kwargs["past_key_values"] is None:
                    inputs_embeds = self.word_embeddings(model_kwargs["input_ids"])
                    prompts = self.get_prompt(batch_size=model_kwargs["input_ids"].shape[0])
                    prompts = prompts.to(inputs_embeds.dtype)
                    model_kwargs["inputs_embeds"] = torch.cat((prompts, inputs_embeds), dim=1)
                    model_kwargs["input_ids"] = None

        return model_kwargs


class PeftModelForSeq2SeqLM(PeftModel):
    """
    Peft model for sequence-to-sequence language modeling.

    Args:
        model ([`PreTrainedModel`]): Base transformer model.
        peft_config ([`PeftConfig`]): Peft config.

    Example::

        >>> from transformers import AutoModelForSeq2SeqLM
        >>> from peft import PeftModelForSeq2SeqLM, get_peft_config
        >>> config = {
        ...     "peft_type": "LORA", "task_type": "SEQ_2_SEQ_LM", "inference_mode": False, "r": 8,
        ...     "target_modules": ["q", "v"], "lora_alpha": 32, "lora_dropout": 0.1,
        ...     "merge_weights": False, "fan_in_fan_out": False, "enable_lora": None, "bias": "none",
        ... }
        >>> peft_config = get_peft_config(config)
        >>> model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
        >>> peft_model = PeftModelForSeq2SeqLM(model, peft_config)
        >>> peft_model.print_trainable_parameters()
        trainable params: 884736 || all params: 223843584 || trainable%: 0.3952474242013566
    """

    def __init__(self, model, peft_config: PeftConfig):
        super().__init__(model, peft_config)
        self.base_model_prepare_inputs_for_generation = self.base_model.prepare_inputs_for_generation
        self.base_model_prepare_encoder_decoder_kwargs_for_generation = (
            self.base_model._prepare_encoder_decoder_kwargs_for_generation
        )

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        inputs_embeds=None,
        decoder_input_ids=None,
        decoder_attention_mask=None,
        decoder_inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        **kwargs,
    ):
        if not isinstance(self.peft_config, PromptLearningConfig):
            return self.base_model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                inputs_embeds=inputs_embeds,
                decoder_input_ids=decoder_input_ids,
                decoder_attention_mask=decoder_attention_mask,
                decoder_inputs_embeds=decoder_inputs_embeds,
                labels=labels,
                output_attentions=output_attentions,
                output_hidden_states=output_hidden_states,
                return_dict=return_dict,
                **kwargs,
            )

        batch_size = input_ids.shape[0]
        if decoder_attention_mask is not None:
            # concat prompt attention mask
            prefix_attention_mask = torch.ones(batch_size, self.peft_config.num_virtual_tokens).to(self.device)
            decoder_attention_mask = torch.cat((prefix_attention_mask, decoder_attention_mask), dim=1)

        if kwargs.get("position_ids", None) is not None:
            warnings.warn("Position ids are not supported for parameter efficient tuning. Ignoring position ids.")
            kwargs["position_ids"] = None
        if kwargs.get("token_type_ids", None) is not None:
            warnings.warn("Token type ids are not supported for parameter efficient tuning. Ignoring token type ids.")
            kwargs["token_type_ids"] = None
        kwargs.update(
            {
                "attention_mask": attention_mask,
                "decoder_attention_mask": decoder_attention_mask,
                "labels": labels,
                "output_attentions": output_attentions,
                "output_hidden_states": output_hidden_states,
                "return_dict": return_dict,
            }
        )

        if self.peft_config.peft_type == PeftType.PREFIX_TUNING:
            past_key_values = self.get_prompt(batch_size)
            return self.base_model(
                input_ids=input_ids, decoder_input_ids=decoder_input_ids, past_key_values=past_key_values, **kwargs
            )
        else:
            if inputs_embeds is None:
                inputs_embeds = self.word_embeddings(input_ids)
            if decoder_inputs_embeds is None and decoder_input_ids is None:
                decoder_input_ids = shift_tokens_right(
                    labels, self.config.pad_token_id, self.config.decoder_start_token_id
                )
                decoder_inputs_embeds = self.word_embeddings(decoder_input_ids)

            if attention_mask is not None:
                # concat prompt attention mask
                prefix_attention_mask = torch.ones(batch_size, self.peft_config.num_virtual_tokens).to(self.device)
                kwargs["attention_mask"] = torch.cat((prefix_attention_mask, attention_mask), dim=1)
            # concat prompt labels
            if labels is not None:
                if self.peft_config.num_transformer_submodules == 1:
                    kwargs["labels"] = labels
                elif self.peft_config.num_transformer_submodules == 2:
                    prefix_labels = torch.full((batch_size, self.peft_config.num_virtual_tokens), -100).to(self.device)
                    kwargs["labels"] = torch.cat((prefix_labels, labels), dim=1)
            prompts = self.get_prompt(batch_size=batch_size)
            prompts = prompts.to(inputs_embeds.dtype)
            inputs_embeds = torch.cat((prompts[:, : self.peft_config.num_virtual_tokens], inputs_embeds), dim=1)
            if self.peft_config.num_transformer_submodules == 1:
                return self.base_model(inputs_embeds=inputs_embeds, **kwargs)
            elif self.peft_config.num_transformer_submodules == 2:
                decoder_inputs_embeds = torch.cat(
                    (prompts[:, self.peft_config.num_virtual_tokens :], decoder_inputs_embeds), dim=1
                )
                return self.base_model(
                    inputs_embeds=inputs_embeds, decoder_inputs_embeds=decoder_inputs_embeds, **kwargs
                )

    def generate(self, **kwargs):
        self.base_model.prepare_inputs_for_generation = self.prepare_inputs_for_generation
        self.base_model._prepare_encoder_decoder_kwargs_for_generation = (
            self._prepare_encoder_decoder_kwargs_for_generation
        )
        try:
            if not isinstance(self.peft_config, PromptLearningConfig):
                outputs = self.base_model.generate(**kwargs)
            else:
                if "input_ids" not in kwargs:
                    raise ValueError("input_ids must be provided for Peft model generation")
                if kwargs.get("position_ids", None) is not None:
                    warnings.warn(
                        "Position ids are not supported for parameter efficient tuning. Ignoring position ids."
                    )
                    kwargs["position_ids"] = None
                if kwargs.get("token_type_ids", None) is not None:
                    warnings.warn(
                        "Token type ids are not supported for parameter efficient tuning. Ignoring token type ids."
                    )
                    kwargs["token_type_ids"] = None

                if self.peft_config.peft_type == PeftType.PREFIX_TUNING:
                    outputs = self.base_model.generate(**kwargs)
                else:
                    raise NotImplementedError
        except:
            # restore the original hooks before propagating the error
            self.base_model.prepare_inputs_for_generation = self.base_model_prepare_inputs_for_generation
            self.base_model._prepare_encoder_decoder_kwargs_for_generation = (
                self.base_model_prepare_encoder_decoder_kwargs_for_generation
            )
            raise
        else:
            self.base_model.prepare_inputs_for_generation = self.base_model_prepare_inputs_for_generation
            self.base_model._prepare_encoder_decoder_kwargs_for_generation = (
                self.base_model_prepare_encoder_decoder_kwargs_for_generation
            )
            return outputs

    def prepare_inputs_for_generation(self, *args, **kwargs):
        model_kwargs = self.base_model_prepare_inputs_for_generation(*args, **kwargs)
        if model_kwargs["past_key_values"] is None and self.peft_config.peft_type == PeftType.PREFIX_TUNING:
            batch_size = model_kwargs["decoder_input_ids"].shape[0]
            past_key_values = self.get_prompt(batch_size)
            model_kwargs["past_key_values"] = past_key_values
        return model_kwargs


class PeftModelForTokenClassification(PeftModel):
    """
    Peft model for token classification tasks.

    Args:
        model ([`PreTrainedModel`]): Base transformer model.
        peft_config ([`PeftConfig`]): Peft config.

    **Attributes**:
        - **config** ([`PretrainedConfig`]) -- The configuration object of the base model.
        - **cls_layer_name** (`str`) -- The name of the classification layer.

    Example::

        >>> from transformers import AutoModelForTokenClassification
        >>> from peft import PeftModelForTokenClassification, get_peft_config
        >>> config = {
        ...     "peft_type": "PREFIX_TUNING", "task_type": "TOKEN_CLS", "inference_mode": False,
        ...     "num_virtual_tokens": 20, "token_dim": 768, "num_transformer_submodules": 1,
        ...     "num_attention_heads": 12, "num_layers": 12, "encoder_hidden_size": 768,
        ...     "prefix_projection": False, "postprocess_past_key_value_function": None,
        ... }
        >>> peft_config = get_peft_config(config)
        >>> model = AutoModelForTokenClassification.from_pretrained("bert-base-cased")
        >>> peft_model = PeftModelForTokenClassification(model, peft_config)
        >>> peft_model.print_trainable_parameters()
        trainable params: 370178 || all params: 108680450 || trainable%: 0.3406113979101117
    """

    def __init__(self, model, peft_config: PeftConfig):
        super().__init__(model, peft_config)
        self.modules_to_save = ["classifier", "score"]

        for name, _ in self.base_model.named_children():
            if any(module_name in name for module_name in self.modules_to_save):
                self.cls_layer_name = name
                break

        # to make sure the classifier layer is trainable
        _set_trainable(self)

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        **kwargs,
    ):
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if not isinstance(self.peft_config, PromptLearningConfig):
            return self.base_model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                inputs_embeds=inputs_embeds,
                labels=labels,
                output_attentions=output_attentions,
                output_hidden_states=output_hidden_states,
                return_dict=return_dict,
                **kwargs,
            )

        batch_size = input_ids.shape[0]
        if attention_mask is not None:
            # concat prompt attention mask
            prefix_attention_mask = torch.ones(batch_size, self.peft_config.num_virtual_tokens).to(self.device)
            attention_mask = torch.cat((prefix_attention_mask, attention_mask), dim=1)
        if kwargs.get("position_ids", None) is not None:
            warnings.warn("Position ids are not supported for parameter efficient tuning. Ignoring position ids.")
            kwargs["position_ids"] = None
        kwargs.update(
            {
                "attention_mask": attention_mask,
                "labels": labels,
                "output_attentions": output_attentions,
                "output_hidden_states": output_hidden_states,
                "return_dict": return_dict,
            }
        )

        if self.peft_config.peft_type == PeftType.PREFIX_TUNING:
            return self._prefix_tuning_forward(input_ids=input_ids, **kwargs)
        else:
            if kwargs.get("token_type_ids", None) is not None:
                kwargs["token_type_ids"] = torch.cat(
                    (
                        torch.zeros(batch_size, self.peft_config.num_virtual_tokens).to(self.device),
                        kwargs["token_type_ids"],
                    ),
                    dim=1,
                ).long()
            if inputs_embeds is None:
                inputs_embeds = self.word_embeddings(input_ids)
            prompts = self.get_prompt(batch_size=batch_size)
            prompts = prompts.to(inputs_embeds.dtype)
            inputs_embeds = torch.cat((prompts, inputs_embeds), dim=1)
            return self.base_model(inputs_embeds=inputs_embeds, **kwargs)

    def _prefix_tuning_forward(
        self,
        input_ids=None,
        attention_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        **kwargs,
    ):
        batch_size = input_ids.shape[0]
        past_key_values = self.get_prompt(batch_size)
        fwd_params = list(inspect.signature(self.base_model.forward).parameters.keys())
        kwargs.update(
            {
                "input_ids": input_ids,
                "attention_mask": attention_mask,
                "inputs_embeds": inputs_embeds,
                "output_attentions": output_attentions,
                "output_hidden_states": output_hidden_states,
                "return_dict": return_dict,
                "past_key_values": past_key_values,
            }
        )
        if "past_key_values" in fwd_params:
            return self.base_model(labels=labels, **kwargs)
        else:
            transformer_backbone_name = self.base_model.get_submodule(self.transformer_backbone_name)
            fwd_params = list(inspect.signature(transformer_backbone_name.forward).parameters.keys())
            if "past_key_values" not in fwd_params:
                raise ValueError("Model does not support past key values which are required for prefix tuning.")
            outputs = transformer_backbone_name(**kwargs)
            sequence_output = outputs[0]
            if "dropout" in [name for name, _ in list(self.base_model.named_children())]:
                sequence_output = self.base_model.dropout(sequence_output)
            logits = self.base_model.get_submodule(self.cls_layer_name)(sequence_output)

            loss = None
            if labels is not None:
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

            if not return_dict:
                output = (logits,) + outputs[2:]
                return ((loss,) + output) if loss is not None else output

            return TokenClassifierOutput(
                loss=loss,
                logits=logits,
                hidden_states=outputs.hidden_states,
                attentions=outputs.attentions,
            )
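A save/load round trip through the `PeftModel` API above, sketched with a placeholder model id and output path (only the adapter weights are written, never the base model):

from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, PeftModel

base = AutoModelForCausalLM.from_pretrained("yahma/llama-7b-hf")  # placeholder base model
peft_model = get_peft_model(base, LoraConfig(task_type="CAUSAL_LM", r=8, lora_alpha=16))
# ... fine-tune peft_model ...
peft_model.save_pretrained("./lora_adapter")  # writes the adapter weights (WEIGHTS_NAME) plus the adapter config

# later: re-wrap a freshly loaded base model and restore the adapter weights
base = AutoModelForCausalLM.from_pretrained("yahma/llama-7b-hf")
restored = PeftModel.from_pretrained(base, "./lora_adapter")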
SVFT-main/LLM-Adapters/peft/src/peft/tuners/__init__.py
ADDED
@@ -0,0 +1,24 @@
# flake8: noqa
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module while preserving other warnings, so don't check this module at all.

# coding=utf-8
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .lora import LoraConfig, LoraModel
from .bottleneck import BottleneckConfig, BottleneckModel
from .p_tuning import PromptEncoder, PromptEncoderConfig, PromptEncoderReparameterizationType
from .prefix_tuning import PrefixEncoder, PrefixTuningConfig
from .prompt_tuning import PromptEmbedding, PromptTuningConfig, PromptTuningInit
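A sketch of driving the bottleneck tuner exported above; the config fields mirror the `BottleneckConfig` definition in the next file, and the model id is a placeholder:

from transformers import AutoModelForCausalLM
from peft import get_peft_model
from peft.tuners import BottleneckConfig

bottleneck_config = BottleneckConfig(
    task_type="CAUSAL_LM",
    bottleneck_size=256,          # width of the adapter down-projection
    non_linearity="tanh",
    adapter_dropout=0.0,
    use_parallel_adapter=False,   # serial (Houlsby-style) placement by default
)
model = AutoModelForCausalLM.from_pretrained("yahma/llama-7b-hf")
# target_modules is left unset, so it is inferred from the bottleneck mapping
model = get_peft_model(model, bottleneck_config)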
SVFT-main/LLM-Adapters/peft/src/peft/tuners/bottleneck.py
ADDED
@@ -0,0 +1,532 @@
import importlib
import math
import re
import warnings
from dataclasses import asdict, dataclass, field
from enum import Enum
from typing import List, Optional, Union

import torch
import torch.nn as nn
import torch.nn.functional as F

from ..utils import PeftConfig, PeftType, transpose
from transformers.activations import ACT2FN


TRANSFORMERS_MODELS_TO_ADAPTER_TYPE_MAPPING = {
    "bloom": {"dense_h_to_4h": "mh_adapter", "dense_4h_to_h": "output_adapter"},
    "gptj": {"fc_in": "mh_adapter", "fc_out": "output_adapter"},
    "gpt_neo": {"c_fc": "mh_adapter", "c_proj": "output_adapter"},
    "llama": {"gate_proj": "mh_adapter", "up_proj": "mh_adapter", "down_proj": "output_adapter"},
    "opt": {"fc1": "mh_adapter", "fc2": "output_adapter"},
    "chatglm": {"dense_h_to_4h": "mh_adapter", "dense_4h_to_h": "output_adapter"},
}


def is_bnb_available():
    return importlib.util.find_spec("bitsandbytes") is not None


if is_bnb_available():
    import bitsandbytes as bnb


@dataclass
class BottleneckConfig(PeftConfig):
    """
    This is the configuration class to store the configuration of a [`~peft.Bottleneck`].

    Args:
        bottleneck_size (`int`): The size of the bottleneck.
        non_linearity (`str`): The non-linearity to apply to the bottleneck.
        adapter_dropout (`float`, optional): The dropout probability of the bottleneck. Defaults to 0.0.
        bias (`str`): Bias type for Bottleneck. Can be 'none', 'all' or 'adapter_only'. Defaults to 'none'.
        use_parallel_adapter (`bool`, optional): Whether to use parallel adapter. Defaults to False.
        scaling (`float` or `str`, optional):
            Scaling factor to use for scaled addition of adapter outputs as done by He et al. (2021). Can be either a
            constant factor (float) or the string "learned", in which case the scaling factor is learned. Defaults to
            1.0.
        target_modules (`Union[List[str], str]`): The names of the modules to apply the adapter to.
        init_weights (`str`, optional): Initialization method for the weights of the adapter modules.
            Currently, this can be either "bert" (default) or "mam_adapter".
        modules_to_save (`List[str]`): List of modules apart from Bottleneck adapter layers to be set as trainable
            and saved in the final checkpoint.
    """

    bottleneck_size: int = field(default=256, metadata={"help": "The size of the bottleneck"})
    non_linearity: str = field(default="tanh", metadata={"help": "The non-linearity to apply to the bottleneck"})
    adapter_dropout: float = field(
        default=0.0, metadata={"help": "The dropout probability of the bottleneck, defaults to 0.0"}
    )
    target_modules: Optional[Union[List[str], str]] = field(
        default=None,
        metadata={
            "help": "List of module names or regex expression of the module names to replace with Adapter. "
            "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'"
        },
    )
    use_parallel_adapter: bool = field(default=False, metadata={"help": "Whether to use parallel adapter"})
    use_adapterp: bool = field(default=False, metadata={"help": "Whether to use adapterp"})
    scaling: Union[float, str] = 1.0
    bias: str = field(
        default="none", metadata={"help": "Bias type for Bottleneck. Can be 'none', 'all' or 'adapter_only'"}
    )
    init_weights: str = field(
        default="bert", metadata={"help": "Initialization method for the weights of the adapter modules."}
    )
    modules_to_save: Optional[List[str]] = field(
        default=None,
        metadata={
            "help": "List of modules apart from Adapter layers to be set as trainable and saved in the final checkpoint. "
            "For example, in Sequence Classification or Token Classification tasks, "
            "the final layer `classifier/score` are randomly initialized and as such need to be trainable and saved."
        },
    )

    def __post_init__(self):
        self.peft_type = PeftType.BOTTLENECK


class BottleneckModel(torch.nn.Module):
    """
    Creates a Bottleneck adapter model for a pretrained transformers model.

    Args:
        model (`transformers.PreTrainedModel`): The pretrained model to be adapted.
        config (`BottleneckConfig`): The configuration of the Bottleneck adapter.

    Returns:
        `torch.nn.Module`: The Bottleneck adapter model.

    Example::

        >>> from transformers import AutoModelForCausalLM
        >>> from peft import BottleneckModel, BottleneckConfig
        >>> config = BottleneckConfig(
        ...     peft_type="BOTTLENECK", task="CAUSAL_LM", target_modules=["gate_proj", "up_proj", "down_proj"],
        ...     bottleneck_size=256, non_linearity="tanh",
        ... )
        >>> model = AutoModelForCausalLM.from_pretrained("decapoda-research/llama-7b-hf")
        >>> bottleneck_model = BottleneckModel(config, model)

    **Attributes**:
        - **model** (`transformers.PreTrainedModel`): The pretrained model to be adapted.
        - **peft_config** (`BottleneckConfig`): The configuration of the Bottleneck adapter.
    """

    def __init__(self, config, model):
        super().__init__()
        self.model = model
        self.peft_config = config
        self._find_and_replace()
        mark_only_adapter_as_trainable(self.model, self.peft_config.bias)
        self.forward = self.model.forward

    def _find_and_replace(self):
        loaded_in_8bit = getattr(self.model, "is_loaded_in_8bit", False)
        if loaded_in_8bit and not is_bnb_available():
            raise ImportError(
                "To use Adapter with 8-bit quantization, please install the `bitsandbytes` package. "
                "You can install it with `pip install bitsandbytes`."
            )
        is_target_modules_in_base_model = False
        is_hf_device_map_available = hasattr(self.model, "hf_device_map")
        kwargs = {
            "bottleneck_size": self.peft_config.bottleneck_size,
            "non_linearity": self.peft_config.non_linearity,
            "adapter_dropout": self.peft_config.adapter_dropout,
            "scaling": self.peft_config.scaling,
            "init_weights": self.peft_config.init_weights,
        }
        key_list = [key for key, _ in self.model.named_modules()]
        for key in key_list:
            if isinstance(self.peft_config.target_modules, str):
                target_module_found = re.fullmatch(self.peft_config.target_modules, key)
            else:
                target_module_found = any(key.endswith(target_key) for target_key in self.peft_config.target_modules)
            if target_module_found:
                if not is_target_modules_in_base_model:
                    is_target_modules_in_base_model = True
                parent, target, target_name = self._get_submodules(key)
                # determine the type of adapter to be used; this will affect the forward pass
                if self.peft_config.use_parallel_adapter:
                    adapter_type = "parallel_adapter"
                else:
                    adapter_type = TRANSFORMERS_MODELS_TO_ADAPTER_TYPE_MAPPING[self.model.config.model_type][
                        target_name
                    ]
                kwargs.update({"adapter_type": adapter_type})

                bias = target.bias is not None
                if loaded_in_8bit and isinstance(target, bnb.nn.Linear8bitLt):
                    kwargs.update(
                        {
                            "has_fp16_weights": target.state.has_fp16_weights,
                            "memory_efficient_backward": target.state.memory_efficient_backward,
                            "threshold": target.state.threshold,
                            "index": target.index,
                        }
                    )
                    if adapter_type == "mh_adapter":
                        new_module = Linear8bitLt(target.in_features, target.in_features, bias=bias, **kwargs)
                    elif adapter_type == "output_adapter":
                        new_module = Linear8bitLt(target.out_features, target.out_features, bias=bias, **kwargs)
                    elif adapter_type == "parallel_adapter":
                        new_module = Linear8bitLt(target.in_features, target.out_features, bias=bias, **kwargs)
                elif isinstance(target, torch.nn.Linear):
                    if adapter_type == "mh_adapter":
                        new_module = Linear(target.in_features, target.in_features, bias=bias, **kwargs)
                    elif adapter_type == "output_adapter":
                        new_module = Linear(target.out_features, target.out_features, bias=bias, **kwargs)
                    elif adapter_type == "parallel_adapter":
                        new_module = Linear(target.in_features, target.out_features, bias=bias, **kwargs)
                self._replace_module(parent, target_name, new_module, target)
        if not is_target_modules_in_base_model:
            raise ValueError(
                f"Target modules {self.peft_config.target_modules} not found in the base model. "
                f"Please check the target modules and try again."
            )

    def _get_submodules(self, key):
        parent = self.model.get_submodule(".".join(key.split(".")[:-1]))
        target_name = key.split(".")[-1]
        target = self.model.get_submodule(key)
        return parent, target, target_name

    def _replace_module(self, parent_module, child_name, new_module, old_module):
        setattr(parent_module, child_name, new_module)
        new_module.weight = old_module.weight
        if old_module.bias is not None:
            new_module.bias = old_module.bias
        if getattr(old_module, "state", None) is not None:
            new_module.state = old_module.state
            new_module.to(old_module.weight.device)

        # dispatch to correct device
        for name, module in new_module.named_modules():
            if "adapter_" in name:
                module.to(old_module.weight.device)

    def __getattr__(self, name: str):
        """Forward missing attributes to the wrapped module."""
        try:
            return super().__getattr__(name)  # defer to nn.Module's logic
        except AttributeError:
            return getattr(self.model, name)

    @property
    def modules_to_save(self):
        return None

    def get_peft_config_as_dict(self, inference: bool = False):
        config = {k: v.value if isinstance(v, Enum) else v for k, v in asdict(self.peft_config).items()}
        if inference:
            config["inference_mode"] = True
        return config

    def _set_adapter_layers(self, enabled=True):
        for module in self.model.modules():
            if isinstance(module, AdapterLayer):
                module.disable_adapters = not enabled

    def enable_adapter_layers(self):
        self._set_adapter_layers(enabled=True)

    def disable_adapter_layers(self):
        self._set_adapter_layers(enabled=False)


# Below code is based on https://github.com/adapter-hub/adapter-transformers/blob/master/src/transformers/adapters/modeling.py
# and lora.py from huggingface PEFT, and modified to work with PyTorch FSDP


# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------


# Copied from lora.py; had to adapt it for `adapter_only` to work
def mark_only_adapter_as_trainable(model: nn.Module, bias: str = "none") -> None:
    for n, p in model.named_parameters():
        if "adapter_" not in n:
            p.requires_grad = False
    if bias == "none":
        return
    elif bias == "all":
        for n, p in model.named_parameters():
            if "bias" in n:
                p.requires_grad = True
    elif bias == "adapter_only":
        for m in model.modules():
            if isinstance(m, AdapterLayer) and hasattr(m, "bias") and m.bias is not None:
                m.bias.requires_grad = True
    else:
        raise NotImplementedError


class AdapterLayer:
    def __init__(
        self,
        bottleneck_size: int,
        non_linearity: str,
        adapter_dropout: float,
        scaling: Union[float, str],
    ):
        self.bottleneck_size = bottleneck_size
        self.non_linearity = non_linearity
        self.scaling = scaling
        # optional dropout
        if adapter_dropout > 0.0:
            self.adapter_dropout = nn.Dropout(p=adapter_dropout)
        else:
            self.adapter_dropout = lambda x: x
        self.disable_adapters = False


class Linear(nn.Linear, AdapterLayer):
    """
    Bottleneck adapter in a dense layer. The adapter can be applied after the multi-head attention layer and/or
    after the feed-forward layer.
    """

    def __init__(
        self,
        in_features: int,
        out_features: int,
        adapter_type: str,
        bottleneck_size: int,
        non_linearity: str,
        adapter_dropout: float,
        scaling: Union[float, str],
        init_weights: str,
        **kwargs,
    ):
        nn.Linear.__init__(self, in_features, out_features, **kwargs)
        AdapterLayer.__init__(
            self,
            bottleneck_size=bottleneck_size,
            non_linearity=non_linearity,
            adapter_dropout=adapter_dropout,
            scaling=scaling,
        )

        self.init_weights = init_weights
        self.adapter_type = adapter_type
        if isinstance(scaling, float):
            self.adapter_scaling = scaling
        elif scaling == "learned":
            self.adapter_scaling = nn.Parameter(torch.ones(1))
        # Actual trainable parameters
        self.adapter_down = nn.Linear(in_features, bottleneck_size, bias=False)
        self.adapter_up = nn.Linear(bottleneck_size, out_features, bias=False)
        self.act_fn = ACT2FN[self.non_linearity]
        # Freezing the pre-trained weight matrix
        self.weight.requires_grad = False
        self.reset_parameters()

    def reset_parameters(self):
        nn.Linear.reset_parameters(self)
        # if we want to initialize with the bert strategy, this function is called for all the linear layers
        if hasattr(self, "adapter_down"):
            if self.init_weights == "bert":
                self.adapter_down.apply(self.init_bert_weights)
                self.adapter_up.apply(self.init_bert_weights)
            elif self.init_weights == "mam_adapter":
                nn.init.kaiming_uniform_(self.adapter_down.weight, a=math.sqrt(5))
                nn.init.zeros_(self.adapter_up.weight)
            else:
                raise ValueError("Unknown init_weights type: {}".format(self.init_weights))

    # This is copied from the BertPreTrainedModel class to make this a self-contained class.
    @staticmethod
    def init_bert_weights(module):
        """Initialize the weights."""
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # std defaults to 0.02; this might need to be changed
            module.weight.data.normal_(mean=0.0, std=0.02)
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        if isinstance(module, nn.Linear) and module.bias is not None:
            module.bias.data.zero_()

    def train(self, mode: bool = True):
        nn.Linear.train(self, mode)
        self.adapter_down.train(mode)
        self.adapter_up.train(mode)

    def eval(self):
        nn.Linear.eval(self)
        self.adapter_down.eval()
        self.adapter_up.eval()

    def forward(self, x: torch.Tensor):
        if self.disable_adapters:
            return F.linear(x, self.weight, bias=self.bias)
        else:
            if self.adapter_type == "mh_adapter":
                # for mh_adapter, x passes through the adapter first and then the linear layer
                expected_dtype = x.dtype
                residual = x

                if x.dtype != torch.float32:
                    x = x.float()
                output = (
                    self.adapter_up(self.act_fn(self.adapter_down(self.adapter_dropout(x)))).to(expected_dtype)
                    * self.adapter_scaling
                )

                output = output + residual

                result = F.linear(output, self.weight, bias=self.bias)
            elif self.adapter_type == "output_adapter":
                # for output_adapter, x passes through the linear layer first and then the adapter
                x = F.linear(x, self.weight, bias=self.bias)
                expected_dtype = x.dtype
                residual = x

                if x.dtype != torch.float32:
                    x = x.float()

                output = (
                    self.adapter_up(self.act_fn(self.adapter_down(self.adapter_dropout(x)))).to(expected_dtype)
                    * self.adapter_scaling
                )

                result = output + residual
            elif self.adapter_type == "parallel_adapter":
                # for parallel_adapter, x passes through the linear layer and the adapter layer in parallel.
                # The output of the adapter layer is added to the output of the linear layer
                result = F.linear(x, self.weight, bias=self.bias)
                expected_dtype = result.dtype

                if x.dtype != torch.float32:
                    x = x.float()
                output = (
                    self.adapter_up(self.act_fn(self.adapter_down(self.adapter_dropout(x)))).to(expected_dtype)
                    * self.adapter_scaling
                )

                result = result + output
            return result


if is_bnb_available():

    class Linear8bitLt(bnb.nn.Linear8bitLt, AdapterLayer):
        # Adapter layer for an 8-bit linear layer
        def __init__(
            self,
            in_features: int,
            out_features: int,
            adapter_type: str,
            bottleneck_size: int,
            non_linearity: str,
            adapter_dropout: float,
            scaling: Union[float, str],
            init_weights: str,
            **kwargs,
        ):
            bnb.nn.Linear8bitLt.__init__(
                self,
                in_features,
                out_features,
                bias=kwargs.get("bias", True),
                has_fp16_weights=kwargs.get("has_fp16_weights", True),
                memory_efficient_backward=kwargs.get("memory_efficient_backward", False),
                threshold=kwargs.get("threshold", 0.0),
                index=kwargs.get("index", None),
            )
            AdapterLayer.__init__(
                self,
                bottleneck_size=bottleneck_size,
                non_linearity=non_linearity,
                adapter_dropout=adapter_dropout,
                scaling=scaling,
            )

            self.init_weights = init_weights
            self.adapter_type = adapter_type
            if isinstance(scaling, float):
                self.adapter_scaling = scaling
            elif scaling == "learned":
                self.adapter_scaling = nn.Parameter(torch.ones(1))
            # Actual trainable parameters
            self.adapter_down = nn.Linear(in_features, bottleneck_size, bias=False)
            self.adapter_up = nn.Linear(bottleneck_size, out_features, bias=False)
            self.act_fn = ACT2FN[self.non_linearity]
            # Freezing the pre-trained weight matrix
            self.weight.requires_grad = False
            self.reset_parameters()

        def reset_parameters(self):
            nn.Linear.reset_parameters(self)
            # if we want to initialize with the bert strategy, this function is called for all the linear layers
            if hasattr(self, "adapter_down"):
                if self.init_weights == "bert":
                    self.adapter_down.apply(self.init_bert_weights)
                    self.adapter_up.apply(self.init_bert_weights)
                elif self.init_weights == "mam_adapter":
                    nn.init.kaiming_uniform_(self.adapter_down.weight, a=math.sqrt(5))
                    nn.init.zeros_(self.adapter_up.weight)
                else:
                    raise ValueError("Unknown init_weights type: {}".format(self.init_weights))

        # This is copied from the BertPreTrainedModel class to make this a self-contained class.
        @staticmethod
        def init_bert_weights(module):
            """Initialize the weights."""
            if isinstance(module, (nn.Linear, nn.Embedding)):
                # std defaults to 0.02; this might need to be changed
                module.weight.data.normal_(mean=0.0, std=0.02)
            elif isinstance(module, nn.LayerNorm):
                module.bias.data.zero_()
                module.weight.data.fill_(1.0)
            if isinstance(module, nn.Linear) and module.bias is not None:
                module.bias.data.zero_()

        def forward(self, x: torch.Tensor):
            result_pre_forward = super().forward(x)

            if self.disable_adapters:
                return result_pre_forward
            else:
                if self.adapter_type == "mh_adapter":
                    if not torch.is_autocast_enabled():
                        expected_dtype = x.dtype

                        if x.dtype != torch.float32:
                            x = x.float()

                        residual = x
                        output = (
                            self.adapter_up(self.act_fn(self.adapter_down(self.adapter_dropout(x)))).to(expected_dtype)
                            * self.adapter_scaling
                        )
                        output = (output + residual).to(expected_dtype)

                        result = super().forward(output)
                    else:
                        residual = x
                        output = (
                            self.adapter_up(self.act_fn(self.adapter_down(self.adapter_dropout(x))))
                            * self.adapter_scaling
                        )
                        output = output + residual

                        result = super().forward(output)
                elif self.adapter_type == "output_adapter":
                    if not torch.is_autocast_enabled():
                        expected_dtype = result_pre_forward.dtype

                        if result_pre_forward.dtype != torch.float32:
                            result_pre_forward = result_pre_forward.float()

                        residual = result_pre_forward
                        output = (
                            self.adapter_up(
                                self.act_fn(self.adapter_down(self.adapter_dropout(result_pre_forward)))
                            ).to(expected_dtype)
                            * self.adapter_scaling
                        )
                        result = (output + residual).to(expected_dtype)
                    else:
                        residual = result_pre_forward
                        output = (
                            self.adapter_up(self.act_fn(self.adapter_down(self.adapter_dropout(result_pre_forward))))
                            * self.adapter_scaling
                        )
                        result = output + residual
                elif self.adapter_type == "parallel_adapter":
                    if not torch.is_autocast_enabled():
                        expected_dtype = result_pre_forward.dtype

                        if x.dtype != torch.float32:
                            x = x.float()

                        output = (
                            self.adapter_up(self.act_fn(self.adapter_down(self.adapter_dropout(x)))).to(expected_dtype)
                            * self.adapter_scaling
                        )
                        result = result_pre_forward + output
                    else:
                        output = (
                            self.adapter_up(self.act_fn(self.adapter_down(self.adapter_dropout(x))))
                            * self.adapter_scaling
                        )
                        result = result_pre_forward + output

                return result
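Editor's note: the core transformation implemented by the `Linear` adapter above is a down-projection, a non-linearity, an up-projection, and a scaled residual add. The following minimal sketch re-implements just that computation in plain torch, assuming tanh non-linearity and the "bert" init; the class and variable names here are illustrative and are not part of the file above.

import torch
import torch.nn as nn

class ToyBottleneckAdapter(nn.Module):
    def __init__(self, hidden_size: int, bottleneck_size: int, scaling: float = 1.0):
        super().__init__()
        self.down = nn.Linear(hidden_size, bottleneck_size, bias=False)  # plays the role of adapter_down
        self.up = nn.Linear(bottleneck_size, hidden_size, bias=False)    # plays the role of adapter_up
        self.act = nn.Tanh()                                             # non_linearity="tanh"
        self.scaling = scaling                                           # constant scaling, as with scaling=1.0
        nn.init.normal_(self.down.weight, std=0.02)  # "bert" strategy, as in init_bert_weights
        nn.init.normal_(self.up.weight, std=0.02)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # output_adapter style: residual add around down -> act -> up, scaled
        return x + self.up(self.act(self.down(x))) * self.scaling

x = torch.randn(2, 16, 768)
adapter = ToyBottleneckAdapter(hidden_size=768, bottleneck_size=64)
print(adapter(x).shape)  # torch.Size([2, 16, 768])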
SVFT-main/LLM-Adapters/peft/src/peft/tuners/lora.py
ADDED
@@ -0,0 +1,624 @@
# coding=utf-8
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import importlib
import math
import re
import warnings
from dataclasses import asdict, dataclass, field
from enum import Enum
from typing import List, Optional, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers.pytorch_utils import Conv1D

from ..utils import PeftConfig, PeftType, transpose


def is_bnb_available():
    return importlib.util.find_spec("bitsandbytes") is not None


if is_bnb_available():
    import bitsandbytes as bnb


@dataclass
class LoraConfig(PeftConfig):
    """
    This is the configuration class to store the configuration of a [`~peft.Lora`].

    Args:
        r (`int`): Lora attention dimension.
        target_modules (`Union[List[str], str]`): The names of the modules to apply Lora to.
        lora_alpha (`float`): The alpha parameter for Lora scaling.
        lora_dropout (`float`): The dropout probability for Lora layers.
        merge_weights (`bool`):
            Whether to merge the weights of the Lora layers with the base transformer model in `eval` mode.
        fan_in_fan_out (`bool`): Set this to True if the layer to replace stores weight like (fan_in, fan_out).
        enable_lora (`List[bool]`): Used with `lora.MergedLinear`.
        bias (`str`): Bias type for Lora. Can be 'none', 'all' or 'lora_only'.
        modules_to_save (`List[str]`): List of modules apart from LoRA layers to be set as trainable
            and saved in the final checkpoint.
    """

    r: int = field(default=8, metadata={"help": "Lora attention dimension"})
    target_modules: Optional[Union[List[str], str]] = field(
        default=None,
        metadata={
            "help": "List of module names or regex expression of the module names to replace with Lora. "
            "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'"
        },
    )
    lora_alpha: int = field(default=None, metadata={"help": "Lora alpha"})
    lora_dropout: float = field(default=None, metadata={"help": "Lora dropout"})
    merge_weights: bool = field(
        default=False, metadata={"help": "Merge weights of the original model and the Lora model"}
    )
    fan_in_fan_out: bool = field(
        default=False,
        metadata={"help": "Set this to True if the layer to replace stores weight like (fan_in, fan_out)"},
    )
    enable_lora: Optional[List[bool]] = field(default=None, metadata={"help": "Used with `lora.MergedLinear`."})
    bias: str = field(default="none", metadata={"help": "Bias type for Lora. Can be 'none', 'all' or 'lora_only'"})
    modules_to_save: Optional[List[str]] = field(
        default=None,
        metadata={
            "help": "List of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint. "
            "For example, in Sequence Classification or Token Classification tasks, "
            "the final layer `classifier/score` are randomly initialized and as such need to be trainable and saved."
        },
    )

    def __post_init__(self):
        self.peft_type = PeftType.LORA


class LoraModel(torch.nn.Module):
    """
    Creates Low Rank Adapter (Lora) model from a pretrained transformers model.

    Args:
        model ([`transformers.PreTrainedModel`]): The model to be adapted.
        config ([`LoraConfig`]): The configuration of the Lora model.

    Returns:
        `torch.nn.Module`: The Lora model.

    Example::

        >>> from transformers import AutoModelForSeq2SeqLM
        >>> from peft import LoraModel, LoraConfig
        >>> config = LoraConfig(
        ...     peft_type="LORA", task_type="SEQ_2_SEQ_LM", r=8, lora_alpha=32, target_modules=["q", "v"],
        ...     lora_dropout=0.01,
        ... )
        >>> model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
        >>> lora_model = LoraModel(config, model)

    **Attributes**:
        - **model** ([`transformers.PreTrainedModel`]) -- The model to be adapted.
        - **peft_config** ([`LoraConfig`]): The configuration of the Lora model.
    """

    def __init__(self, config, model):
        super().__init__()
        self.peft_config = config
        self.model = model
        self._find_and_replace()
        mark_only_lora_as_trainable(self.model, self.peft_config.bias)
        self.forward = self.model.forward

    def _find_and_replace(self):
        loaded_in_8bit = getattr(self.model, "is_loaded_in_8bit", False)
        if loaded_in_8bit and not is_bnb_available():
            raise ImportError(
                "To use Lora with 8-bit quantization, please install the `bitsandbytes` package. "
                "You can install it with `pip install bitsandbytes`."
            )
        is_target_modules_in_base_model = False
        is_hf_device_map_available = hasattr(self.model, "hf_device_map")
        kwargs = {
            "r": self.peft_config.r,
            "lora_alpha": self.peft_config.lora_alpha,
            "lora_dropout": self.peft_config.lora_dropout,
            "fan_in_fan_out": self.peft_config.fan_in_fan_out,
            "merge_weights": (self.peft_config.merge_weights or self.peft_config.inference_mode)
            and not is_hf_device_map_available,
        }
        key_list = [key for key, _ in self.model.named_modules()]
        for key in key_list:
            if isinstance(self.peft_config.target_modules, str):
                target_module_found = re.fullmatch(self.peft_config.target_modules, key)
            else:
                target_module_found = any(key.endswith(target_key) for target_key in self.peft_config.target_modules)
            if target_module_found:
                if not is_target_modules_in_base_model:
                    is_target_modules_in_base_model = True
                parent, target, target_name = self._get_submodules(key)
                bias = target.bias is not None
                if loaded_in_8bit and isinstance(target, bnb.nn.Linear8bitLt):
                    kwargs.update(
                        {
                            "has_fp16_weights": target.state.has_fp16_weights,
                            "memory_efficient_backward": target.state.memory_efficient_backward,
                            "threshold": target.state.threshold,
                            "index": target.index,
                        }
                    )
                    if self.peft_config.enable_lora is None:
                        new_module = Linear8bitLt(target.in_features, target.out_features, bias=bias, **kwargs)
                    else:
                        kwargs.update({"enable_lora": self.peft_config.enable_lora})
                        new_module = MergedLinear8bitLt(target.in_features, target.out_features, bias=bias, **kwargs)
                elif isinstance(target, torch.nn.Linear) and self.peft_config.enable_lora is None:
                    new_module = Linear(target.in_features, target.out_features, bias=bias, **kwargs)
                elif self.peft_config.enable_lora is not None:
                    kwargs.update({"enable_lora": self.peft_config.enable_lora})
                    if isinstance(target, Conv1D):
                        in_features, out_features = (
                            target.weight.ds_shape if hasattr(target.weight, "ds_shape") else target.weight.shape
                        )
                    else:
                        in_features, out_features = target.in_features, target.out_features
                        if kwargs["fan_in_fan_out"]:
                            warnings.warn(
                                "fan_in_fan_out is set to True but the target module is not a Conv1D. "
                                "Setting fan_in_fan_out to False."
                            )
                            kwargs["fan_in_fan_out"] = self.peft_config.fan_in_fan_out = False
                    new_module = MergedLinear(in_features, out_features, bias=bias, **kwargs)
                self._replace_module(parent, target_name, new_module, target)
        if not is_target_modules_in_base_model:
            raise ValueError(
                f"Target modules {self.peft_config.target_modules} not found in the base model. "
                f"Please check the target modules and try again."
            )

    def _get_submodules(self, key):
        parent = self.model.get_submodule(".".join(key.split(".")[:-1]))
        target_name = key.split(".")[-1]
        target = self.model.get_submodule(key)
        return parent, target, target_name

    def _replace_module(self, parent_module, child_name, new_module, old_module):
        setattr(parent_module, child_name, new_module)
        new_module.weight = old_module.weight
        if old_module.bias is not None:
            new_module.bias = old_module.bias
        if getattr(old_module, "state", None) is not None:
            new_module.state = old_module.state
            new_module.to(old_module.weight.device)

        # dispatch to correct device
        for name, module in new_module.named_modules():
            if "lora_" in name:
                module.to(old_module.weight.device)

    def __getattr__(self, name: str):
        """Forward missing attributes to the wrapped module."""
        try:
            return super().__getattr__(name)  # defer to nn.Module's logic
        except AttributeError:
            return getattr(self.model, name)

    @property
    def modules_to_save(self):
        return None

    def get_peft_config_as_dict(self, inference: bool = False):
        config = {k: v.value if isinstance(v, Enum) else v for k, v in asdict(self.peft_config).items()}
        if inference:
            config["inference_mode"] = True
        return config

    def _set_adapter_layers(self, enabled=True):
        for module in self.model.modules():
            if isinstance(module, LoraLayer):
                module.disable_adapters = not enabled

    def enable_adapter_layers(self):
        self._set_adapter_layers(enabled=True)

    def disable_adapter_layers(self):
        self._set_adapter_layers(enabled=False)


# Below code is based on https://github.com/microsoft/LoRA/blob/main/loralib/layers.py
# and modified to work with PyTorch FSDP


# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------


# had to adapt it for `lora_only` to work
def mark_only_lora_as_trainable(model: nn.Module, bias: str = "none") -> None:
    for n, p in model.named_parameters():
        if "lora_" not in n:
            p.requires_grad = False
    if bias == "none":
        return
    elif bias == "all":
        for n, p in model.named_parameters():
            if "bias" in n:
                p.requires_grad = True
    elif bias == "lora_only":
        for m in model.modules():
            if isinstance(m, LoraLayer) and hasattr(m, "bias") and m.bias is not None:
                m.bias.requires_grad = True
    else:
        raise NotImplementedError


class LoraLayer:
    def __init__(
        self,
        r: int,
        lora_alpha: int,
        lora_dropout: float,
        merge_weights: bool,
    ):
        self.r = r
        self.lora_alpha = lora_alpha
        # Optional dropout
        if lora_dropout > 0.0:
            self.lora_dropout = nn.Dropout(p=lora_dropout)
        else:
            self.lora_dropout = lambda x: x
        # Mark the weight as unmerged
        self.merged = False
        self.merge_weights = merge_weights
        self.disable_adapters = False


class Linear(nn.Linear, LoraLayer):
    # Lora implemented in a dense layer
    def __init__(
        self,
        in_features: int,
        out_features: int,
        r: int = 0,
        lora_alpha: int = 1,
        lora_dropout: float = 0.0,
        fan_in_fan_out: bool = False,  # Set this to True if the layer to replace stores weight like (fan_in, fan_out)
        merge_weights: bool = True,
        **kwargs,
    ):
        nn.Linear.__init__(self, in_features, out_features, **kwargs)
        LoraLayer.__init__(self, r=r, lora_alpha=lora_alpha, lora_dropout=lora_dropout, merge_weights=merge_weights)

        self.fan_in_fan_out = fan_in_fan_out
        # Actual trainable parameters
        if r > 0:
            self.lora_A = nn.Linear(in_features, r, bias=False)
            self.lora_B = nn.Linear(r, out_features, bias=False)
            self.scaling = self.lora_alpha / self.r
            # Freezing the pre-trained weight matrix
            self.weight.requires_grad = False
        self.reset_parameters()
        if fan_in_fan_out:
            self.weight.data = self.weight.data.T

    def reset_parameters(self):
        nn.Linear.reset_parameters(self)
        if hasattr(self, "lora_A"):
            # initialize A the same way as the default for nn.Linear and B to zero
            nn.init.kaiming_uniform_(self.lora_A.weight, a=math.sqrt(5))
            nn.init.zeros_(self.lora_B.weight)

    def train(self, mode: bool = True):
        nn.Linear.train(self, mode)
        self.lora_A.train(mode)
        self.lora_B.train(mode)
        if not mode and self.merge_weights and not self.merged:
            # Merge the weights and mark it
            if self.r > 0:
                self.weight.data += (
                    transpose(self.lora_B.weight @ self.lora_A.weight, self.fan_in_fan_out) * self.scaling
                )
            self.merged = True
        elif self.merge_weights and self.merged:
            # Make sure that the weights are not merged
            if self.r > 0:
                self.weight.data -= (
                    transpose(self.lora_B.weight @ self.lora_A.weight, self.fan_in_fan_out) * self.scaling
                )
            self.merged = False

    def eval(self):
        nn.Linear.eval(self)
        self.lora_A.eval()
        self.lora_B.eval()

    def forward(self, x: torch.Tensor):
        previous_dtype = self.weight.dtype

        if self.disable_adapters:
            if self.r > 0 and self.merged:
                matmul_output = self.lora_B.weight @ self.lora_A.weight
                self.weight.data -= transpose(matmul_output.to(previous_dtype), self.fan_in_fan_out) * self.scaling
                self.merged = False

            result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
        elif self.r > 0 and not self.merged:
            result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
            if self.r > 0:
                result += self.lora_B(self.lora_A(self.lora_dropout(x.to(self.lora_A.weight.dtype)))) * self.scaling
        else:
            result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)

        if result.dtype != previous_dtype:
            result = result.to(previous_dtype)

        return result


class MergedLinear(nn.Linear, LoraLayer):
    # Lora implemented in a dense layer
    def __init__(
        self,
        in_features: int,
        out_features: int,
        r: int = 0,
        lora_alpha: int = 1,
        lora_dropout: float = 0.0,
        enable_lora: List[bool] = [False],
        fan_in_fan_out: bool = False,
        merge_weights: bool = True,
        **kwargs,
    ):
        nn.Linear.__init__(self, in_features, out_features, **kwargs)
        LoraLayer.__init__(self, r=r, lora_alpha=lora_alpha, lora_dropout=lora_dropout, merge_weights=merge_weights)
        if out_features % len(enable_lora) != 0:
            raise ValueError("The length of enable_lora must divide out_features")
        self.enable_lora = enable_lora
        self.fan_in_fan_out = fan_in_fan_out
        # Actual trainable parameters
        if r > 0 and any(enable_lora):
            self.lora_A = nn.Linear(in_features, r * sum(enable_lora), bias=False)
            self.lora_B = nn.Conv1d(
                r * sum(enable_lora),
                out_features // len(enable_lora) * sum(enable_lora),
                kernel_size=1,
                groups=2,
                bias=False,
            )
            self.scaling = self.lora_alpha / self.r
            # Freezing the pre-trained weight matrix
            self.weight.requires_grad = False
            # Compute the indices
            self.lora_ind = self.weight.new_zeros((out_features,), dtype=torch.bool).view(len(enable_lora), -1)
            self.lora_ind[enable_lora, :] = True
            self.lora_ind = self.lora_ind.view(-1)
        self.reset_parameters()
        if fan_in_fan_out:
            self.weight.data = self.weight.data.T

    def reset_parameters(self):
        nn.Linear.reset_parameters(self)
        if hasattr(self, "lora_A"):
            # initialize A the same way as the default for nn.Linear and B to zero
            nn.init.kaiming_uniform_(self.lora_A.weight, a=math.sqrt(5))
            nn.init.zeros_(self.lora_B.weight)

    def zero_pad(self, x):
        result = x.new_zeros((*x.shape[:-1], self.out_features))
        result = result.view(-1, self.out_features)
        result[:, self.lora_ind] = x.reshape(-1, self.out_features // len(self.enable_lora) * sum(self.enable_lora))
        return result.view((*x.shape[:-1], self.out_features))

    def train(self, mode: bool = True):
        nn.Linear.train(self, mode)
        self.lora_A.train(mode)
        self.lora_B.train(mode)
        if not mode and self.merge_weights and not self.merged:
            # Merge the weights and mark it
            if self.r > 0 and any(self.enable_lora):
                delta_w = (
                    F.conv1d(
                        self.lora_A.weight.data.unsqueeze(0),
                        self.lora_B.weight.data,
                        groups=sum(self.enable_lora),
                    )
                    .squeeze(0)
                    .transpose(-2, -1)
                )
                self.weight.data += transpose(self.zero_pad(delta_w * self.scaling), not self.fan_in_fan_out)
            self.merged = True
        elif self.merge_weights and self.merged:
            # Make sure that the weights are not merged
            if self.r > 0 and any(self.enable_lora):
                delta_w = (
                    F.conv1d(
                        self.lora_A.weight.data.unsqueeze(0),
                        self.lora_B.weight.data,
                        groups=sum(self.enable_lora),
                    )
                    .squeeze(0)
                    .transpose(-2, -1)
                )
                self.weight.data -= transpose(self.zero_pad(delta_w * self.scaling), not self.fan_in_fan_out)
            self.merged = False

    def eval(self):
        nn.Linear.eval(self)
        self.lora_A.eval()
        self.lora_B.eval()

    def forward(self, x: torch.Tensor):
        previous_dtype = x.dtype
        if self.disable_adapters:
            if self.r > 0 and self.merged and any(self.enable_lora):
                delta_w = (
                    F.conv1d(
                        self.lora_A.weight.data.unsqueeze(0),
                        self.lora_B.weight.data,
                        groups=sum(self.enable_lora),
                    )
                    .squeeze(0)
                    .transpose(-2, -1)
                )
                delta_w = delta_w.to(self.weight.dtype)
                self.weight.data -= transpose(self.zero_pad(delta_w * self.scaling), not self.fan_in_fan_out)
                self.merged = False
            result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
        elif self.merged:
            result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
        else:
            result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
            if self.r > 0:
                after_A = self.lora_A(self.lora_dropout(x.to(self.lora_A.weight.dtype)))
                after_B = self.lora_B(after_A.transpose(-2, -1)).transpose(-2, -1)
                result += self.zero_pad(after_B) * self.scaling
            result = result.to(previous_dtype)

        return result


if is_bnb_available():

    class Linear8bitLt(bnb.nn.Linear8bitLt, LoraLayer):
        # Lora implemented in a dense layer
        def __init__(
            self,
            in_features,
            out_features,
            r: int = 0,
            lora_alpha: int = 1,
            lora_dropout: float = 0.0,
            **kwargs,
        ):
            bnb.nn.Linear8bitLt.__init__(
                self,
                in_features,
                out_features,
                bias=kwargs.get("bias", True),
                has_fp16_weights=kwargs.get("has_fp16_weights", True),
                memory_efficient_backward=kwargs.get("memory_efficient_backward", False),
                threshold=kwargs.get("threshold", 0.0),
                index=kwargs.get("index", None),
            )
            LoraLayer.__init__(self, r=r, lora_alpha=lora_alpha, lora_dropout=lora_dropout, merge_weights=False)
            # Actual trainable parameters
            if r > 0:
                self.lora_A = nn.Linear(in_features, r, bias=False)
                self.lora_B = nn.Linear(r, out_features, bias=False)
                self.scaling = self.lora_alpha / self.r
                # Freezing the pre-trained weight matrix
                self.weight.requires_grad = False
            self.reset_parameters()

        def reset_parameters(self):
            if hasattr(self, "lora_A"):
                # initialize A the same way as the default for nn.Linear and B to zero
                nn.init.kaiming_uniform_(self.lora_A.weight, a=math.sqrt(5))
                nn.init.zeros_(self.lora_B.weight)

        def forward(self, x: torch.Tensor):
            result = super().forward(x)

            if self.disable_adapters:
                return result
            elif self.r > 0:
                if not torch.is_autocast_enabled():
                    expected_dtype = result.dtype

                    if x.dtype != torch.float32:
                        x = x.float()
                    output = self.lora_B(self.lora_A(self.lora_dropout(x))).to(expected_dtype) * self.scaling
                    result += output
                else:
                    output = self.lora_B(self.lora_A(self.lora_dropout(x))) * self.scaling
                    result += output
            return result

    class MergedLinear8bitLt(bnb.nn.Linear8bitLt, LoraLayer):
        # Lora implemented in a dense layer
        def __init__(
            self,
            in_features: int,
            out_features: int,
            r: int = 0,
            lora_alpha: int = 1,
            lora_dropout: float = 0.0,
            enable_lora: List[bool] = [False],
            **kwargs,
        ):
            bnb.nn.Linear8bitLt.__init__(
                self,
                in_features,
                out_features,
                bias=kwargs.get("bias", True),
                has_fp16_weights=kwargs.get("has_fp16_weights", True),
                memory_efficient_backward=kwargs.get("memory_efficient_backward", False),
                threshold=kwargs.get("threshold", 0.0),
                index=kwargs.get("index", None),
            )
            LoraLayer.__init__(self, r=r, lora_alpha=lora_alpha, lora_dropout=lora_dropout, merge_weights=False)
            if out_features % len(enable_lora) != 0:
                raise ValueError("The length of enable_lora must divide out_features")
            self.enable_lora = enable_lora
            # Actual trainable parameters
            if r > 0 and any(enable_lora):
                self.lora_A = nn.Linear(in_features, r * sum(enable_lora), bias=False)
                self.lora_B = nn.Conv1d(
                    r * sum(enable_lora),
                    out_features // len(enable_lora) * sum(enable_lora),
                    kernel_size=1,
                    groups=2,
                    bias=False,
                )
                self.scaling = self.lora_alpha / self.r
                # Freezing the pre-trained weight matrix
                self.weight.requires_grad = False
                # Compute the indices
                self.lora_ind = self.weight.new_zeros((out_features,), dtype=torch.bool).view(len(enable_lora), -1)
                self.lora_ind[enable_lora, :] = True
                self.lora_ind = self.lora_ind.view(-1)
            self.reset_parameters()

        def reset_parameters(self):
            if hasattr(self, "lora_A"):
                # initialize A the same way as the default for nn.Linear and B to zero
                nn.init.kaiming_uniform_(self.lora_A.weight, a=math.sqrt(5))
                nn.init.zeros_(self.lora_B.weight)

        def zero_pad(self, x):
            result = x.new_zeros((*x.shape[:-1], self.out_features))
            result = result.view(-1, self.out_features)
            result[:, self.lora_ind] = x.reshape(
                -1, self.out_features // len(self.enable_lora) * sum(self.enable_lora)
            )
            return result.view((*x.shape[:-1], self.out_features))

        def forward(self, x: torch.Tensor):
            result = super().forward(x)
            if self.disable_adapters:
                return result
            elif self.r > 0:
                if not torch.is_autocast_enabled():
                    expected_dtype = result.dtype
                    if x.dtype != torch.float32:
                        x = x.float()
                    after_A = self.lora_A(self.lora_dropout(x))
                    after_B = self.lora_B(after_A.transpose(-2, -1)).transpose(-2, -1)
                    output = self.zero_pad(after_B).to(expected_dtype) * self.scaling
                    result += output
                else:
                    after_A = self.lora_A(self.lora_dropout(x))
                    after_B = self.lora_B(after_A.transpose(-2, -1)).transpose(-2, -1)
                    output = self.zero_pad(after_B) * self.scaling
                    result += output
            return result
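Editor's note: the merge/unmerge logic in `Linear.train()` above relies on the identity that the unmerged forward pass (base path plus low-rank path) equals a single linear layer with weight W + (alpha / r) * B @ A. The following minimal sketch checks that identity in plain torch; all names and sizes here are illustrative, not part of the file above.

import torch

torch.manual_seed(0)
in_f, out_f, r, alpha = 32, 48, 4, 16
W = torch.randn(out_f, in_f)   # frozen pretrained weight
A = torch.randn(r, in_f)       # lora_A.weight (kaiming-init in the code above)
B = torch.randn(out_f, r)      # lora_B.weight (zero-init above; random here so the check is non-trivial)
scaling = alpha / r            # self.scaling = lora_alpha / r

x = torch.randn(5, in_f)
unmerged = x @ W.t() + (x @ A.t()) @ B.t() * scaling  # base path + low-rank path, as in forward()
merged = x @ (W + scaling * (B @ A)).t()              # weights folded in, as in train(mode=False)
print(torch.allclose(unmerged, merged, atol=1e-5))    # True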
SVFT-main/LLM-Adapters/peft/src/peft/tuners/p_tuning.py
ADDED
@@ -0,0 +1,159 @@
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2023-present the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
|
16 |
+
import enum
import warnings
from dataclasses import dataclass, field
from typing import Union

import torch

from ..utils import PeftType, PromptLearningConfig


class PromptEncoderReparameterizationType(str, enum.Enum):
    MLP = "MLP"
    LSTM = "LSTM"


@dataclass
class PromptEncoderConfig(PromptLearningConfig):
    """
    This is the configuration class to store the configuration of a [`~peft.PromptEncoder`].

    Args:
        encoder_reparameterization_type (Union[[`PromptEncoderReparameterizationType`], `str`]):
            The type of reparameterization to use.
        encoder_hidden_size (`int`): The hidden size of the prompt encoder.
        encoder_num_layers (`int`): The number of layers of the prompt encoder.
        encoder_dropout (`float`): The dropout probability of the prompt encoder.
    """

    encoder_reparameterization_type: Union[str, PromptEncoderReparameterizationType] = field(
        default=PromptEncoderReparameterizationType.MLP,
        metadata={"help": "How to reparameterize the prompt encoder"},
    )
    encoder_hidden_size: int = field(
        default=None,
        metadata={"help": "The hidden size of the prompt encoder"},
    )
    encoder_num_layers: int = field(
        default=2,
        metadata={"help": "The number of layers of the prompt encoder"},
    )
    encoder_dropout: float = field(
        default=0.0,
        metadata={"help": "The dropout of the prompt encoder"},
    )

    def __post_init__(self):
        self.peft_type = PeftType.P_TUNING


# Based on https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/nlp/modules/common/prompt_encoder.py
# with some refactor
class PromptEncoder(torch.nn.Module):
    """
    The prompt encoder network that is used to generate the virtual token embeddings for p-tuning.

    Args:
        config ([`PromptEncoderConfig`]): The configuration of the prompt encoder.

    Example::

        >>> from peft import PromptEncoder, PromptEncoderConfig
        >>> config = PromptEncoderConfig(
        ...     peft_type="P_TUNING", task_type="SEQ_2_SEQ_LM", num_virtual_tokens=20, token_dim=768,
        ...     num_transformer_submodules=1, num_attention_heads=12, num_layers=12,
        ...     encoder_reparameterization_type="MLP", encoder_hidden_size=768,
        ... )
        >>> prompt_encoder = PromptEncoder(config)

    **Attributes**:
        - **embedding** ([`~torch.nn.Embedding`]) -- The embedding layer of the prompt encoder.
        - **mlp_head** ([`~torch.nn.Sequential`]) -- The MLP head of the prompt encoder if `inference_mode=False`.
        - **lstm_head** ([`~torch.nn.LSTM`]) -- The LSTM head of the prompt encoder if `inference_mode=False` and
          `encoder_reparameterization_type="LSTM"`.
        - **token_dim** (`int`) -- The hidden embedding dimension of the base transformer model.
        - **input_size** (`int`) -- The input size of the prompt encoder.
        - **output_size** (`int`) -- The output size of the prompt encoder.
        - **hidden_size** (`int`) -- The hidden size of the prompt encoder.
        - **total_virtual_tokens** (`int`) -- The total number of virtual tokens of the prompt encoder.
        - **encoder_type** (Union[[`PromptEncoderReparameterizationType`], `str`]) -- The encoder type of the
          prompt encoder.

    Input shape: (batch_size, total_virtual_tokens)

    Output shape: (batch_size, total_virtual_tokens, token_dim)
    """

    def __init__(self, config):
        super().__init__()
        self.token_dim = config.token_dim
        self.input_size = self.token_dim
        self.output_size = self.token_dim
        self.hidden_size = config.encoder_hidden_size
        self.total_virtual_tokens = config.num_virtual_tokens * config.num_transformer_submodules
        self.encoder_type = config.encoder_reparameterization_type

        # embedding
        self.embedding = torch.nn.Embedding(self.total_virtual_tokens, self.token_dim)
        if not config.inference_mode:
            if self.encoder_type == PromptEncoderReparameterizationType.LSTM:
                lstm_dropout = config.encoder_dropout
                num_layers = config.encoder_num_layers
                # LSTM
                self.lstm_head = torch.nn.LSTM(
                    input_size=self.input_size,
                    hidden_size=self.hidden_size,
                    num_layers=num_layers,
                    dropout=lstm_dropout,
                    bidirectional=True,
                    batch_first=True,
                )

                self.mlp_head = torch.nn.Sequential(
                    torch.nn.Linear(self.hidden_size * 2, self.hidden_size * 2),
                    torch.nn.ReLU(),
                    torch.nn.Linear(self.hidden_size * 2, self.output_size),
                )

            elif self.encoder_type == PromptEncoderReparameterizationType.MLP:
                warnings.warn(
                    f"for {self.encoder_type}, the `encoder_num_layers` is ignored. Exactly 2 MLP layers are used."
                )
                layers = [
                    torch.nn.Linear(self.input_size, self.hidden_size),
                    torch.nn.ReLU(),
                    torch.nn.Linear(self.hidden_size, self.hidden_size),
                    torch.nn.ReLU(),
                    torch.nn.Linear(self.hidden_size, self.output_size),
                ]
                self.mlp_head = torch.nn.Sequential(*layers)

            else:
                raise ValueError("Prompt encoder type not recognized. Please use one of MLP (recommended) or LSTM.")

    def forward(self, indices):
        input_embeds = self.embedding(indices)
        if self.encoder_type == PromptEncoderReparameterizationType.LSTM:
            output_embeds = self.mlp_head(self.lstm_head(input_embeds)[0])
        elif self.encoder_type == PromptEncoderReparameterizationType.MLP:
            output_embeds = self.mlp_head(input_embeds)
        else:
            raise ValueError("Prompt encoder type not recognized. Please use one of MLP (recommended) or LSTM.")

        return output_embeds
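Aside: a minimal usage sketch for the PromptEncoder above, following its docstring example; the sizes below are illustrative assumptions, not repo defaults.

import torch
from peft import PromptEncoder, PromptEncoderConfig

# MLP reparameterization; all dimensions here are example values.
config = PromptEncoderConfig(
    task_type="SEQ_2_SEQ_LM",
    num_virtual_tokens=20,
    token_dim=768,
    num_transformer_submodules=1,
    num_attention_heads=12,
    num_layers=12,
    encoder_reparameterization_type="MLP",
    encoder_hidden_size=768,
)
encoder = PromptEncoder(config)

indices = torch.arange(encoder.total_virtual_tokens).unsqueeze(0)  # (1, 20)
output = encoder(indices)
assert output.shape == (1, 20, 768)  # (batch, total_virtual_tokens, token_dim)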
SVFT-main/LLM-Adapters/peft/src/peft/tuners/prefix_tuning.py
ADDED
@@ -0,0 +1,101 @@
# coding=utf-8
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from dataclasses import dataclass, field

import torch

from ..utils import PeftType, PromptLearningConfig


@dataclass
class PrefixTuningConfig(PromptLearningConfig):
    """
    This is the configuration class to store the configuration of a [`~peft.PrefixEncoder`].

    Args:
        encoder_hidden_size (`int`): The hidden size of the prompt encoder.
        prefix_projection (`bool`): Whether to project the prefix embeddings.
    """

    encoder_hidden_size: int = field(
        default=None,
        metadata={"help": "The hidden size of the encoder"},
    )
    prefix_projection: bool = field(
        default=False,
        metadata={"help": "Whether to project the prefix tokens"},
    )

    def __post_init__(self):
        self.peft_type = PeftType.PREFIX_TUNING


# Based on https://github.com/THUDM/P-tuning-v2/blob/main/model/prefix_encoder.py
# with some refactor
class PrefixEncoder(torch.nn.Module):
    r"""
    The torch.nn model to encode the prefix

    Args:
        config ([`PrefixTuningConfig`]): The configuration of the prefix encoder.

    Example::

        >>> from peft import PrefixEncoder, PrefixTuningConfig
        >>> config = PrefixTuningConfig(
        ...     peft_type="PREFIX_TUNING", task_type="SEQ_2_SEQ_LM", num_virtual_tokens=20, token_dim=768,
        ...     num_transformer_submodules=1, num_attention_heads=12, num_layers=12, encoder_hidden_size=768,
        ... )
        >>> prefix_encoder = PrefixEncoder(config)

    **Attributes**:
        - **embedding** (`torch.nn.Embedding`) -- The embedding layer of the prefix encoder.
        - **transform** (`torch.nn.Sequential`) -- The two-layer MLP to transform the prefix embeddings if
          `prefix_projection` is `True`.
        - **prefix_projection** (`bool`) -- Whether to project the prefix embeddings.

    Input shape: (batch_size, num_virtual_tokens)

    Output shape: (batch_size, num_virtual_tokens, 2*layers*hidden)
    """

    def __init__(self, config):
        super().__init__()
        self.prefix_projection = config.prefix_projection
        token_dim = config.token_dim
        num_layers = config.num_layers
        encoder_hidden_size = config.encoder_hidden_size
        num_virtual_tokens = config.num_virtual_tokens
        if self.prefix_projection and not config.inference_mode:
            # Use a two-layer MLP to encode the prefix
            self.embedding = torch.nn.Embedding(num_virtual_tokens, token_dim)
            self.transform = torch.nn.Sequential(
                torch.nn.Linear(token_dim, encoder_hidden_size),
                torch.nn.Tanh(),
                torch.nn.Linear(encoder_hidden_size, num_layers * 2 * token_dim),
            )
        else:
            self.embedding = torch.nn.Embedding(num_virtual_tokens, num_layers * 2 * token_dim)

    def forward(self, prefix: torch.Tensor):
        if self.prefix_projection:
            prefix_tokens = self.embedding(prefix)
            past_key_values = self.transform(prefix_tokens)
        else:
            past_key_values = self.embedding(prefix)
        return past_key_values
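Aside: a short sketch of the PrefixEncoder shape contract (dimensions are illustrative assumptions).

import torch
from peft import PrefixEncoder, PrefixTuningConfig

config = PrefixTuningConfig(
    task_type="SEQ_2_SEQ_LM",
    num_virtual_tokens=20,
    token_dim=768,
    num_transformer_submodules=1,
    num_attention_heads=12,
    num_layers=12,
    encoder_hidden_size=768,
    prefix_projection=True,  # route the embeddings through the two-layer MLP
)
prefix_encoder = PrefixEncoder(config)

prefix = torch.arange(config.num_virtual_tokens).unsqueeze(0)  # (1, 20)
past_key_values = prefix_encoder(prefix)
# Each virtual token yields key/value vectors for every layer: 2 * num_layers * token_dim.
assert past_key_values.shape == (1, 20, 2 * 12 * 768)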
SVFT-main/LLM-Adapters/peft/src/peft/tuners/prompt_tuning.py
ADDED
@@ -0,0 +1,120 @@
# coding=utf-8
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import enum
import math
from dataclasses import dataclass, field
from typing import Optional, Union

import torch

from ..utils import PeftType, PromptLearningConfig


class PromptTuningInit(str, enum.Enum):
    TEXT = "TEXT"
    RANDOM = "RANDOM"


@dataclass
class PromptTuningConfig(PromptLearningConfig):
    """
    This is the configuration class to store the configuration of a [`~peft.PromptEmbedding`].

    Args:
        prompt_tuning_init (Union[[`PromptTuningInit`], `str`]): The initialization of the prompt embedding.
        prompt_tuning_init_text (Optional[`str`]): The text to initialize the prompt embedding.
            Only used if `prompt_tuning_init` is `TEXT`.
        tokenizer_name_or_path (Optional[`str`]): The name or path of the tokenizer.
            Only used if `prompt_tuning_init` is `TEXT`.
    """

    prompt_tuning_init: Union[PromptTuningInit, str] = field(
        default=PromptTuningInit.RANDOM,
        metadata={"help": "How to initialize the prompt tuning parameters"},
    )
    prompt_tuning_init_text: Optional[str] = field(
        default=None,
        metadata={
            "help": "The text to use for prompt tuning initialization. Only used if prompt_tuning_init is `TEXT`"
        },
    )
    tokenizer_name_or_path: Optional[str] = field(
        default=None,
        metadata={
            "help": "The tokenizer to use for prompt tuning initialization. Only used if prompt_tuning_init is `TEXT`"
        },
    )

    def __post_init__(self):
        self.peft_type = PeftType.PROMPT_TUNING


class PromptEmbedding(torch.nn.Module):
    """
    The model to encode virtual tokens into prompt embeddings.

    Args:
        config ([`PromptTuningConfig`]): The configuration of the prompt embedding.
        word_embeddings (`torch.nn.Module`): The word embeddings of the base transformer model.

    **Attributes**:
        - **embedding** (`torch.nn.Embedding`) -- The embedding layer of the prompt embedding.

    Example::

        >>> from peft import PromptEmbedding, PromptTuningConfig
        >>> config = PromptTuningConfig(
        ...     peft_type="PROMPT_TUNING", task_type="SEQ_2_SEQ_LM", num_virtual_tokens=20, token_dim=768,
        ...     num_transformer_submodules=1, num_attention_heads=12, num_layers=12, prompt_tuning_init="TEXT",
        ...     prompt_tuning_init_text="Predict if sentiment of this review is positive, negative or neutral",
        ...     tokenizer_name_or_path="t5-base",
        ... )
        >>> # t5_model.shared is the word embeddings of the base model
        >>> prompt_embedding = PromptEmbedding(config, t5_model.shared)

    Input Shape: (batch_size, total_virtual_tokens)

    Output Shape: (batch_size, total_virtual_tokens, token_dim)
    """

    def __init__(self, config, word_embeddings):
        super().__init__()

        total_virtual_tokens = config.num_virtual_tokens * config.num_transformer_submodules
        self.embedding = torch.nn.Embedding(total_virtual_tokens, config.token_dim)
        if config.prompt_tuning_init == PromptTuningInit.TEXT:
            from transformers import AutoTokenizer

            tokenizer = AutoTokenizer.from_pretrained(config.tokenizer_name_or_path)
            init_text = config.prompt_tuning_init_text
            init_token_ids = tokenizer(init_text)["input_ids"]
            # Trim or repeat until num_text_tokens matches total_virtual_tokens
            num_text_tokens = len(init_token_ids)
            if num_text_tokens > total_virtual_tokens:
                init_token_ids = init_token_ids[:total_virtual_tokens]
            elif num_text_tokens < total_virtual_tokens:
                num_reps = math.ceil(total_virtual_tokens / num_text_tokens)
                init_token_ids = init_token_ids * num_reps
            init_token_ids = init_token_ids[:total_virtual_tokens]

            word_embedding_weights = word_embeddings(torch.LongTensor(init_token_ids)).detach().clone()
            word_embedding_weights = word_embedding_weights.to(torch.float32)
            self.embedding.weight = torch.nn.Parameter(word_embedding_weights)

    def forward(self, indices):
        # Just get embeddings
        prompt_embeddings = self.embedding(indices)
        return prompt_embeddings
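Aside: a sketch of the TEXT initialization path in PromptEmbedding. The stand-in embedding table below replaces a real base model's word embeddings (e.g. t5_model.shared in the docstring), and its vocabulary size is an assumption; downloading the "t5-base" tokenizer requires network access.

import torch
from peft import PromptEmbedding, PromptTuningConfig

config = PromptTuningConfig(
    task_type="SEQ_2_SEQ_LM",
    num_virtual_tokens=20,
    token_dim=768,
    num_transformer_submodules=1,
    prompt_tuning_init="TEXT",
    prompt_tuning_init_text="Classify the sentiment of this review",
    tokenizer_name_or_path="t5-base",
)
# Stand-in for the base model's word embeddings; a real run would pass
# the actual embedding module of the base model.
word_embeddings = torch.nn.Embedding(32128, config.token_dim)

prompt_embedding = PromptEmbedding(config, word_embeddings)  # rows seeded from the init text
out = prompt_embedding(torch.arange(20).unsqueeze(0))
assert out.shape == (1, 20, 768)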
SVFT-main/LLM-Adapters/peft/src/peft/utils/__init__.py
ADDED
@@ -0,0 +1,30 @@
# flake8: noqa
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all

# coding=utf-8
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .adapters_utils import CONFIG_NAME, WEIGHTS_NAME
from .config import PeftConfig, PeftType, PromptLearningConfig, TaskType
from .other import (
    TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING,
    _set_trainable,
    bloom_model_postprocess_past_key_value,
    prepare_model_for_int8_training,
    shift_tokens_right,
    transpose,
)
from .save_and_load import get_peft_model_state_dict, set_peft_model_state_dict
SVFT-main/LLM-Adapters/peft/src/peft/utils/adapters_utils.py
ADDED
@@ -0,0 +1,18 @@
# coding=utf-8
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
WEIGHTS_NAME = "adapter_model.bin"
CONFIG_NAME = "adapter_config.json"

# TODO: add automapping and superclass here?
SVFT-main/LLM-Adapters/peft/src/peft/utils/config.py
ADDED
@@ -0,0 +1,169 @@
# coding=utf-8
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import enum
import json
import os
from dataclasses import asdict, dataclass, field
from typing import Optional, Union

from huggingface_hub import hf_hub_download
from transformers.utils import PushToHubMixin

from .adapters_utils import CONFIG_NAME


class PeftType(str, enum.Enum):
    PROMPT_TUNING = "PROMPT_TUNING"
    P_TUNING = "P_TUNING"
    PREFIX_TUNING = "PREFIX_TUNING"
    LORA = "LORA"
    BOTTLENECK = "BOTTLENECK"


class TaskType(str, enum.Enum):
    SEQ_CLS = "SEQ_CLS"
    SEQ_2_SEQ_LM = "SEQ_2_SEQ_LM"
    CAUSAL_LM = "CAUSAL_LM"
    TOKEN_CLS = "TOKEN_CLS"


@dataclass
class PeftConfigMixin(PushToHubMixin):
    r"""
    This is the base configuration class for PEFT adapter models. It contains all the methods that are common to all
    PEFT adapter models. This class inherits from `transformers.utils.PushToHubMixin` which contains the methods to
    push your model to the Hub. The method `save_pretrained` will save the configuration of your adapter model in a
    directory. The method `from_pretrained` will load the configuration of your adapter model from a directory.

    Args:
        peft_type (Union[[`~peft.utils.config.PeftType`], `str`]): The type of Peft method to use.
    """
    peft_type: Optional[PeftType] = field(default=None, metadata={"help": "The type of PEFT model."})

    @property
    def __dict__(self):
        return asdict(self)

    def to_dict(self):
        return self.__dict__

    def save_pretrained(self, save_directory, **kwargs):
        r"""
        This method saves the configuration of your adapter model in a directory.

        Args:
            save_directory (`str`):
                The directory where the configuration will be saved.
            **kwargs:
                Additional keyword arguments passed along to the `transformers.utils.PushToHubMixin.push_to_hub`
                method.
        """
        if os.path.isfile(save_directory):
            raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")

        os.makedirs(save_directory, exist_ok=True)

        output_dict = self.__dict__
        output_path = os.path.join(save_directory, CONFIG_NAME)

        # save it
        with open(output_path, "w") as writer:
            writer.write(json.dumps(output_dict, indent=2, sort_keys=True))

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        This method loads the configuration of your adapter model from a directory.

        Args:
            pretrained_model_name_or_path (`str`):
                The directory or the hub-id where the configuration is saved.
            **kwargs:
                Additional keyword arguments passed along to the child class initialization.
        """
        if os.path.isfile(os.path.join(pretrained_model_name_or_path, CONFIG_NAME)):
            config_file = os.path.join(pretrained_model_name_or_path, CONFIG_NAME)
        else:
            try:
                config_file = hf_hub_download(pretrained_model_name_or_path, CONFIG_NAME)
            except Exception:
                raise ValueError(f"Can't find config.json at '{pretrained_model_name_or_path}'")

        loaded_attributes = cls.from_json_file(config_file)

        config = cls(**kwargs)

        for key, value in loaded_attributes.items():
            if hasattr(config, key):
                setattr(config, key, value)

        return config

    @classmethod
    def from_json_file(cls, path_json_file, **kwargs):
        r"""
        Loads a configuration file from a json file.

        Args:
            path_json_file (`str`):
                The path to the json file.
        """
        with open(path_json_file, "r") as file:
            json_object = json.load(file)

        return json_object


@dataclass
class PeftConfig(PeftConfigMixin):
    """
    This is the base configuration class to store the configuration of a :class:`~peft.PeftModel`.

    Args:
        peft_type (Union[[`~peft.utils.config.PeftType`], `str`]): The type of Peft method to use.
        task_type (Union[[`~peft.utils.config.TaskType`], `str`]): The type of task to perform.
        inference_mode (`bool`, defaults to `False`): Whether to use the Peft model in inference mode.
    """

    base_model_name_or_path: str = field(default=None, metadata={"help": "The name of the base model to use."})
    peft_type: Union[str, PeftType] = field(default=None, metadata={"help": "Peft type"})
    task_type: Union[str, TaskType] = field(default=None, metadata={"help": "Task type"})
    inference_mode: bool = field(default=False, metadata={"help": "Whether to use inference mode"})


@dataclass
class PromptLearningConfig(PeftConfig):
    """
    This is the base configuration class to store the configuration of a Union[[`~peft.PrefixTuning`],
    [`~peft.PromptEncoder`], [`~peft.PromptTuning`]].

    Args:
        num_virtual_tokens (`int`): The number of virtual tokens to use.
        token_dim (`int`): The hidden embedding dimension of the base transformer model.
        num_transformer_submodules (`int`): The number of transformer submodules in the base transformer model.
        num_attention_heads (`int`): The number of attention heads in the base transformer model.
        num_layers (`int`): The number of layers in the base transformer model.
    """

    num_virtual_tokens: int = field(default=None, metadata={"help": "Number of virtual tokens"})
    token_dim: int = field(
        default=None, metadata={"help": "The hidden embedding dimension of the base transformer model"}
    )
    num_transformer_submodules: Optional[int] = field(
        default=None, metadata={"help": "Number of transformer submodules"}
    )
    num_attention_heads: Optional[int] = field(default=None, metadata={"help": "Number of attention heads"})
    num_layers: Optional[int] = field(default=None, metadata={"help": "Number of transformer layers"})
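Aside: these config classes are exercised by the tests further down; as a quick sketch, a save/load round-trip through this mixin looks like the following.

import tempfile
from peft import PrefixTuningConfig

config = PrefixTuningConfig(task_type="CAUSAL_LM", num_virtual_tokens=10)
with tempfile.TemporaryDirectory() as tmp_dir:
    config.save_pretrained(tmp_dir)  # writes adapter_config.json
    reloaded = PrefixTuningConfig.from_pretrained(tmp_dir)

assert config.to_dict() == reloaded.to_dict()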
SVFT-main/LLM-Adapters/peft/src/peft/utils/other.py
ADDED
@@ -0,0 +1,159 @@
# coding=utf-8
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch


# needed for prefix-tuning of bloom model
def bloom_model_postprocess_past_key_value(past_key_values):
    past_key_values = torch.cat(past_key_values)
    total_layers, batch_size, num_attention_heads, num_virtual_tokens, head_dim = past_key_values.shape
    keys = past_key_values[: total_layers // 2]
    keys = keys.transpose(2, 3).reshape(
        total_layers // 2, batch_size * num_attention_heads, head_dim, num_virtual_tokens
    )
    values = past_key_values[total_layers // 2 :]
    values = values.reshape(total_layers // 2, batch_size * num_attention_heads, num_virtual_tokens, head_dim)

    return tuple(zip(keys, values))


def prepare_model_for_int8_training(
    model, output_embedding_layer_name="lm_head", use_gradient_checkpointing=True, layer_norm_names=["layer_norm"]
):
    r"""
    This method wraps the entire protocol for preparing a model before running a training. This includes:
        1- casting the layer norm in fp32
        2- making the output embedding layer require grads
        3- adding the upcasting of the lm head to fp32

    Args:
        model (`transformers.PreTrainedModel`):
            The loaded model from `transformers`
    """
    loaded_in_8bit = getattr(model, "is_loaded_in_8bit", False)

    for name, param in model.named_parameters():
        # freeze base model's layers
        param.requires_grad = False

        if loaded_in_8bit:
            # cast layer norm in fp32 for stability for 8bit models
            if param.ndim == 1 and any(layer_norm_name in name for layer_norm_name in layer_norm_names):
                param.data = param.data.to(torch.float32)

    if loaded_in_8bit and use_gradient_checkpointing:
        # For backward compatibility
        if hasattr(model, "enable_input_require_grads"):
            model.enable_input_require_grads()
        else:

            def make_inputs_require_grad(module, input, output):
                output.requires_grad_(True)

            model.get_input_embeddings().register_forward_hook(make_inputs_require_grad)

        # enable gradient checkpointing for memory efficiency
        model.gradient_checkpointing_enable()

    if hasattr(model, output_embedding_layer_name):
        output_embedding_layer = getattr(model, output_embedding_layer_name)
        input_dtype = output_embedding_layer.weight.dtype

        class CastOutputToFloat(torch.nn.Sequential):
            r"""
            Manually cast to the expected dtype of the lm_head as sometimes there is a final layer norm that is
            casted in fp32
            """

            def forward(self, x):
                return super().forward(x.to(input_dtype)).to(torch.float32)

        setattr(model, output_embedding_layer_name, CastOutputToFloat(output_embedding_layer))

    return model


TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING = {
    "bloom": bloom_model_postprocess_past_key_value,
}


# copied from transformers.models.bart.modeling_bart
def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start_token_id: int):
    """
    Shift input ids one token to the right.

    Args:
        input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): input ids
        pad_token_id (`int`): The id of the `padding` token.
        decoder_start_token_id (`int`): The id of the `start` token.
    """
    shifted_input_ids = input_ids.new_zeros(input_ids.shape)
    shifted_input_ids[:, 1:] = input_ids[:, :-1].clone()
    shifted_input_ids[:, 0] = decoder_start_token_id

    if pad_token_id is None:
        raise ValueError("self.model.config.pad_token_id has to be defined.")
    # replace possible -100 values in labels by `pad_token_id`
    shifted_input_ids.masked_fill_(shifted_input_ids == -100, pad_token_id)

    return shifted_input_ids


def _set_trainable(model):
    if model.modules_to_save is not None:
        for name, param in model.named_parameters():
            if any(module_name in name for module_name in model.modules_to_save):
                param.requires_grad = True


def fsdp_auto_wrap_policy(model):
    import functools
    import os

    from accelerate import FullyShardedDataParallelPlugin
    from torch.distributed.fsdp.wrap import _or_policy, lambda_auto_wrap_policy, transformer_auto_wrap_policy

    from ..tuners import PrefixEncoder, PromptEmbedding, PromptEncoder

    def lambda_policy_fn(module):
        if (
            len(list(module.named_children())) == 0
            and getattr(module, "weight", None) is not None
            and module.weight.requires_grad
        ):
            return True
        return False

    lambda_policy = functools.partial(lambda_auto_wrap_policy, lambda_fn=lambda_policy_fn)
    transformer_wrap_policy = functools.partial(
        transformer_auto_wrap_policy,
        transformer_layer_cls=(
            PrefixEncoder,
            PromptEncoder,
            PromptEmbedding,
            FullyShardedDataParallelPlugin.get_module_class_from_name(
                model, os.environ.get("FSDP_TRANSFORMER_CLS_TO_WRAP", "")
            ),
        ),
    )

    auto_wrap_policy = functools.partial(_or_policy, policies=[lambda_policy, transformer_wrap_policy])
    return auto_wrap_policy


def transpose(weight, fan_in_fan_out):
    return weight.T if fan_in_fan_out else weight
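Aside: a worked example of shift_tokens_right, which builds decoder inputs from labels: the start token is prepended, the last label is dropped, and any remaining -100 padding is replaced with pad_token_id (the token ids below are arbitrary).

import torch
from peft.utils import shift_tokens_right

labels = torch.tensor([[5, 6, -100, -100]])
decoder_input_ids = shift_tokens_right(labels, pad_token_id=0, decoder_start_token_id=2)
print(decoder_input_ids)  # tensor([[2, 5, 6, 0]])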
SVFT-main/LLM-Adapters/peft/src/peft/utils/save_and_load.py
ADDED
@@ -0,0 +1,96 @@
# coding=utf-8
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .config import PeftType


def get_peft_model_state_dict(model, state_dict=None):
    """
    Get the state dict of the Peft model.

    Args:
        model ([`PeftModel`]): The Peft model. When using torch.nn.DistributedDataParallel, DeepSpeed or FSDP,
            the model should be the underlying model/unwrapped model (i.e. model.module).
        state_dict (`dict`, *optional*, defaults to `None`):
            The state dict of the model. If not provided, the state dict of the model will be used.
    """
    if state_dict is None:
        state_dict = model.state_dict()
    if model.peft_config.peft_type == PeftType.LORA:
        # to_return = lora_state_dict(model, bias=model.peft_config.bias)
        # adapted from `https://github.com/microsoft/LoRA/blob/main/loralib/utils.py`
        # to work directly with the state dict, which is necessary when using DeepSpeed or FSDP
        bias = model.peft_config.bias
        if bias == "none":
            to_return = {k: state_dict[k] for k in state_dict if "lora_" in k}
        elif bias == "all":
            to_return = {k: state_dict[k] for k in state_dict if "lora_" in k or "bias" in k}
        elif bias == "lora_only":
            to_return = {}
            for k in state_dict:
                if "lora_" in k:
                    to_return[k] = state_dict[k]
                    bias_name = k.split("lora_")[0] + "bias"
                    if bias_name in state_dict:
                        to_return[bias_name] = state_dict[bias_name]
        else:
            raise NotImplementedError
    elif model.peft_config.peft_type == PeftType.BOTTLENECK:
        # return the state dict of the model with Bottleneck adapters
        bias = model.peft_config.bias
        if bias == "none":
            to_return = {k: state_dict[k] for k in state_dict if "adapter_" in k}
        elif bias == "all":
            to_return = {k: state_dict[k] for k in state_dict if "adapter_" in k or "bias" in k}
        elif bias == "adapter_only":
            to_return = {}
            for k in state_dict:
                if "adapter_" in k:
                    to_return[k] = state_dict[k]
                    bias_name = k.split("adapter_")[0] + "bias"
                    if bias_name in state_dict:
                        to_return[bias_name] = state_dict[bias_name]
        else:
            raise NotImplementedError
    else:
        to_return = {}
        if model.peft_config.inference_mode:
            prompt_embeddings = model.prompt_encoder.embedding.weight
        else:
            prompt_embeddings = model.get_prompt_embedding_to_save()
        to_return["prompt_embeddings"] = prompt_embeddings
    if model.modules_to_save is not None:
        for key, value in state_dict.items():
            if any(module_name in key for module_name in model.modules_to_save):
                to_return[key] = value
    return to_return


def set_peft_model_state_dict(model, peft_model_state_dict):
    """
    Set the state dict of the Peft model.

    Args:
        model ([`PeftModel`]): The Peft model.
        peft_model_state_dict (`dict`): The state dict of the Peft model.
    """

    model.load_state_dict(peft_model_state_dict, strict=False)
    if model.peft_config.peft_type != PeftType.LORA and model.peft_config.peft_type != PeftType.BOTTLENECK:
        model.prompt_encoder.embedding.load_state_dict(
            {"weight": peft_model_state_dict["prompt_embeddings"]}, strict=True
        )
    return model
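Aside: a minimal sketch of the save/restore flow these helpers support. The tiny model id is the one used in the tests below; the LoRA hyperparameters are illustrative.

import torch
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model
from peft.utils import get_peft_model_state_dict, set_peft_model_state_dict

base = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-OPTForCausalLM")
config = LoraConfig(task_type="CAUSAL_LM", target_modules=["q_proj", "v_proj"], r=8, lora_alpha=32)
model = get_peft_model(base, config)

adapter_state = get_peft_model_state_dict(model)  # only the "lora_" tensors (bias="none")
torch.save(adapter_state, "adapter_model.bin")

# ...later, after rebuilding the same PEFT model:
set_peft_model_state_dict(model, torch.load("adapter_model.bin"))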
SVFT-main/LLM-Adapters/peft/tests/__init__.py
ADDED
File without changes
SVFT-main/LLM-Adapters/peft/tests/test_config.py
ADDED
@@ -0,0 +1,96 @@
# coding=utf-8
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import tempfile
import unittest

from peft import LoraConfig, PrefixTuningConfig, PromptEncoderConfig, PromptTuningConfig


class PeftConfigTestMixin:
    all_config_classes = (
        LoraConfig,
        PromptEncoderConfig,
        PrefixTuningConfig,
        PromptTuningConfig,
    )


class PeftConfigTester(unittest.TestCase, PeftConfigTestMixin):
    def test_methods(self):
        r"""
        Test if all configs have the expected methods. Here we test
        - to_dict
        - save_pretrained
        - from_pretrained
        - from_json_file
        """
        # test if all configs have the expected methods
        for config_class in self.all_config_classes:
            config = config_class()
            self.assertTrue(hasattr(config, "to_dict"))
            self.assertTrue(hasattr(config, "save_pretrained"))
            self.assertTrue(hasattr(config, "from_pretrained"))
            self.assertTrue(hasattr(config, "from_json_file"))

    def test_task_type(self):
        for config_class in self.all_config_classes:
            # assert this will not fail
            _ = config_class(task_type="test")

    def test_save_pretrained(self):
        r"""
        Test if the config is correctly saved and loaded using
        - save_pretrained
        """
        for config_class in self.all_config_classes:
            config = config_class()
            with tempfile.TemporaryDirectory() as tmp_dirname:
                config.save_pretrained(tmp_dirname)

                config_from_pretrained = config_class.from_pretrained(tmp_dirname)
                self.assertEqual(config.to_dict(), config_from_pretrained.to_dict())

    def test_from_json_file(self):
        for config_class in self.all_config_classes:
            config = config_class()
            with tempfile.TemporaryDirectory() as tmp_dirname:
                config.save_pretrained(tmp_dirname)

                config_from_json = config_class.from_json_file(os.path.join(tmp_dirname, "adapter_config.json"))
                self.assertEqual(config.to_dict(), config_from_json)

    def test_to_dict(self):
        r"""
        Test if the config can be correctly converted to a dict using:
        - to_dict
        - __dict__
        """
        for config_class in self.all_config_classes:
            config = config_class()
            self.assertEqual(config.to_dict(), config.__dict__)
            self.assertTrue(isinstance(config.to_dict(), dict))

    def test_set_attributes(self):
        # manually set attributes and check if they are correctly written
        for config_class in self.all_config_classes:
            config = config_class(peft_type="test")

            # save pretrained
            with tempfile.TemporaryDirectory() as tmp_dirname:
                config.save_pretrained(tmp_dirname)

                config_from_pretrained = config_class.from_pretrained(tmp_dirname)
                self.assertEqual(config.to_dict(), config_from_pretrained.to_dict())
SVFT-main/LLM-Adapters/peft/tests/test_peft_model.py
ADDED
@@ -0,0 +1,156 @@
# coding=utf-8
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import tempfile
import unittest

import torch
from parameterized import parameterized
from transformers import AutoModelForCausalLM

from peft import (
    PeftModel,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_int8_training,
)

from .testing_common import PeftTestConfigManager


# This has to be in the order: model_id, lora_kwargs, prefix_tuning_kwargs, prompt_encoder_kwargs, prompt_tuning_kwargs
PEFT_MODELS_TO_TEST = [
    ("hf-internal-testing/tiny-random-OPTForCausalLM", {"target_modules": ["q_proj", "v_proj"]}, {}, {}, {}),
]


class PeftTestMixin:
    torch_device = "cuda" if torch.cuda.is_available() else "cpu"


class PeftModelTester(unittest.TestCase, PeftTestMixin):
    r"""
    Test if the PeftModel behaves as expected. This includes:
    - test if the model has the expected methods

    We use parameterized.expand for debugging purposes to test each model individually.
    """

    @parameterized.expand(PeftTestConfigManager.get_grid_parameters(PEFT_MODELS_TO_TEST))
    def test_attributes_parametrized(self, test_name, model_id, config_cls, config_kwargs):
        self._test_model_attr(model_id, config_cls, config_kwargs)

    def _test_model_attr(self, model_id, config_cls, config_kwargs):
        model = AutoModelForCausalLM.from_pretrained(model_id)
        config = config_cls(
            base_model_name_or_path=model_id,
            **config_kwargs,
        )
        model = get_peft_model(model, config)

        self.assertTrue(hasattr(model, "save_pretrained"))
        self.assertTrue(hasattr(model, "from_pretrained"))
        self.assertTrue(hasattr(model, "push_to_hub"))

    def _test_prepare_for_training(self, model_id, config_cls, config_kwargs):
        model = AutoModelForCausalLM.from_pretrained(model_id).to(self.torch_device)
        config = config_cls(
            base_model_name_or_path=model_id,
            **config_kwargs,
        )
        model = get_peft_model(model, config)

        dummy_input = torch.LongTensor([[1, 1, 1]]).to(self.torch_device)
        dummy_output = model.get_input_embeddings()(dummy_input)

        self.assertTrue(not dummy_output.requires_grad)

        # load with `prepare_model_for_int8_training`
        model = AutoModelForCausalLM.from_pretrained(model_id).to(self.torch_device)
        model = prepare_model_for_int8_training(model)

        for param in model.parameters():
            self.assertTrue(not param.requires_grad)

        config = config_cls(
            base_model_name_or_path=model_id,
            **config_kwargs,
        )
        model = get_peft_model(model, config)

        # For backward compatibility
        if hasattr(model, "enable_input_require_grads"):
            model.enable_input_require_grads()
        else:

            def make_inputs_require_grad(module, input, output):
                output.requires_grad_(True)

            model.get_input_embeddings().register_forward_hook(make_inputs_require_grad)

        dummy_input = torch.LongTensor([[1, 1, 1]]).to(self.torch_device)
        dummy_output = model.get_input_embeddings()(dummy_input)

        self.assertTrue(dummy_output.requires_grad)

    @parameterized.expand(PeftTestConfigManager.get_grid_parameters(PEFT_MODELS_TO_TEST))
    def test_prepare_for_training_parametrized(self, test_name, model_id, config_cls, config_kwargs):
        self._test_prepare_for_training(model_id, config_cls, config_kwargs)

    def _test_save_pretrained(self, model_id, config_cls, config_kwargs):
        model = AutoModelForCausalLM.from_pretrained(model_id)
        config = config_cls(
            base_model_name_or_path=model_id,
            **config_kwargs,
        )
        model = get_peft_model(model, config)
        model = model.to(self.torch_device)

        with tempfile.TemporaryDirectory() as tmp_dirname:
            model.save_pretrained(tmp_dirname)

            model_from_pretrained = AutoModelForCausalLM.from_pretrained(model_id)
            model_from_pretrained = PeftModel.from_pretrained(model_from_pretrained, tmp_dirname)

            # check if the state dicts are equal
            state_dict = get_peft_model_state_dict(model)
            state_dict_from_pretrained = get_peft_model_state_dict(model_from_pretrained)

            # check if same keys
            self.assertEqual(state_dict.keys(), state_dict_from_pretrained.keys())

            # check if tensors equal
            for key in state_dict.keys():
                self.assertTrue(
                    torch.allclose(
                        state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
                    )
                )

            # check if `adapter_model.bin` is present
            self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_model.bin")))

            # check if `adapter_config.json` is present
            self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_config.json")))

            # check if `pytorch_model.bin` is not present
            self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "pytorch_model.bin")))

            # check if `config.json` is not present
            self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "config.json")))

    @parameterized.expand(PeftTestConfigManager.get_grid_parameters(PEFT_MODELS_TO_TEST))
    def test_save_pretrained(self, test_name, model_id, config_cls, config_kwargs):
        self._test_save_pretrained(model_id, config_cls, config_kwargs)
SVFT-main/LLM-Adapters/peft/tests/testing_common.py
ADDED
@@ -0,0 +1,103 @@
# coding=utf-8
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict

from peft import (
    LoraConfig,
    PrefixTuningConfig,
    PromptEncoderConfig,
    PromptTuningConfig,
)


CONFIG_CLASSES = (
    LoraConfig,
    PrefixTuningConfig,
    PromptEncoderConfig,
    PromptTuningConfig,
)
CONFIG_TESTING_KWARGS = (
    {
        "r": 8,
        "lora_alpha": 32,
        "target_modules": None,
        "lora_dropout": 0.05,
        "bias": "none",
        "task_type": "CAUSAL_LM",
    },
    {
        "num_virtual_tokens": 10,
        "task_type": "CAUSAL_LM",
    },
    {
        "num_virtual_tokens": 10,
        "encoder_hidden_size": 32,
        "task_type": "CAUSAL_LM",
    },
    {
        "num_virtual_tokens": 10,
        "task_type": "CAUSAL_LM",
    },
)

CLASSES_MAPPING = {
    "lora": (LoraConfig, CONFIG_TESTING_KWARGS[0]),
    "prefix_tuning": (PrefixTuningConfig, CONFIG_TESTING_KWARGS[1]),
    "prompt_encoder": (PromptEncoderConfig, CONFIG_TESTING_KWARGS[2]),
    "prompt_tuning": (PromptTuningConfig, CONFIG_TESTING_KWARGS[3]),
}


# Adapted from https://github.com/huggingface/transformers/blob/48327c57182fdade7f7797d1eaad2d166de5c55b/src/transformers/activations.py#LL166C7-L166C22
class ClassInstantier(OrderedDict):
    def __getitem__(self, key, *args, **kwargs):
        # check if any of the kwargs is inside the config class kwargs
        if any([kwarg in self[key][1] for kwarg in kwargs]):
            new_config_kwargs = self[key][1].copy()
            new_config_kwargs.update(kwargs)
            return (self[key][0], new_config_kwargs)

        return super().__getitem__(key, *args, **kwargs)

    def get_grid_parameters(self, model_list):
        r"""
        Returns a list of all possible combinations of the parameters in the config classes.
        """
        grid_parameters = []
        for model_tuple in model_list:
            model_id, lora_kwargs, prefix_tuning_kwargs, prompt_encoder_kwargs, prompt_tuning_kwargs = model_tuple
            for key, value in self.items():
                if key == "lora":
                    # update value[1] if necessary
                    if lora_kwargs is not None:
                        value[1].update(lora_kwargs)
                elif key == "prefix_tuning":
                    # update value[1] if necessary
                    if prefix_tuning_kwargs is not None:
                        value[1].update(prefix_tuning_kwargs)
                elif key == "prompt_encoder":
                    # update value[1] if necessary
                    if prompt_encoder_kwargs is not None:
                        value[1].update(prompt_encoder_kwargs)
                else:
                    # update value[1] if necessary
                    if prompt_tuning_kwargs is not None:
                        value[1].update(prompt_tuning_kwargs)
                grid_parameters.append((f"test_{model_id}_{key}", model_id, value[0], value[1]))

        return grid_parameters


PeftTestConfigManager = ClassInstantier(CLASSES_MAPPING)
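Aside: to see what the grid expansion produces, a small sketch (assuming it is run from the tests directory so testing_common is importable).

from testing_common import PeftTestConfigManager

models = [
    ("hf-internal-testing/tiny-random-OPTForCausalLM", {"target_modules": ["q_proj", "v_proj"]}, {}, {}, {}),
]
# One (test_name, model_id, config_cls, config_kwargs) tuple per adapter family:
# lora, prefix_tuning, prompt_encoder, prompt_tuning.
for test_name, model_id, config_cls, config_kwargs in PeftTestConfigManager.get_grid_parameters(models):
    print(test_name, config_cls.__name__)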
SVFT-main/LLM-Adapters/peft/tests/testing_utils.py
ADDED
@@ -0,0 +1,49 @@
# coding=utf-8
# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest

import torch


def require_torch_gpu(test_case):
    """
    Decorator marking a test that requires a GPU. Will be skipped when no GPU is available.
    """
    if not torch.cuda.is_available():
        return unittest.skip("test requires GPU")(test_case)
    else:
        return test_case


def require_torch_multi_gpu(test_case):
    """
    Decorator marking a test that requires multiple GPUs. Will be skipped when less than 2 GPUs are available.
    """
    if not torch.cuda.is_available() or torch.cuda.device_count() < 2:
        return unittest.skip("test requires multiple GPUs")(test_case)
    else:
        return test_case


def require_bitsandbytes(test_case):
    """
    Decorator marking a test that requires the bitsandbytes library. Will be skipped when the library is not installed.
    """
    try:
        import bitsandbytes  # noqa: F401
    except ImportError:
        return unittest.skip("test requires bitsandbytes")(test_case)
    else:
        return test_case
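Aside: a usage sketch for these decorators (the class and test names are hypothetical).

import unittest

from testing_utils import require_bitsandbytes, require_torch_gpu


class ExampleHardwareGatedTests(unittest.TestCase):
    @require_torch_gpu
    def test_needs_one_gpu(self):
        self.assertTrue(True)  # only runs when CUDA is available

    @require_bitsandbytes
    def test_needs_bitsandbytes(self):
        self.assertTrue(True)  # skipped unless bitsandbytes is installed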
SVFT-main/LLM-Adapters/picture.jpg
ADDED
SVFT-main/LLM-Adapters/pyproject.toml
ADDED
@@ -0,0 +1,8 @@
[tool.black]
line-length = 79

[tool.isort]
include_trailing_comma = true
line_length = 79
multi_line_output = 3
profile = "black"
SVFT-main/LLM-Adapters/requirements.txt
ADDED
@@ -0,0 +1,9 @@
accelerate
appdirs
bitsandbytes
black
black[jupyter]
datasets
fire
git+https://github.com/huggingface/transformers.git
gradio
SVFT-main/LLM-Adapters/run_commonsense.sh
ADDED
@@ -0,0 +1,33 @@
#SVFT_PLAIN
WORLD_SIZE=1 CUDA_VISIBLE_DEVICES=0 torchrun --nproc_per_node=1 --master_port=3191 finetune.py \
    --base_model 'google/gemma-2b' \
    --data_path './ft-training_set/commonsense_15k.json' \
    --output_dir './Gemma_2B_svft_CR15K/' \
    --batch_size 64 \
    --micro_batch_size 4 \
    --num_epochs 3 \
    --learning_rate 5e-2 \
    --cutoff_len 512 \
    --val_set_size 120 \
    --adapter_name svft \
    --off_diag 0 \
    --pattern "banded" \
    --lora_target_modules "q_proj","v_proj","k_proj","o_proj","up_proj","down_proj","gate_proj"

#SVFT_Random_d=16
WORLD_SIZE=1 CUDA_VISIBLE_DEVICES=0 torchrun --nproc_per_node=1 --master_port=3191 finetune.py \
    --base_model 'google/gemma-2b' \
    --data_path './ft-training_set/commonsense_15k.json' \
    --output_dir './Gemma_2B_svft_16diag_random_CR15K/' \
    --batch_size 64 \
    --micro_batch_size 4 \
    --num_epochs 3 \
    --learning_rate 5e-3 \
    --cutoff_len 512 \
    --val_set_size 120 \
    --adapter_name svft \
    --off_diag 16 \
    --pattern "random" \
    --lora_target_modules "q_proj","v_proj","k_proj","o_proj","up_proj","down_proj","gate_proj"
SVFT-main/MetaMath/LICENSE
ADDED
@@ -0,0 +1,201 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
SVFT-main/MetaMath/README.MD
ADDED
@@ -0,0 +1,172 @@
# MetaMath: Bootstrap Your Own Mathematical Questions for Large Language Models

[](CODE_LICENSE)
[](MetaMath/LICENSE)
[](https://www.python.org/downloads/release/python-390/)

<p align="center">
🤗 <a href="https://huggingface.co/meta-math" target="_blank">HF Repo</a> • 📃 <a href="https://arxiv.org/abs/2309.12284" target="_blank">[MetaMath]</a><br>
</p>

<p align="center" width="100%">
<a ><img src="./imgs/metamath.svg" alt="MetaMath" style="width: 80%; min-width: 300px; display: block; margin: auto;"></a>
</p>


## News
- 🔥 Our **MetaMath-Llemma-7B** model achieves **30.0 pass@1** on the MATH benchmark, surpassing all SOTA open-source LLMs at the 7B–13B scale! All the training scripts and the model are open-sourced.
- 🔥 Our **MetaMath-Mistral-7B** model achieves **77.7 pass@1** on the [GSM8k Benchmarks](https://github.com/openai/grade-school-math), surpassing all SOTA open-source LLMs! All the training scripts and the model are open-sourced.
- 🔥 The full **MetaMathQA** dataset is now released on Hugging Face: [MetaMathQA](https://huggingface.co/datasets/meta-math/MetaMathQA/tree/main)!
- 🔥 The GSM8K_Backward dataset is also released on Hugging Face: [GSM8K_Backward](https://huggingface.co/datasets/meta-math/GSM8K_Backward), to evaluate reversal mathematical reasoning ability!
- 🔥 Although the data augmentation for **MetaMathQA** is sourced from **ChatGPT 3.5**, our **MetaMath-70B** model outperforms the closed-source **ChatGPT 3.5** on GSM8K!
- 🔥 Our **MetaMath-7B** model achieves **66.5 pass@1** on the [GSM8k Benchmarks](https://github.com/openai/grade-school-math), **11.6** points higher than the SOTA open-source LLM!
- 🔥 Our **MetaMath-7B** model achieves **19.8 pass@1** on the [MATH Benchmarks](https://github.com/hendrycks/math), **9.1** points higher than the SOTA open-source LLM!

| Model | Checkpoint | Paper | GSM8k | MATH | License |
| ----- | ------ | ---- | ------ | ------- | ----- |
| MetaMath-70B-V1.0 | 🤗 <a href="https://huggingface.co/meta-math/MetaMath-70B-V1.0" target="_blank">HF Link</a> | 📃 <a href="https://arxiv.org/abs/2309.12284" target="_blank">[MetaMath]</a> | **82.3** | **26.6** | <a href="https://ai.meta.com/resources/models-and-libraries/llama-downloads/" target="_blank">Llama 2</a> |
| MetaMath-13B-V1.0 | 🤗 <a href="https://huggingface.co/meta-math/MetaMath-13B-V1.0" target="_blank">HF Link</a> | 📃 <a href="https://arxiv.org/abs/2309.12284" target="_blank">[MetaMath]</a> | **72.3** | **22.4** | <a href="https://ai.meta.com/resources/models-and-libraries/llama-downloads/" target="_blank">Llama 2</a> |
| MetaMath-7B-V1.0 | 🤗 <a href="https://huggingface.co/meta-math/MetaMath-7B-V1.0" target="_blank">HF Link</a> | 📃 <a href="https://arxiv.org/abs/2309.12284" target="_blank">[MetaMath]</a> | **66.5** | **19.8** | <a href="https://ai.meta.com/resources/models-and-libraries/llama-downloads/" target="_blank">Llama 2</a> |
| MetaMath-Mistral-7B | 🤗 <a href="https://huggingface.co/meta-math/MetaMath-Mistral-7B" target="_blank">HF Link</a> | 📃 <a href="https://arxiv.org/abs/2309.12284" target="_blank">[MetaMath]</a> | **77.7** | **28.2** | <a href="http://www.apache.org/licenses/" target="_blank">Apache License 2.0</a> |
| MetaMath-Llemma-7B | 🤗 <a href="https://huggingface.co/meta-math/MetaMath-Llemma-7B" target="_blank">HF Link</a> | 📃 <a href="https://arxiv.org/abs/2309.12284" target="_blank">[MetaMath]</a> | **69.2** | **30.0** | <a href="http://www.apache.org/licenses/" target="_blank">Apache License 2.0</a> |


## Comparing MetaMath with other LLMs

🔥 Comprehensive Results

| Model | GSM8k Pass@1 | MATH Pass@1 |
|---------------------|--------------|-------------|
| MPT-7B | 6.8 | 3.0 |
| Falcon-7B | 6.8 | 2.3 |
| LLaMA-1-7B | 11.0 | 2.9 |
| LLaMA-2-7B | 14.6 | 2.5 |
| MPT-30B | 15.2 | 3.1 |
| LLaMA-1-13B | 17.8 | 3.9 |
| GPT-Neo-2.7B | 19.5 | -- |
| Falcon-40B | 19.6 | 2.5 |
| Baichuan-chat-13B | 23.9 | -- |
| Vicuna-v1.3-13B | 27.6 | -- |
| LLaMA-2-13B | 28.7 | 3.9 |
| InternLM-7B | 31.2 | -- |
| ChatGLM-2-6B | 32.4 | -- |
| GPT-J-6B | 34.9 | -- |
| LLaMA-1-33B | 35.6 | 3.9 |
| LLaMA-2-34B | 42.2 | 6.24 |
| RFT-7B | 50.3 | -- |
| LLaMA-1-65B | 50.9 | 10.6 |
| Qwen-7B | 51.6 | -- |
| WizardMath-7B | 54.9 | 10.7 |
| LLaMA-2-70B | 56.8 | 13.5 |
| WizardMath-13B | 63.9 | 14.0 |
| 🔥 MetaMath-7B | **66.5** | **19.8** |
| 🔥 MetaMath-13B | **72.3** | **22.4** |
| 🔥 MetaMath-Mistral-7B | **77.7** | **28.2** |
| 🔥 MetaMath-Llemma-7B | **69.2** | **30.0** |
| WizardMath-70B | 81.6 | 22.7 |
| 🔥 MetaMath-70B | **82.3** | **26.6** |

<h2 id="env">Quick Start</h2>

Clone MetaMath and install the required packages:

```bash
git clone https://github.com/meta-math/MetaMath.git
cd MetaMath
pip install -r requirements.txt
```

If you encounter a Ray installation problem, please run:

```bash
pip install --upgrade ray
pip install --upgrade pyarrow
pip install pandas
```

<h2 id="Inference">Dataset Usage</h2>

Run the following command to load the data:

```python
from datasets import load_dataset
dataset = load_dataset("meta-math/MetaMathQA")
```


<h2 id="train">Training</h2>

You need to prepare the LLaMA-2 base model and our **MetaMathQA** dataset from Hugging Face: [MetaMathQA](https://huggingface.co/datasets/meta-math/MetaMathQA/tree/main).

```
bash run.sh
```
or

```
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m torch.distributed.launch --master_addr ${MASTER_ADDR} --master_port ${MASTER_PORT} --nproc_per_node=8 --use_env train_math.py \
    --model_name_or_path "meta-llama/Llama-2-7b-hf" \
    --data_path "path/to/metamathqa" \
    --data_length 10000000 \
    --bf16 True \
    --output_dir "path/to/save" \
    --num_train_epochs 3 \
    --per_device_train_batch_size 4 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 4 \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps 1000 \
    --save_total_limit 2 \
    --learning_rate 2e-5 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --fsdp "full_shard auto_wrap" \
    --fsdp_transformer_layer_cls_to_wrap 'LlamaDecoderLayer' \
    --tf32 True
```

### Supervised fine-tuning

We supervised fine-tune MetaMath-7B with the following hyperparameters:

| Hyperparameter | LLaMA 2 7B |
|----------------|-------------|
| Batch size | 128 |
| Learning rate | 2e-5 |
| Epochs | 3 |
| Max length | 512 |
| LR scheduler | cosine |

<h2 id="evaluation">Evaluation</h2>

We use vLLM for fast generation:

```
python eval_gsm8k.py --model "path/to/save" --data_file ./data/test/GSM8K_test.jsonl
python eval_math.py --model "path/to/save" --data_file ./data/test/MATH_test.jsonl
```
where `path/to/save` should be replaced by the fine-tuned model. You can also download our series of MetaMath models from Hugging Face:
🤗 <a href="https://huggingface.co/meta-math/MetaMath-7B-V1.0" target="_blank">MetaMath 7B</a> 🤗 <a href="https://huggingface.co/meta-math/MetaMath-13B-V1.0" target="_blank">MetaMath 13B</a> 🤗 <a href="https://huggingface.co/meta-math/MetaMath-70B-V1.0" target="_blank">MetaMath 70B</a>

The inference prompt for our MetaMath is:
```
"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response: Let's think step by step."
```
+
|
160 |
+
Thanks for the open source code of [WizardMath](https://github.com/nlpxucan/WizardLM/tree/main/WizardMath) and [RFT](https://github.com/OFA-Sys/gsm8k-ScRel/tree/main). Some of our codes are based on them.
|
161 |
+
|
162 |
+
<h2 id="citation">Citation</h2>
|
163 |
+
Please cite the paper if you refer to our model, code, data or paper from MetaMath.
|
164 |
+
|
165 |
+
```
|
166 |
+
@article{yu2023metamath,
|
167 |
+
title={MetaMath: Bootstrap Your Own Mathematical Questions for Large Language Models},
|
168 |
+
author={Yu, Longhui and Jiang, Weisen and Shi, Han and Yu, Jincheng and Liu, Zhengying and Zhang, Yu and Kwok, James T and Li, Zhenguo and Weller, Adrian and Liu, Weiyang},
|
169 |
+
journal={arXiv preprint arXiv:2309.12284},
|
170 |
+
year={2023}
|
171 |
+
}
|
172 |
+
```
|
SVFT-main/MetaMath/data/README.md
ADDED
@@ -0,0 +1,7 @@
# MetaMathQA Data

## Train Data
The full **MetaMathQA** dataset is now released on Hugging Face: [MetaMathQA](https://huggingface.co/datasets/meta-math/MetaMathQA/tree/main)!

## Test Data
The GSM8K_Backward dataset is also released on Hugging Face: [GSM8K_Backward](https://huggingface.co/datasets/meta-math/GSM8K_Backward), to evaluate reversal mathematical reasoning ability!
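Both datasets load the same way as the main README shows; a quick sketch (assuming both Hub repos expose loadable splits):

```python
from datasets import load_dataset

train = load_dataset("meta-math/MetaMathQA")
backward = load_dataset("meta-math/GSM8K_Backward")
print(train)
print(backward)
```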
SVFT-main/MetaMath/data/test/GSM8K_Backward.jsonl
ADDED
The diff for this file is too large to render. See raw diff

SVFT-main/MetaMath/data/test/GSM8K_test.jsonl
ADDED
The diff for this file is too large to render. See raw diff

SVFT-main/MetaMath/data/test/MATH_test.jsonl
ADDED
The diff for this file is too large to render. See raw diff
SVFT-main/MetaMath/data/train/README.md
ADDED
@@ -0,0 +1,3 @@
# MetaMathQA

The full **MetaMathQA** dataset is now released on Hugging Face: [MetaMathQA](https://huggingface.co/datasets/meta-math/MetaMathQA/tree/main)
SVFT-main/MetaMath/eval_gsm8k.py
ADDED
@@ -0,0 +1,134 @@
import argparse
import json
import re
import sys

import jsonlines
from fraction import Fraction
from vllm import LLM, SamplingParams

MAX_INT = sys.maxsize


def is_number(s):
    try:
        float(s)
        return True
    except ValueError:
        pass
    try:
        import unicodedata
        unicodedata.numeric(s)
        return True
    except (TypeError, ValueError):
        pass
    return False


def extract_answer_number(completion):
    text = completion.split('The answer is: ')
    if len(text) > 1:
        extract_ans = text[-1].strip()
        match = re.search(r'[\-+]?\d*[\.,/]?\d+', extract_ans)
        if match:
            if '/' in match.group():
                denominator = match.group().split('/')[1]
                numerator = match.group().split('/')[0]
                if is_number(denominator) and is_number(numerator):
                    if denominator == '0':
                        # avoid ZeroDivisionError; fall back to the numerator alone
                        return round(float(numerator.replace(',', '')))
                    else:
                        frac = Fraction(match.group().replace(',', ''))
                        return round(float(frac.numerator / frac.denominator))
                else:
                    return None
            else:
                if float(match.group().replace(',', '')) == float('inf'):
                    return None
                return round(float(match.group().replace(',', '')))
        else:
            return None
    else:
        return None


def batch_data(data_list, batch_size=1):
    # Note: when batch_size >= len(data_list), n == 0 and the single batch
    # produced is data_list[-batch_size:], i.e. the whole list.
    n = len(data_list) // batch_size
    batch_data = []
    for i in range(n - 1):
        start = i * batch_size
        end = (i + 1) * batch_size
        batch_data.append(data_list[start:end])

    last_start = (n - 1) * batch_size
    last_end = MAX_INT
    batch_data.append(data_list[last_start:last_end])
    return batch_data


def gsm8k_test(model, data_path, start=0, end=MAX_INT, batch_size=1, tensor_parallel_size=1):
    INVALID_ANS = "[invalid]"
    gsm8k_ins = []
    gsm8k_answers = []
    problem_prompt = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n{instruction}\n\n### Response: Let's think step by step."
    )
    print('prompt =====', problem_prompt)
    with open(data_path, "r+", encoding="utf8") as f:
        for idx, item in enumerate(jsonlines.Reader(f)):
            temp_instr = problem_prompt.format(instruction=item["query"])
            gsm8k_ins.append(temp_instr)
            temp_ans = item['response'].split('#### ')[1]
            temp_ans = int(temp_ans.replace(',', ''))
            gsm8k_answers.append(temp_ans)

    gsm8k_ins = gsm8k_ins[start:end]
    gsm8k_answers = gsm8k_answers[start:end]
    print('length ====', len(gsm8k_ins))
    batch_gsm8k_ins = batch_data(gsm8k_ins, batch_size=batch_size)

    stop_tokens = ["Question:", "Question", "USER:", "USER", "ASSISTANT:", "ASSISTANT", "Instruction:", "Instruction", "Response:", "Response"]
    sampling_params = SamplingParams(temperature=0.0, top_p=1, max_tokens=512, stop=stop_tokens)
    print('sampling =====', sampling_params)
    llm = LLM(model=model, tensor_parallel_size=tensor_parallel_size)
    result = []
    res_completions = []
    for idx, (prompt, prompt_answer) in enumerate(zip(batch_gsm8k_ins, gsm8k_answers)):
        if isinstance(prompt, list):
            pass
        else:
            prompt = [prompt]

        completions = llm.generate(prompt, sampling_params)
        for output in completions:
            prompt = output.prompt
            generated_text = output.outputs[0].text
            res_completions.append(generated_text)

    invalid_outputs = []
    for idx, (prompt, completion, prompt_answer) in enumerate(zip(gsm8k_ins, res_completions, gsm8k_answers)):
        doc = {'question': prompt}
        y_pred = extract_answer_number(completion)
        if y_pred is not None:
            result.append(float(y_pred) == float(prompt_answer))
        else:
            result.append(False)
            temp = {'question': prompt, 'output': completion, 'answer': prompt_answer}
            invalid_outputs.append(temp)
    acc = sum(result) / len(result)
    print('len invalid outputs ====', len(invalid_outputs), ', invalid_outputs ===', invalid_outputs)
    print('start ===', start, ', end ====', end)
    print('gsm8k length ====', len(result), ', gsm8k acc ====', acc)


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str)  # model path
    parser.add_argument("--data_file", type=str, default='')  # data path
    parser.add_argument("--start", type=int, default=0)  # start index
    parser.add_argument("--end", type=int, default=MAX_INT)  # end index
    parser.add_argument("--batch_size", type=int, default=400)  # batch_size
    parser.add_argument("--tensor_parallel_size", type=int, default=8)  # tensor_parallel_size
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    gsm8k_test(model=args.model, data_path=args.data_file, start=args.start, end=args.end, batch_size=args.batch_size, tensor_parallel_size=args.tensor_parallel_size)
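For a feel of what `extract_answer_number` does, a few illustrative cases (the expected values follow directly from the regex and `Fraction` logic above; running them requires the module's imports, including vllm, to resolve):

```python
print(extract_answer_number("... The answer is: 72"))     # 72
print(extract_answer_number("... The answer is: 1,234"))  # 1234 (comma stripped)
print(extract_answer_number("... The answer is: 3/4"))    # 1 (Fraction -> 0.75 -> round)
print(extract_answer_number("... The answer is: 2.5"))    # 2 (Python's round halves-to-even)
print(extract_answer_number("no marker present"))         # None
```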
SVFT-main/MetaMath/eval_math.py
ADDED
@@ -0,0 +1,115 @@
import argparse
import json
import sys

import jsonlines

import util
from vllm import LLM, SamplingParams

MAX_INT = sys.maxsize
INVALID_ANS = "[invalid]"

invalid_outputs = []


def remove_boxed(s):
    left = "\\boxed{"
    try:
        assert s[:len(left)] == left
        assert s[-1] == "}"
        return s[len(left):-1]
    except (AssertionError, IndexError, TypeError):
        # s is None (no \boxed{...} found) or not in the expected form
        return None


def process_results(doc, completion, answer):
    split_ans = completion.split('The answer is: ')
    if len(split_ans) > 1:
        ans = split_ans[-1]
        extract_ans_temp = ans.split('.\n')[0]
        extract_ans_temp = extract_ans_temp.strip()
        if len(extract_ans_temp) > 0 and extract_ans_temp[-1] == '.':
            extract_ans = extract_ans_temp[0:-1]
        else:
            extract_ans = extract_ans_temp
        extract_ans = extract_ans.strip()
        if util.is_equiv(extract_ans, answer):
            return True
        else:
            return False
    else:
        temp = {'question': doc, 'output': completion, 'answer': answer}
        invalid_outputs.append(temp)
        return False


def batch_data(data_list, batch_size=1):
    n = len(data_list) // batch_size
    batch_data = []
    for i in range(n - 1):
        start = i * batch_size
        end = (i + 1) * batch_size
        batch_data.append(data_list[start:end])

    last_start = (n - 1) * batch_size
    last_end = MAX_INT
    batch_data.append(data_list[last_start:last_end])
    return batch_data


def test_hendrycks_math(model, data_path, start=0, end=MAX_INT, batch_size=1, tensor_parallel_size=1):
    hendrycks_math_ins = []
    hendrycks_math_answers = []
    problem_prompt = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n{instruction}\n\n### Response: Let's think step by step."
    )
    print('prompt =====', problem_prompt)
    with open(data_path, "r+", encoding="utf8") as f:
        for idx, item in enumerate(jsonlines.Reader(f)):
            temp_instr = problem_prompt.format(instruction=item["instruction"])
            hendrycks_math_ins.append(temp_instr)
            solution = item['output']
            temp_ans = remove_boxed(util.last_boxed_only_string(solution))
            hendrycks_math_answers.append(temp_ans)

    print('total length ===', len(hendrycks_math_ins))
    hendrycks_math_ins = hendrycks_math_ins[start:end]
    hendrycks_math_answers = hendrycks_math_answers[start:end]
    print('length ====', len(hendrycks_math_ins))
    batch_hendrycks_math_ins = batch_data(hendrycks_math_ins, batch_size=batch_size)

    stop_tokens = ["Question:", "Question", "USER:", "USER", "ASSISTANT:", "ASSISTANT", "Instruction:", "Instruction", "Response:", "Response"]
    sampling_params = SamplingParams(temperature=0, top_p=1, max_tokens=2048, stop=stop_tokens)
    print('sampling =====', sampling_params)
    llm = LLM(model=model, tensor_parallel_size=tensor_parallel_size)
    res_completions = []
    for idx, (prompt, prompt_answer) in enumerate(zip(batch_hendrycks_math_ins, hendrycks_math_answers)):
        if isinstance(prompt, list):
            pass
        else:
            prompt = [prompt]
        completions = llm.generate(prompt, sampling_params)
        for output in completions:
            prompt_temp = output.prompt
            generated_text = output.outputs[0].text
            res_completions.append(generated_text)

    results = []
    for idx, (prompt, completion, prompt_answer) in enumerate(zip(hendrycks_math_ins, res_completions, hendrycks_math_answers)):
        res = process_results(prompt, completion, prompt_answer)
        results.append(res)

    acc = sum(results) / len(results)
    print('len invalid outputs ====', len(invalid_outputs), ', invalid_outputs ===', invalid_outputs)
    print('start ===', start, ', end ====', end)
    print('length ====', len(results), ', acc ====', acc)


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, default='')  # model path
    parser.add_argument("--data_file", type=str, default='')  # data path
    parser.add_argument("--start", type=int, default=0)  # start index
    parser.add_argument("--end", type=int, default=MAX_INT)  # end index
    parser.add_argument("--batch_size", type=int, default=400)  # batch_size
    parser.add_argument("--tensor_parallel_size", type=int, default=8)  # tensor_parallel_size
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    test_hendrycks_math(model=args.model, data_path=args.data_file, start=args.start, end=args.end, batch_size=args.batch_size, tensor_parallel_size=args.tensor_parallel_size)
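`remove_boxed` simply strips the MATH dataset's `\boxed{...}` wrapper from the gold answer. A couple of illustrative cases (assuming the module's imports, including `util` and vllm, resolve):

```python
print(remove_boxed("\\boxed{\\frac{3}{4}}"))  # \frac{3}{4}
print(remove_boxed("42"))                     # None: no \boxed{} wrapper
print(remove_boxed(None))                     # None: nothing boxed was found
```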
SVFT-main/MetaMath/requirements.txt
ADDED
@@ -0,0 +1,16 @@
transformers>=4.34.0
wandb==0.15.3
torch==2.0.1
sentencepiece==0.1.99
tokenizers==0.13.3
accelerate==0.21.0
bitsandbytes==0.40.0
vllm
fraction
tqdm
numpy
fire
openai
scipy
jsonlines
pandas