Matthew Hollings committed on
Commit
6b9687e
·
1 Parent(s): ddc92b4

working with gpt2

Browse files
Files changed (3) hide show
  1. .gitignore +2 -0
  2. app.py +50 -0
  3. fine-tune-llm.ipynb +177 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ __pycache__
2
+ flagged/
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ # A sequence of lines, both those typed in and the line so far
4
+ # when save is clicked the txt file is downloaded
5
+ lines = []
6
+
7
+
8
+ def greet(name):
9
+ return "Hello " + name + "!"
10
+
11
+
12
+ def add_to_lines(new_line):
13
+ # TODO: add new_line to the array
14
+ # TODO: send the full text to the language model generator
15
+ lines.append(new_line)
16
+ return "this is the next line in the poem"
17
+
18
+
19
+ def downloadtext():
20
+ # somehow print the values from the list
21
+ pass
22
+
23
+
24
+ # TODO: somehow loop and create all of the text added so far
25
+
26
+ with gr.Blocks() as demo:
27
+ gr.Markdown("Start typing below and then click **Run** to see the output.")
28
+ # Need to render a group of these
29
+ with gr.Group():
30
+ with gr.Row():
31
+ inp = gr.Textbox(placeholder="What is your name?")
32
+ out = gr.Textbox()
33
+ btn = gr.Button("Run")
34
+ btn.click(fn=add_to_lines, inputs=inp, outputs=out)
35
+
36
+
37
+ # demo = gr.Interface(
38
+ # fn=getnextline,
39
+ # inputs=gr.Textbox(lines=1, placeholder="..."),
40
+ # outputs=gr.Markdown(
41
+ # """
42
+ # text as output
43
+ # """
44
+ # ),
45
+ # allow_flagging="never",
46
+ # )
47
+
48
+
49
+ if __name__ == "__main__":
50
+ demo.launch()
fine-tune-llm.ipynb CHANGED
@@ -103,6 +103,183 @@
103
  "classifier(\"We are very happy to show you the 🤗 Transformers library.\")"
104
  ]
105
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  {
107
  "cell_type": "code",
108
  "execution_count": null,
 
103
  "classifier(\"We are very happy to show you the 🤗 Transformers library.\")"
104
  ]
105
  },
106
+ {
107
+ "cell_type": "code",
108
+ "execution_count": null,
109
+ "metadata": {},
110
+ "outputs": [],
111
+ "source": [
112
+ "# Take a prompt and generate a line of text"
113
+ ]
114
+ },
115
+ {
116
+ "cell_type": "code",
117
+ "execution_count": 3,
118
+ "metadata": {},
119
+ "outputs": [
120
+ {
121
+ "data": {
122
+ "application/vnd.jupyter.widget-view+json": {
123
+ "model_id": "6f80cb24ef764bd192e5d3af79f9f5f1",
124
+ "version_major": 2,
125
+ "version_minor": 0
126
+ },
127
+ "text/plain": [
128
+ "Downloading: 0%| | 0.00/665 [00:00<?, ?B/s]"
129
+ ]
130
+ },
131
+ "metadata": {},
132
+ "output_type": "display_data"
133
+ },
134
+ {
135
+ "data": {
136
+ "application/vnd.jupyter.widget-view+json": {
137
+ "model_id": "4e64c3a035b54f0c90ca5cc6e341ad21",
138
+ "version_major": 2,
139
+ "version_minor": 0
140
+ },
141
+ "text/plain": [
142
+ "Downloading: 0%| | 0.00/475M [00:00<?, ?B/s]"
143
+ ]
144
+ },
145
+ "metadata": {},
146
+ "output_type": "display_data"
147
+ },
148
+ {
149
+ "name": "stderr",
150
+ "output_type": "stream",
151
+ "text": [
152
+ "All model checkpoint layers were used when initializing TFGPT2LMHeadModel.\n",
153
+ "\n",
154
+ "All the layers of TFGPT2LMHeadModel were initialized from the model checkpoint at gpt2.\n",
155
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.\n"
156
+ ]
157
+ },
158
+ {
159
+ "data": {
160
+ "application/vnd.jupyter.widget-view+json": {
161
+ "model_id": "4f442dd13c5747e1811c2199423de0c9",
162
+ "version_major": 2,
163
+ "version_minor": 0
164
+ },
165
+ "text/plain": [
166
+ "Downloading: 0%| | 0.00/0.99M [00:00<?, ?B/s]"
167
+ ]
168
+ },
169
+ "metadata": {},
170
+ "output_type": "display_data"
171
+ },
172
+ {
173
+ "data": {
174
+ "application/vnd.jupyter.widget-view+json": {
175
+ "model_id": "a88572455b744b18b99c5bd775944d77",
176
+ "version_major": 2,
177
+ "version_minor": 0
178
+ },
179
+ "text/plain": [
180
+ "Downloading: 0%| | 0.00/446k [00:00<?, ?B/s]"
181
+ ]
182
+ },
183
+ "metadata": {},
184
+ "output_type": "display_data"
185
+ },
186
+ {
187
+ "data": {
188
+ "application/vnd.jupyter.widget-view+json": {
189
+ "model_id": "d31a3e7e53a7422eabfcb61ff5248b8b",
190
+ "version_major": 2,
191
+ "version_minor": 0
192
+ },
193
+ "text/plain": [
194
+ "Downloading: 0%| | 0.00/1.29M [00:00<?, ?B/s]"
195
+ ]
196
+ },
197
+ "metadata": {},
198
+ "output_type": "display_data"
199
+ },
200
+ {
201
+ "name": "stderr",
202
+ "output_type": "stream",
203
+ "text": [
204
+ "Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence\n"
205
+ ]
206
+ },
207
+ {
208
+ "data": {
209
+ "text/plain": [
210
+ "[{'generated_text': 'Hello, I\\'m a language model for the world of design,\" explained the senior designer. \"In JavaScript, each line represents a block of code that'},\n",
211
+ " {'generated_text': \"Hello, I'm a language modeler extraordinaire. So if you're looking for an elegant and flexible way to express your language or for an\"},\n",
212
+ " {'generated_text': \"Hello, I'm a language modeler for Ruby using R, and as a newbie to Rails, I've been very interested in these two techniques\"}]"
213
+ ]
214
+ },
215
+ "execution_count": 3,
216
+ "metadata": {},
217
+ "output_type": "execute_result"
218
+ }
219
+ ],
220
+ "source": [
221
+ "from transformers import pipeline\n",
222
+ "generator = pipeline('text-generation', model = 'gpt2')\n",
223
+ "generator(\"Hello, I'm a language model\", max_length = 30, num_return_sequences=3)"
224
+ ]
225
+ },
226
+ {
227
+ "cell_type": "code",
228
+ "execution_count": 3,
229
+ "metadata": {},
230
+ "outputs": [
231
+ {
232
+ "data": {
233
+ "application/vnd.jupyter.widget-view+json": {
234
+ "model_id": "af1c7c8ed0a84d74823d961f6bdb1e0d",
235
+ "version_major": 2,
236
+ "version_minor": 0
237
+ },
238
+ "text/plain": [
239
+ "Downloading: 0%| | 0.00/523M [00:00<?, ?B/s]"
240
+ ]
241
+ },
242
+ "metadata": {},
243
+ "output_type": "display_data"
244
+ },
245
+ {
246
+ "name": "stderr",
247
+ "output_type": "stream",
248
+ "text": [
249
+ "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
250
+ ]
251
+ },
252
+ {
253
+ "data": {
254
+ "text/plain": [
255
+ "[{'generated_text': 'something to start with, they say, but even if it was just to add an element of humor to the recipe, she said it could become a'},\n",
256
+ " {'generated_text': \"something to start with. You don't have to have a real connection with the people in this building to have any sort of connection with them. And\"},\n",
257
+ " {'generated_text': \"something to start with, as I've seen several years to come. You're supposed to be a good, loving parent, and your kids are supposed\"}]"
258
+ ]
259
+ },
260
+ "execution_count": 3,
261
+ "metadata": {},
262
+ "output_type": "execute_result"
263
+ }
264
+ ],
265
+ "source": [
266
+ "from transformers import pipeline\n",
267
+ "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
268
+ "\n",
269
+ "# tokenizer = AutoTokenizer.from_pretrained(\"BritishLibraryLabs/bl-books-genre\")\n",
270
+ "# model = AutoModelForCausalLM.from_pretrained(\"BritishLibraryLabs/bl-books-genre\")\n",
271
+ "# \"BritishLibraryLabs/bl-books-genre\"\n",
272
+ "\n",
273
+ "tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n",
274
+ "model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n",
275
+ "\n",
276
+ "# generator = pipeline('text-generation', model = \"BritishLibraryLabs/bl-books-genre\")\n",
277
+ "# generator(\"Hello, I'm a language model\", max_length = 30, num_return_sequences=3)\n",
278
+ "\n",
279
+ "generator = pipeline(task=\"text-generation\", model=model, tokenizer=tokenizer)\n",
280
+ "generator('something to start with', max_length = 30, num_return_sequences=3)\n"
281
+ ]
282
+ },
283
  {
284
  "cell_type": "code",
285
  "execution_count": null,