Luis Oala committed
Commit 06e8e4d
1 Parent(s): e237845
Files changed (3)
  1. README.md~ +0 -37
  2. app.py~ +0 -201
  3. server.py~ +0 -246
README.md~ DELETED
@@ -1,37 +0,0 @@
- ---
- title: Glide Text2im
- emoji: 📊
- colorFrom: purple
- colorTo: gray
- sdk: gradio
- app_file: app.py
- pinned: false
- ---
-
- # Configuration
-
- `title`: _string_
- Display title for the Space
-
- `emoji`: _string_
- Space emoji (emoji-only character allowed)
-
- `colorFrom`: _string_
- Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
-
- `colorTo`: _string_
- Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
-
- `sdk`: _string_
- Can be either `gradio` or `streamlit`
-
- `sdk_version` : _string_
- Only applicable for `streamlit` SDK.
- See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
-
- `app_file`: _string_
- Path to your main application file (which contains either `gradio` or `streamlit` Python code).
- Path is relative to the root of the repository.
-
- `pinned`: _boolean_
- Whether the Space stays on top of your list.
app.py~ DELETED
@@ -1,201 +0,0 @@
-
- import os
- os.system('pip install -e .')
- import gradio as gr
-
- import base64
- from io import BytesIO
- # from fastapi import FastAPI
-
- from PIL import Image
- import torch as th
-
- from glide_text2im.download import load_checkpoint
- from glide_text2im.model_creation import (
-     create_model_and_diffusion,
-     model_and_diffusion_defaults,
-     model_and_diffusion_defaults_upsampler
- )
-
- """
- credit: follows the gradio glide example by valhalla https://huggingface.co/spaces/valhalla/glide-text2im
- """
-
-
- # print("Loading models...")
- # app = FastAPI()
-
- # This notebook supports both CPU and GPU.
- # On CPU, generating one sample may take on the order of 20 minutes.
- # On a GPU, it should be under a minute.
-
- has_cuda = th.cuda.is_available()
- device = th.device('cpu' if not has_cuda else 'cuda')
-
- # Create base model.
- options = model_and_diffusion_defaults()
- options['use_fp16'] = has_cuda
- options['timestep_respacing'] = '100' # use 100 diffusion steps for fast sampling
- model, diffusion = create_model_and_diffusion(**options)
- model.eval()
- if has_cuda:
-     model.convert_to_fp16()
- model.to(device)
- model.load_state_dict(load_checkpoint('base', device))
- print('total base parameters', sum(x.numel() for x in model.parameters()))
-
- # Create upsampler model.
- options_up = model_and_diffusion_defaults_upsampler()
- options_up['use_fp16'] = has_cuda
- options_up['timestep_respacing'] = 'fast27' # use 27 diffusion steps for very fast sampling
- model_up, diffusion_up = create_model_and_diffusion(**options_up)
- model_up.eval()
- if has_cuda:
-     model_up.convert_to_fp16()
- model_up.to(device)
- model_up.load_state_dict(load_checkpoint('upsample', device))
- print('total upsampler parameters', sum(x.numel() for x in model_up.parameters()))
-
-
- def get_images(batch: th.Tensor):
-     """ Display a batch of images inline. """
-     scaled = ((batch + 1)*127.5).round().clamp(0,255).to(th.uint8).cpu()
-     reshaped = scaled.permute(2, 0, 3, 1).reshape([batch.shape[2], -1, 3])
-     return Image.fromarray(reshaped.numpy())
-
-
- # Create a classifier-free guidance sampling function
- guidance_scale = 3.0
-
- def model_fn(x_t, ts, **kwargs):
-     half = x_t[: len(x_t) // 2]
-     combined = th.cat([half, half], dim=0)
-     model_out = model(combined, ts, **kwargs)
-     eps, rest = model_out[:, :3], model_out[:, 3:]
-     cond_eps, uncond_eps = th.split(eps, len(eps) // 2, dim=0)
-     half_eps = uncond_eps + guidance_scale * (cond_eps - uncond_eps)
-     eps = th.cat([half_eps, half_eps], dim=0)
-     return th.cat([eps, rest], dim=1)
-
-
- # @app.get("/")
- def read_root():
-     return {"glide!"}
-
- # @app.get("/{generate}")
- def sample(prompt):
-     # Sampling parameters
-     batch_size = 1
-
-     # Tune this parameter to control the sharpness of 256x256 images.
-     # A value of 1.0 is sharper, but sometimes results in grainy artifacts.
-     upsample_temp = 0.997
-
-     ##############################
-     # Sample from the base model #
-     ##############################
-
-     # Create the text tokens to feed to the model.
-     tokens = model.tokenizer.encode(prompt)
-     tokens, mask = model.tokenizer.padded_tokens_and_mask(
-         tokens, options['text_ctx']
-     )
-
-     # Create the classifier-free guidance tokens (empty)
-     full_batch_size = batch_size * 2
-     uncond_tokens, uncond_mask = model.tokenizer.padded_tokens_and_mask(
-         [], options['text_ctx']
-     )
-
-     # Pack the tokens together into model kwargs.
-     model_kwargs = dict(
-         tokens=th.tensor(
-             [tokens] * batch_size + [uncond_tokens] * batch_size, device=device
-         ),
-         mask=th.tensor(
-             [mask] * batch_size + [uncond_mask] * batch_size,
-             dtype=th.bool,
-             device=device,
-         ),
-     )
-
-     # Sample from the base model.
-     model.del_cache()
-     samples = diffusion.p_sample_loop(
-         model_fn,
-         (full_batch_size, 3, options["image_size"], options["image_size"]),
-         device=device,
-         clip_denoised=True,
-         progress=True,
-         model_kwargs=model_kwargs,
-         cond_fn=None,
-     )[:batch_size]
-     model.del_cache()
-
-
-     ##############################
-     # Upsample the 64x64 samples #
-     ##############################
-
-     tokens = model_up.tokenizer.encode(prompt)
-     tokens, mask = model_up.tokenizer.padded_tokens_and_mask(
-         tokens, options_up['text_ctx']
-     )
-
-     # Create the model conditioning dict.
-     model_kwargs = dict(
-         # Low-res image to upsample.
-         low_res=((samples+1)*127.5).round()/127.5 - 1,
-
-         # Text tokens
-         tokens=th.tensor(
-             [tokens] * batch_size, device=device
-         ),
-         mask=th.tensor(
-             [mask] * batch_size,
-             dtype=th.bool,
-             device=device,
-         ),
-     )
-
-     # Sample from the base model.
-     model_up.del_cache()
-     up_shape = (batch_size, 3, options_up["image_size"], options_up["image_size"])
-     up_samples = diffusion_up.ddim_sample_loop(
-         model_up,
-         up_shape,
-         noise=th.randn(up_shape, device=device) * upsample_temp,
-         device=device,
-         clip_denoised=True,
-         progress=True,
-         model_kwargs=model_kwargs,
-         cond_fn=None,
-     )[:batch_size]
-     model_up.del_cache()
-
-     # Show the output
-     image = get_images(up_samples)
-     # image = to_base64(image)
-     # return {"image": image}
-     return image
-
-
- def to_base64(pil_image):
-     buffered = BytesIO()
-     pil_image.save(buffered, format="JPEG")
-     return base64.b64encode(buffered.getvalue())
-
- title = "Interactive demo: glide-text2im"
- description = "Demo for OpenAI's GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models."
- article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2112.10741'>GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models</a> | <a href='https://github.com/openai/glide-text2im/'>Official Repo</a></p>"
- examples =["an oil painting of a corgi"]
-
- iface = gr.Interface(fn=sample,
-                      inputs=gr.inputs.Textbox(label='What would you like to see?'),
-                      outputs=gr.outputs.Image(type="pil", label="Model input + completions"),
-                      title=title,
-                      description=description,
-                      article=article,
-                      examples=examples,
-                      enable_queue=True)
- iface.launch(debug=True)
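
Note: the deleted app used the legacy `gr.inputs` / `gr.outputs` namespaces and the `enable_queue` keyword, which were later deprecated in Gradio. A hedged sketch (not part of this commit) of the same interface against a newer Gradio API, assuming the `sample`, `title`, `description`, `article`, and `examples` names defined in the deleted `app.py~` above:

```python
import gradio as gr

# Hypothetical port of the Interface above to Gradio 3+/4 component names.
iface = gr.Interface(
    fn=sample,  # sampling function from the deleted app.py~ above
    inputs=gr.Textbox(label="What would you like to see?"),
    outputs=gr.Image(type="pil", label="Model input + completions"),
    title=title,
    description=description,
    article=article,
    examples=examples,
)
iface.queue()            # replaces the removed enable_queue=True kwarg
iface.launch(debug=True)
```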
server.py~ DELETED
@@ -1,246 +0,0 @@
- import base64
- from io import BytesIO
- from fastapi import FastAPI
- <<<<<<< HEAD
-
- from PIL import Image
- import torch as th
-
- =======
- from PIL import Image
- import torch as th
- >>>>>>> 8c239b8a9cdaf13e28c145e788b984c129547a37
- from glide_text2im.download import load_checkpoint
- from glide_text2im.model_creation import (
-     create_model_and_diffusion,
-     model_and_diffusion_defaults,
-     model_and_diffusion_defaults_upsampler
- )
- <<<<<<< HEAD
-
- print("Loading models...")
- app = FastAPI()
-
- # This notebook supports both CPU and GPU.
- # On CPU, generating one sample may take on the order of 20 minutes.
- # On a GPU, it should be under a minute.
-
- has_cuda = th.cuda.is_available()
- device = th.device('cpu' if not has_cuda else 'cuda')
-
- =======
- print("Loading models...")
- app = FastAPI()
- # This notebook supports both CPU and GPU.
- # On CPU, generating one sample may take on the order of 20 minutes.
- # On a GPU, it should be under a minute.
- has_cuda = th.cuda.is_available()
- device = th.device('cpu' if not has_cuda else 'cuda')
- >>>>>>> 8c239b8a9cdaf13e28c145e788b984c129547a37
- # Create base model.
- options = model_and_diffusion_defaults()
- options['use_fp16'] = has_cuda
- options['timestep_respacing'] = '100' # use 100 diffusion steps for fast sampling
- model, diffusion = create_model_and_diffusion(**options)
- model.eval()
- if has_cuda:
-     model.convert_to_fp16()
- model.to(device)
- model.load_state_dict(load_checkpoint('base', device))
- print('total base parameters', sum(x.numel() for x in model.parameters()))
- <<<<<<< HEAD
-
- =======
- >>>>>>> 8c239b8a9cdaf13e28c145e788b984c129547a37
- # Create upsampler model.
- options_up = model_and_diffusion_defaults_upsampler()
- options_up['use_fp16'] = has_cuda
- options_up['timestep_respacing'] = 'fast27' # use 27 diffusion steps for very fast sampling
- model_up, diffusion_up = create_model_and_diffusion(**options_up)
- model_up.eval()
- if has_cuda:
-     model_up.convert_to_fp16()
- model_up.to(device)
- model_up.load_state_dict(load_checkpoint('upsample', device))
- print('total upsampler parameters', sum(x.numel() for x in model_up.parameters()))
- <<<<<<< HEAD
-
-
- =======
- >>>>>>> 8c239b8a9cdaf13e28c145e788b984c129547a37
- def get_images(batch: th.Tensor):
-     """ Display a batch of images inline. """
-     scaled = ((batch + 1)*127.5).round().clamp(0,255).to(th.uint8).cpu()
-     reshaped = scaled.permute(2, 0, 3, 1).reshape([batch.shape[2], -1, 3])
-     Image.fromarray(reshaped.numpy())
- <<<<<<< HEAD
-
-
- # Create a classifier-free guidance sampling function
- guidance_scale = 3.0
-
- =======
- # Create a classifier-free guidance sampling function
- guidance_scale = 3.0
- >>>>>>> 8c239b8a9cdaf13e28c145e788b984c129547a37
- def model_fn(x_t, ts, **kwargs):
-     half = x_t[: len(x_t) // 2]
-     combined = th.cat([half, half], dim=0)
-     model_out = model(combined, ts, **kwargs)
-     eps, rest = model_out[:, :3], model_out[:, 3:]
-     cond_eps, uncond_eps = th.split(eps, len(eps) // 2, dim=0)
-     half_eps = uncond_eps + guidance_scale * (cond_eps - uncond_eps)
-     eps = th.cat([half_eps, half_eps], dim=0)
-     return th.cat([eps, rest], dim=1)
- <<<<<<< HEAD
-
-
- @app.get("/")
- def read_root():
-     return {"glide!"}
-
- =======
- @app.get("/")
- def read_root():
-     return {"glide!"}
- >>>>>>> 8c239b8a9cdaf13e28c145e788b984c129547a37
- @app.get("/{generate}")
- def sample(prompt):
-     # Sampling parameters
-     batch_size = 1
- <<<<<<< HEAD
-
-     # Tune this parameter to control the sharpness of 256x256 images.
-     # A value of 1.0 is sharper, but sometimes results in grainy artifacts.
-     upsample_temp = 0.997
-
-     ##############################
-     # Sample from the base model #
-     ##############################
-
- =======
-     # Tune this parameter to control the sharpness of 256x256 images.
-     # A value of 1.0 is sharper, but sometimes results in grainy artifacts.
-     upsample_temp = 0.997
-     ##############################
-     # Sample from the base model #
-     ##############################
- >>>>>>> 8c239b8a9cdaf13e28c145e788b984c129547a37
-     # Create the text tokens to feed to the model.
-     tokens = model.tokenizer.encode(prompt)
-     tokens, mask = model.tokenizer.padded_tokens_and_mask(
-         tokens, options['text_ctx']
-     )
- <<<<<<< HEAD
-
- =======
- >>>>>>> 8c239b8a9cdaf13e28c145e788b984c129547a37
-     # Create the classifier-free guidance tokens (empty)
-     full_batch_size = batch_size * 2
-     uncond_tokens, uncond_mask = model.tokenizer.padded_tokens_and_mask(
-         [], options['text_ctx']
-     )
- <<<<<<< HEAD
-
- =======
- >>>>>>> 8c239b8a9cdaf13e28c145e788b984c129547a37
-     # Pack the tokens together into model kwargs.
-     model_kwargs = dict(
-         tokens=th.tensor(
-             [tokens] * batch_size + [uncond_tokens] * batch_size, device=device
-         ),
-         mask=th.tensor(
-             [mask] * batch_size + [uncond_mask] * batch_size,
-             dtype=th.bool,
-             device=device,
-         ),
-     )
- <<<<<<< HEAD
-
- =======
- >>>>>>> 8c239b8a9cdaf13e28c145e788b984c129547a37
-     # Sample from the base model.
-     model.del_cache()
-     samples = diffusion.p_sample_loop(
-         model_fn,
-         (full_batch_size, 3, options["image_size"], options["image_size"]),
-         device=device,
-         clip_denoised=True,
-         progress=True,
-         model_kwargs=model_kwargs,
-         cond_fn=None,
-     )[:batch_size]
-     model.del_cache()
- <<<<<<< HEAD
-
-
-     ##############################
-     # Upsample the 64x64 samples #
-     ##############################
-
- =======
-     ##############################
-     # Upsample the 64x64 samples #
-     ##############################
- >>>>>>> 8c239b8a9cdaf13e28c145e788b984c129547a37
-     tokens = model_up.tokenizer.encode(prompt)
-     tokens, mask = model_up.tokenizer.padded_tokens_and_mask(
-         tokens, options_up['text_ctx']
-     )
- <<<<<<< HEAD
-
- =======
- >>>>>>> 8c239b8a9cdaf13e28c145e788b984c129547a37
-     # Create the model conditioning dict.
-     model_kwargs = dict(
-         # Low-res image to upsample.
-         low_res=((samples+1)*127.5).round()/127.5 - 1,
- <<<<<<< HEAD
-
- =======
- >>>>>>> 8c239b8a9cdaf13e28c145e788b984c129547a37
-         # Text tokens
-         tokens=th.tensor(
-             [tokens] * batch_size, device=device
-         ),
-         mask=th.tensor(
-             [mask] * batch_size,
-             dtype=th.bool,
-             device=device,
-         ),
-     )
- <<<<<<< HEAD
-
- =======
- >>>>>>> 8c239b8a9cdaf13e28c145e788b984c129547a37
-     # Sample from the base model.
-     model_up.del_cache()
-     up_shape = (batch_size, 3, options_up["image_size"], options_up["image_size"])
-     up_samples = diffusion_up.ddim_sample_loop(
-         model_up,
-         up_shape,
-         noise=th.randn(up_shape, device=device) * upsample_temp,
-         device=device,
-         clip_denoised=True,
-         progress=True,
-         model_kwargs=model_kwargs,
-         cond_fn=None,
-     )[:batch_size]
-     model_up.del_cache()
- <<<<<<< HEAD
-
- =======
- >>>>>>> 8c239b8a9cdaf13e28c145e788b984c129547a37
-     # Show the output
-     image = get_images(up_samples)
-     image = to_base64(image)
-     return {"image": image}
- <<<<<<< HEAD
-
-
- =======
- >>>>>>> 8c239b8a9cdaf13e28c145e788b984c129547a37
- def to_base64(pil_image):
-     buffered = BytesIO()
-     pil_image.save(buffered, format="JPEG")
-     return base64.b64encode(buffered.getvalue())
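
Note: in the conflicted copy above, `get_images` drops its return value and the endpoint serializes raw base64 bytes. A minimal sketch of the resolved helpers, mirroring the `app.py~` version above (the `.decode()` for a plain-string JSON payload is an assumption, not part of the deleted file):

```python
import base64
from io import BytesIO

from PIL import Image
import torch as th


def get_images(batch: th.Tensor) -> Image.Image:
    """Tile a batch of [-1, 1] images into a single PIL image."""
    scaled = ((batch + 1) * 127.5).round().clamp(0, 255).to(th.uint8).cpu()
    reshaped = scaled.permute(2, 0, 3, 1).reshape([batch.shape[2], -1, 3])
    return Image.fromarray(reshaped.numpy())  # the conflicted copy omits this return


def to_base64(pil_image: Image.Image) -> str:
    """Encode a PIL image as a base64 JPEG string for a JSON response."""
    buffered = BytesIO()
    pil_image.save(buffered, format="JPEG")
    # .decode() is an assumption: return a str rather than raw bytes
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
```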