Zack3D committed on
Commit
68971bf
·
verified ·
1 Parent(s): 9047431

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -51
app.py CHANGED
@@ -6,9 +6,9 @@ Adds an **in-browser paint tool** for the edit / inpaint workflow so users can
6
  draw the mask directly instead of uploading one.
7
 
8
  ### How mask painting works
9
- * Upload an image.
10
  * Use the *Mask* canvas to **paint the areas you’d like changed** (white =
11
- editable, black = keep).
12
  The new `gr.ImageMask` component captures your brush strokes.
13
  * The painted mask is converted to a 1‑channel PNG and sent to the
14
  `images.edit()` endpoint.
@@ -35,6 +35,7 @@ FORMAT_CHOICES = ["png", "jpeg", "webp"]
35
 
36
 
37
  def _client(key: str) -> openai.OpenAI:
 
38
  api_key = key.strip() or os.getenv("OPENAI_API_KEY", "")
39
  if not api_key:
40
  raise gr.Error("Please enter your OpenAI API key (never stored)")
@@ -45,7 +46,7 @@ def _img_list(resp, *, fmt: str) -> List[str]:
45
  """Return list of data URLs or direct URLs depending on API response."""
46
  mime = f"image/{fmt}"
47
  return [
48
- f"data:{mime};base64,{d.b64_json}" if hasattr(d, "b64_json") else d.url
49
  for d in resp.data
50
  ]
51
 
@@ -58,27 +59,35 @@ def _common_kwargs(
58
  out_fmt: str,
59
  compression: int,
60
  transparent_bg: bool,
61
- ):
62
  """Prepare keyword arguments for Images API based on latest OpenAI spec."""
63
  kwargs: Dict[str, Any] = dict(
64
  model=MODEL,
65
  n=n,
66
- size=size,
67
- quality=quality,
68
- output_format=out_fmt,
69
  )
70
 
 
 
 
 
 
 
71
  # Prompt is optional for variations
72
  if prompt is not None:
73
  kwargs["prompt"] = prompt
74
 
 
 
 
 
75
  # Transparency via background parameter (png & webp only)
76
- if transparent_bg:
77
  kwargs["background"] = "transparent"
78
 
79
- # Compression for lossy formats
80
  if out_fmt in {"jpeg", "webp"}:
81
- kwargs["output_compression"] = f"{compression}%"
82
 
83
  return kwargs
84
 
@@ -95,11 +104,23 @@ def generate(
95
  compression: int,
96
  transparent_bg: bool,
97
  ):
 
 
 
98
  client = _client(api_key)
99
  try:
100
- resp = client.images.generate(**_common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg))
 
 
 
 
 
 
 
 
 
101
  except Exception as e:
102
- raise gr.Error(f"OpenAI error: {e}")
103
  return _img_list(resp, fmt=out_fmt)
104
 
105
 
@@ -120,20 +141,34 @@ def _extract_mask_array(mask_value: Union[np.ndarray, Dict[str, Any], None]) ->
120
 
121
  # If we already have a numpy array (ImageMask with type="numpy")
122
  if isinstance(mask_value, np.ndarray):
123
- return mask_value
124
-
125
- # If it's an EditorValue dict coming from ImageEditor/ImageMask with type="numpy"
126
- if isinstance(mask_value, dict):
127
  # Prefer the composite (all layers merged) if present
128
  comp = mask_value.get("composite")
129
- if comp is not None:
130
- return np.asarray(comp)
 
 
 
131
  # Fallback to the topmost layer
132
- layers = mask_value.get("layers")
133
- if layers:
134
- return np.asarray(layers[-1])
135
- # Unknown format ignore
136
- return None
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
 
139
  def edit_image(
@@ -148,31 +183,81 @@ def edit_image(
148
  compression: int,
149
  transparent_bg: bool,
150
  ):
 
151
  if image_numpy is None:
152
  raise gr.Error("Please upload an image.")
 
 
 
153
  img_bytes = _bytes_from_numpy(image_numpy)
154
 
155
  mask_bytes: Optional[bytes] = None
156
  mask_numpy = _extract_mask_array(mask_value)
157
 
158
  if mask_numpy is not None:
159
- # Convert painted area (any non-zero pixel) to white, else black; 1‑channel.
160
- if mask_numpy.shape[-1] == 4: # RGBA (has alpha channel)
161
- alpha = mask_numpy[:, :, 3]
162
- else: # RGB or grayscale
163
- alpha = np.any(mask_numpy != 0, axis=-1).astype(np.uint8) * 255
164
- bw = np.stack([alpha] * 3, axis=-1) # 3‑channel white/black
165
- mask_bytes = _bytes_from_numpy(bw)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  client = _client(api_key)
168
  try:
 
 
 
 
 
169
  resp = client.images.edit(
170
  image=img_bytes,
171
- mask=mask_bytes,
172
- **_common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg),
173
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  except Exception as e:
175
- raise gr.Error(f"OpenAI error: {e}")
176
  return _img_list(resp, fmt=out_fmt)
177
 
178
 
@@ -188,17 +273,28 @@ def variation_image(
188
  compression: int,
189
  transparent_bg: bool,
190
  ):
 
191
  if image_numpy is None:
192
  raise gr.Error("Please upload an image.")
193
  img_bytes = _bytes_from_numpy(image_numpy)
194
  client = _client(api_key)
195
  try:
 
 
196
  resp = client.images.variations(
197
  image=img_bytes,
198
- **_common_kwargs(None, n, size, quality, out_fmt, compression, transparent_bg),
199
  )
 
 
 
 
 
 
 
 
200
  except Exception as e:
201
- raise gr.Error(f"OpenAI error: {e}")
202
  return _img_list(resp, fmt=out_fmt)
203
 
204
 
@@ -206,18 +302,24 @@ def variation_image(
206
 
207
  def build_ui():
208
  with gr.Blocks(title="GPT-Image-1 (BYOT)") as demo:
209
- gr.Markdown("""# GPT-Image-1 Playground 🖼️🔑\nGenerate • Edit (paint mask) • Variations""")
 
 
 
 
210
 
211
  with gr.Accordion("🔐 API key", open=False):
212
  api = gr.Textbox(label="OpenAI API key", type="password", placeholder="sk-…")
213
 
214
  # Common controls
215
- n_slider = gr.Slider(1, 10, value=1, step=1, label="Number of images (n)")
216
- size = gr.Dropdown(SIZE_CHOICES, value="auto", label="Size")
217
- quality = gr.Dropdown(QUALITY_CHOICES, value="auto", label="Quality")
218
- out_fmt = gr.Radio(FORMAT_CHOICES, value="png", label="Format")
219
- compression = gr.Slider(0, 100, value=75, step=1, label="Compression (JPEG/WebP)")
220
- transparent = gr.Checkbox(False, label="Transparent background (PNG/WebP only)")
 
 
221
 
222
  def _toggle_compression(fmt):
223
  return gr.update(visible=fmt in {"jpeg", "webp"})
@@ -227,28 +329,57 @@ def build_ui():
227
  with gr.Tabs():
228
  # ----- Generate Tab ----- #
229
  with gr.TabItem("Generate"):
230
- prompt_gen = gr.Textbox(label="Prompt", lines=2, placeholder="A photorealistic ginger cat astronaut on Mars")
231
- btn_gen = gr.Button("Generate 🚀")
232
- gallery_gen = gr.Gallery(columns=2, height="auto")
 
233
  btn_gen.click(
234
  generate,
235
  inputs=[api, prompt_gen, n_slider, size, quality, out_fmt, compression, transparent],
236
  outputs=gallery_gen,
 
237
  )
238
 
239
  # ----- Edit Tab ----- #
240
  with gr.TabItem("Edit / Inpaint"):
241
- img_edit = gr.Image(label="Image", type="numpy")
242
- mask_canvas = gr.ImageMask(label="Mask – paint white where the image should change", type="numpy")
243
- prompt_edit = gr.Textbox(label="Edit prompt", lines=2, placeholder="Replace the sky with a starry night")
244
- btn_edit = gr.Button("Edit 🖌️")
245
- gallery_edit = gr.Gallery(columns=2, height="auto")
 
 
 
 
 
 
 
 
 
 
246
  btn_edit.click(
247
  edit_image,
248
  inputs=[api, img_edit, mask_canvas, prompt_edit, n_slider, size, quality, out_fmt, compression, transparent],
249
  outputs=gallery_edit,
 
250
  )
251
 
252
  # ----- Variations Tab ----- #
253
  with gr.TabItem("Variations"):
254
- img_var =
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  draw the mask directly instead of uploading one.
7
 
8
  ### How mask painting works
9
+ * Upload an image.
10
  * Use the *Mask* canvas to **paint the areas you’d like changed** (white =
11
+ editable, black = keep).
12
  The new `gr.ImageMask` component captures your brush strokes.
13
  * The painted mask is converted to a 1‑channel PNG and sent to the
14
  `images.edit()` endpoint.
 
35
 
36
 
37
  def _client(key: str) -> openai.OpenAI:
38
+ """Initializes the OpenAI client with the provided API key."""
39
  api_key = key.strip() or os.getenv("OPENAI_API_KEY", "")
40
  if not api_key:
41
  raise gr.Error("Please enter your OpenAI API key (never stored)")
 
46
  """Return list of data URLs or direct URLs depending on API response."""
47
  mime = f"image/{fmt}"
48
  return [
49
+ f"data:{mime};base64,{d.b64_json}" if hasattr(d, "b64_json") and d.b64_json else d.url
50
  for d in resp.data
51
  ]
52
 
 
59
  out_fmt: str,
60
  compression: int,
61
  transparent_bg: bool,
62
+ ) -> Dict[str, Any]:
63
  """Prepare keyword arguments for Images API based on latest OpenAI spec."""
64
  kwargs: Dict[str, Any] = dict(
65
  model=MODEL,
66
  n=n,
67
+ response_format="b64_json", # Request base64 to avoid potential URL expiry issues
 
 
68
  )
69
 
70
+ # Use API defaults if 'auto' is selected
71
+ if size != "auto":
72
+ kwargs["size"] = size
73
+ if quality != "auto":
74
+ kwargs["quality"] = quality
75
+
76
  # Prompt is optional for variations
77
  if prompt is not None:
78
  kwargs["prompt"] = prompt
79
 
80
+ # Output format specific settings
81
+ if out_fmt != "png": # API default is png
82
+ kwargs["output_format"] = out_fmt
83
+
84
  # Transparency via background parameter (png & webp only)
85
+ if transparent_bg and out_fmt in {"png", "webp"}:
86
  kwargs["background"] = "transparent"
87
 
88
+ # Compression for lossy formats (API expects integer 0-100)
89
  if out_fmt in {"jpeg", "webp"}:
90
+ kwargs["output_compression"] = compression
91
 
92
  return kwargs
93
 
 
104
  compression: int,
105
  transparent_bg: bool,
106
  ):
107
+ """Calls the OpenAI image generation endpoint."""
108
+ if not prompt:
109
+ raise gr.Error("Please enter a prompt.")
110
  client = _client(api_key)
111
  try:
112
+ common_args = _common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg)
113
+ resp = client.images.generate(**common_args)
114
+ except openai.AuthenticationError:
115
+ raise gr.Error("Invalid OpenAI API key.")
116
+ except openai.PermissionDeniedError:
117
+ raise gr.Error("Permission denied. Check your API key permissions.")
118
+ except openai.RateLimitError:
119
+ raise gr.Error("Rate limit exceeded. Please try again later.")
120
+ except openai.BadRequestError as e:
121
+ raise gr.Error(f"OpenAI Bad Request: {e}")
122
  except Exception as e:
123
+ raise gr.Error(f"An unexpected error occurred: {e}")
124
  return _img_list(resp, fmt=out_fmt)
125
 
126
 
 
141
 
142
  # If we already have a numpy array (ImageMask with type="numpy")
143
  if isinstance(mask_value, np.ndarray):
144
+ mask_arr = mask_value
145
+ # If it's an EditorValue dict coming from ImageEditor/ImageMask with type="file" or "pil"
146
+ elif isinstance(mask_value, dict):
 
147
  # Prefer the composite (all layers merged) if present
148
  comp = mask_value.get("composite")
149
+ if comp is not None and isinstance(comp, (Image.Image, np.ndarray)):
150
+ mask_arr = np.array(comp) if isinstance(comp, Image.Image) else comp
151
+ # Fallback to the mask if present (often from ImageMask)
152
+ elif mask_value.get("mask") is not None and isinstance(mask_value["mask"], (Image.Image, np.ndarray)):
153
+ mask_arr = np.array(mask_value["mask"]) if isinstance(mask_value["mask"], Image.Image) else mask_value["mask"]
154
  # Fallback to the topmost layer
155
+ elif mask_value.get("layers"):
156
+ top_layer = mask_value["layers"][-1]
157
+ if isinstance(top_layer, (Image.Image, np.ndarray)):
158
+ mask_arr = np.array(top_layer) if isinstance(top_layer, Image.Image) else top_layer
159
+ else:
160
+ return None # Cannot process layer format
161
+ else:
162
+ return None # No usable image data found in dict
163
+ else:
164
+ # Unknown format – ignore
165
+ return None
166
+
167
+ # Ensure mask_arr is a numpy array now
168
+ if not isinstance(mask_arr, np.ndarray):
169
+ return None # Should not happen after above checks, but safeguard
170
+
171
+ return mask_arr
172
 
173
 
174
  def edit_image(
 
183
  compression: int,
184
  transparent_bg: bool,
185
  ):
186
+ """Calls the OpenAI image edit endpoint."""
187
  if image_numpy is None:
188
  raise gr.Error("Please upload an image.")
189
+ if not prompt:
190
+ raise gr.Error("Please enter an edit prompt.")
191
+
192
  img_bytes = _bytes_from_numpy(image_numpy)
193
 
194
  mask_bytes: Optional[bytes] = None
195
  mask_numpy = _extract_mask_array(mask_value)
196
 
197
  if mask_numpy is not None:
198
+ # Check if the mask seems empty (all black or fully transparent)
199
+ if np.all(mask_numpy == 0) or (mask_numpy.shape[-1] == 4 and np.all(mask_numpy[:, :, 3] == 0)):
200
+ gr.Warning("The provided mask appears empty. The entire image might be edited if no mask is applied by the API.")
201
+ # We explicitly pass None if the mask is effectively empty,
202
+ # letting the API decide how to handle it (might vary by model/version)
203
+ mask_bytes = None
204
+ else:
205
+ # Convert painted area (any non-black pixel or non-transparent pixel) to white, else black; 1‑channel alpha.
206
+ # The API expects the mask as a single alpha channel where transparency indicates the area to edit.
207
+ # White in our canvas means "edit", so this needs to become transparent in the mask sent to the API.
208
+ # Black in our canvas means "keep", so this needs to become opaque in the mask sent to the API.
209
+
210
+ if mask_numpy.ndim == 2: # Grayscale
211
+ alpha = (mask_numpy == 0).astype(np.uint8) * 255 # Black becomes opaque (255), white becomes transparent (0)
212
+ elif mask_numpy.shape[-1] == 4: # RGBA (use alpha channel)
213
+ alpha = (mask_numpy[:, :, 3] == 0).astype(np.uint8) * 255 # Transparent becomes opaque, opaque becomes transparent
214
+ elif mask_numpy.shape[-1] == 3: # RGB
215
+ # Consider any non-black pixel as the area to edit (becomes transparent)
216
+ alpha = np.all(mask_numpy == [0, 0, 0], axis=-1).astype(np.uint8) * 255
217
+ else:
218
+ raise gr.Error("Unsupported mask format.")
219
+
220
+ # Create a single-channel L mode image (grayscale) for the mask
221
+ mask_img = Image.fromarray(alpha, mode='L')
222
+ out = io.BytesIO()
223
+ mask_img.save(out, format="PNG")
224
+ mask_bytes = out.getvalue()
225
+
226
+ # Debug: Save mask locally to check
227
+ # mask_img.save("debug_mask_sent_to_api.png")
228
+
229
+ else:
230
+ gr.Warning("No mask provided or mask could not be processed. The API might edit the entire image or apply a default mask.")
231
+ mask_bytes = None # Explicitly pass None if no mask is usable
232
 
233
  client = _client(api_key)
234
  try:
235
+ common_args = _common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg)
236
+ # The edit endpoint requires the prompt
237
+ if "prompt" not in common_args:
238
+ common_args["prompt"] = prompt # Should always be there via _common_kwargs, but safeguard
239
+
240
  resp = client.images.edit(
241
  image=img_bytes,
242
+ mask=mask_bytes, # Pass None if no mask or empty mask
243
+ **common_args,
244
  )
245
+ except openai.AuthenticationError:
246
+ raise gr.Error("Invalid OpenAI API key.")
247
+ except openai.PermissionDeniedError:
248
+ raise gr.Error("Permission denied. Check your API key permissions.")
249
+ except openai.RateLimitError:
250
+ raise gr.Error("Rate limit exceeded. Please try again later.")
251
+ except openai.BadRequestError as e:
252
+ # Provide more specific feedback if possible
253
+ if "mask" in str(e) and "alpha channel" in str(e):
254
+ raise gr.Error("OpenAI API Error: The mask must be a PNG image with transparency indicating the edit area. Ensure your mask was processed correctly.")
255
+ elif "size" in str(e):
256
+ raise gr.Error(f"OpenAI API Error: Image and mask size mismatch or invalid size. Ensure image is square if required by the model. Error: {e}")
257
+ else:
258
+ raise gr.Error(f"OpenAI Bad Request: {e}")
259
  except Exception as e:
260
+ raise gr.Error(f"An unexpected error occurred: {e}")
261
  return _img_list(resp, fmt=out_fmt)
262
 
263
 
 
273
  compression: int,
274
  transparent_bg: bool,
275
  ):
276
+ """Calls the OpenAI image variations endpoint."""
277
  if image_numpy is None:
278
  raise gr.Error("Please upload an image.")
279
  img_bytes = _bytes_from_numpy(image_numpy)
280
  client = _client(api_key)
281
  try:
282
+ # Prompt is None for variations
283
+ common_args = _common_kwargs(None, n, size, quality, out_fmt, compression, transparent_bg)
284
  resp = client.images.variations(
285
  image=img_bytes,
286
+ **common_args,
287
  )
288
+ except openai.AuthenticationError:
289
+ raise gr.Error("Invalid OpenAI API key.")
290
+ except openai.PermissionDeniedError:
291
+ raise gr.Error("Permission denied. Check your API key permissions.")
292
+ except openai.RateLimitError:
293
+ raise gr.Error("Rate limit exceeded. Please try again later.")
294
+ except openai.BadRequestError as e:
295
+ raise gr.Error(f"OpenAI Bad Request: {e}")
296
  except Exception as e:
297
+ raise gr.Error(f"An unexpected error occurred: {e}")
298
  return _img_list(resp, fmt=out_fmt)
299
 
300
 
 
302
 
303
  def build_ui():
304
  with gr.Blocks(title="GPT-Image-1 (BYOT)") as demo:
305
+ gr.Markdown("""# GPT-Image-1 Playground 🖼️🔑\nGenerate • Edit (paint mask!) • Variations""")
306
+ gr.Markdown(
307
+ "Enter your OpenAI API key below. It's used directly for API calls and **never stored**."
308
+ " This space uses the `gpt-image-1` model."
309
+ )
310
 
311
  with gr.Accordion("🔐 API key", open=False):
312
  api = gr.Textbox(label="OpenAI API key", type="password", placeholder="sk-…")
313
 
314
  # Common controls
315
+ with gr.Row():
316
+ n_slider = gr.Slider(1, 4, value=1, step=1, label="Number of images (n)", info="Max 4 for this demo.") # Limit n for stability/cost
317
+ size = gr.Dropdown(SIZE_CHOICES, value="auto", label="Size", info="API default if 'auto'.")
318
+ quality = gr.Dropdown(QUALITY_CHOICES, value="auto", label="Quality", info="API default if 'auto'.")
319
+ with gr.Row():
320
+ out_fmt = gr.Radio(FORMAT_CHOICES, value="png", label="Format")
321
+ compression = gr.Slider(0, 100, value=75, step=1, label="Compression % (JPEG/WebP)", visible=False)
322
+ transparent = gr.Checkbox(False, label="Transparent background (PNG/WebP only)")
323
 
324
  def _toggle_compression(fmt):
325
  return gr.update(visible=fmt in {"jpeg", "webp"})
 
329
  with gr.Tabs():
330
  # ----- Generate Tab ----- #
331
  with gr.TabItem("Generate"):
332
+ with gr.Row():
333
+ prompt_gen = gr.Textbox(label="Prompt", lines=3, placeholder="A photorealistic ginger cat astronaut on Mars", scale=4)
334
+ btn_gen = gr.Button("Generate 🚀", variant="primary", scale=1)
335
+ gallery_gen = gr.Gallery(label="Generated Images", columns=2, height="auto", preview=True)
336
  btn_gen.click(
337
  generate,
338
  inputs=[api, prompt_gen, n_slider, size, quality, out_fmt, compression, transparent],
339
  outputs=gallery_gen,
340
+ api_name="generate"
341
  )
342
 
343
  # ----- Edit Tab ----- #
344
  with gr.TabItem("Edit / Inpaint"):
345
+ gr.Markdown("Upload an image, then **paint the area to change** in the mask canvas below (white = edit).")
346
+ with gr.Row():
347
+ img_edit = gr.Image(label="Source Image", type="numpy", height=400)
348
+ # Use ImageMask component for interactive painting
349
+ mask_canvas = gr.ImageMask(
350
+ label="Mask – Paint White Where Image Should Change",
351
+ type="numpy", # Get mask as numpy array
352
+ # brush=gr.Brush(colors=["#FFFFFF"], color_mode="fixed"), # Force white brush
353
+ # mask_opacity=0.7 # Adjust mask visibility on image
354
+ height=400
355
+ )
356
+ with gr.Row():
357
+ prompt_edit = gr.Textbox(label="Edit prompt", lines=2, placeholder="Replace the sky with a starry night", scale=4)
358
+ btn_edit = gr.Button("Edit 🖌️", variant="primary", scale=1)
359
+ gallery_edit = gr.Gallery(label="Edited Images", columns=2, height="auto", preview=True)
360
  btn_edit.click(
361
  edit_image,
362
  inputs=[api, img_edit, mask_canvas, prompt_edit, n_slider, size, quality, out_fmt, compression, transparent],
363
  outputs=gallery_edit,
364
+ api_name="edit"
365
  )
366
 
367
  # ----- Variations Tab ----- #
368
  with gr.TabItem("Variations"):
369
+ gr.Markdown("Upload an image to generate variations.")
370
+ with gr.Row():
371
+ img_var = gr.Image(label="Source Image", type="numpy", height=400, scale=4)
372
+ btn_var = gr.Button("Create Variations ✨", variant="primary", scale=1)
373
+ gallery_var = gr.Gallery(label="Variations", columns=2, height="auto", preview=True)
374
+ btn_var.click(
375
+ variation_image,
376
+ inputs=[api, img_var, n_slider, size, quality, out_fmt, compression, transparent],
377
+ outputs=gallery_var,
378
+ api_name="variations"
379
+ )
380
+
381
+ return demo
382
+
383
+ if __name__ == "__main__":
384
+ app = build_ui()
385
+ app.launch()