Zack3D committed on
Commit
bc30d26
·
verified ·
1 Parent(s): 68971bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -71
app.py CHANGED
@@ -1,22 +1,3 @@
1
- """
2
- Gradio Space: GPT-Image-1 – BYOT playground
3
- Generate · Edit (paint mask!) · Variations
4
- ==========================================
5
- Adds an **in-browser paint tool** for the edit / inpaint workflow so users can
6
- draw the mask directly instead of uploading one.
7
-
8
- ### How mask painting works
9
- * Upload an image.
10
- * Use the *Mask* canvas to **paint the areas you’d like changed** (white =
11
- editable, black = keep).
12
- The new `gr.ImageMask` component captures your brush strokes.
13
- * The painted mask is converted to a 1‑channel PNG and sent to the
14
- `images.edit()` endpoint.
15
-
16
- All other controls (size, quality, format, compression, n, background) stay the
17
- same.
18
- """
19
-
20
  from __future__ import annotations
21
 
22
  import io
@@ -45,6 +26,7 @@ def _client(key: str) -> openai.OpenAI:
45
  def _img_list(resp, *, fmt: str) -> List[str]:
46
  """Return list of data URLs or direct URLs depending on API response."""
47
  mime = f"image/{fmt}"
 
48
  return [
49
  f"data:{mime};base64,{d.b64_json}" if hasattr(d, "b64_json") and d.b64_json else d.url
50
  for d in resp.data
@@ -64,7 +46,7 @@ def _common_kwargs(
64
  kwargs: Dict[str, Any] = dict(
65
  model=MODEL,
66
  n=n,
67
- response_format="b64_json", # Request base64 to avoid potential URL expiry issues
68
  )
69
 
70
  # Use API defaults if 'auto' is selected
@@ -77,8 +59,8 @@ def _common_kwargs(
77
  if prompt is not None:
78
  kwargs["prompt"] = prompt
79
 
80
- # Output format specific settings
81
- if out_fmt != "png": # API default is png
82
  kwargs["output_format"] = out_fmt
83
 
84
  # Transparency via background parameter (png & webp only)
@@ -87,7 +69,8 @@ def _common_kwargs(
87
 
88
  # Compression for lossy formats (API expects integer 0-100)
89
  if out_fmt in {"jpeg", "webp"}:
90
- kwargs["output_compression"] = compression
 
91
 
92
  return kwargs
93
 
@@ -114,11 +97,23 @@ def generate(
114
  except openai.AuthenticationError:
115
  raise gr.Error("Invalid OpenAI API key.")
116
  except openai.PermissionDeniedError:
117
- raise gr.Error("Permission denied. Check your API key permissions.")
118
  except openai.RateLimitError:
119
  raise gr.Error("Rate limit exceeded. Please try again later.")
120
  except openai.BadRequestError as e:
121
- raise gr.Error(f"OpenAI Bad Request: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
122
  except Exception as e:
123
  raise gr.Error(f"An unexpected error occurred: {e}")
124
  return _img_list(resp, fmt=out_fmt)
@@ -196,38 +191,62 @@ def edit_image(
196
 
197
  if mask_numpy is not None:
198
  # Check if the mask seems empty (all black or fully transparent)
199
- if np.all(mask_numpy == 0) or (mask_numpy.shape[-1] == 4 and np.all(mask_numpy[:, :, 3] == 0)):
200
- gr.Warning("The provided mask appears empty. The entire image might be edited if no mask is applied by the API.")
201
- # We explicitly pass None if the mask is effectively empty,
202
- # letting the API decide how to handle it (might vary by model/version)
 
 
 
 
 
 
 
203
  mask_bytes = None
204
  else:
205
- # Convert painted area (any non-black pixel or non-transparent pixel) to white, else black; 1‑channel alpha.
206
- # The API expects the mask as a single alpha channel where transparency indicates the area to edit.
207
- # White in our canvas means "edit", so this needs to become transparent in the mask sent to the API.
208
- # Black in our canvas means "keep", so this needs to become opaque in the mask sent to the API.
209
-
210
- if mask_numpy.ndim == 2: # Grayscale
211
- alpha = (mask_numpy == 0).astype(np.uint8) * 255 # Black becomes opaque (255), white becomes transparent (0)
212
- elif mask_numpy.shape[-1] == 4: # RGBA (use alpha channel)
213
- alpha = (mask_numpy[:, :, 3] == 0).astype(np.uint8) * 255 # Transparent becomes opaque, opaque becomes transparent
214
- elif mask_numpy.shape[-1] == 3: # RGB
215
- # Consider any non-black pixel as the area to edit (becomes transparent)
216
- alpha = np.all(mask_numpy == [0, 0, 0], axis=-1).astype(np.uint8) * 255
 
 
 
 
 
 
 
 
 
 
217
  else:
218
  raise gr.Error("Unsupported mask format.")
219
 
220
- # Create a single-channel L mode image (grayscale) for the mask
221
  mask_img = Image.fromarray(alpha, mode='L')
 
 
 
 
 
 
 
222
  out = io.BytesIO()
223
- mask_img.save(out, format="PNG")
224
  mask_bytes = out.getvalue()
225
 
226
  # Debug: Save mask locally to check
227
- # mask_img.save("debug_mask_sent_to_api.png")
228
 
229
  else:
230
- gr.Warning("No mask provided or mask could not be processed. The API might edit the entire image or apply a default mask.")
231
  mask_bytes = None # Explicitly pass None if no mask is usable
232
 
233
  client = _client(api_key)
@@ -237,25 +256,39 @@ def edit_image(
237
  if "prompt" not in common_args:
238
  common_args["prompt"] = prompt # Should always be there via _common_kwargs, but safeguard
239
 
240
- resp = client.images.edit(
241
- image=img_bytes,
242
- mask=mask_bytes, # Pass None if no mask or empty mask
243
- **common_args,
244
- )
 
 
 
 
 
245
  except openai.AuthenticationError:
246
  raise gr.Error("Invalid OpenAI API key.")
247
  except openai.PermissionDeniedError:
248
- raise gr.Error("Permission denied. Check your API key permissions.")
249
  except openai.RateLimitError:
250
  raise gr.Error("Rate limit exceeded. Please try again later.")
251
  except openai.BadRequestError as e:
252
- # Provide more specific feedback if possible
253
- if "mask" in str(e) and "alpha channel" in str(e):
254
- raise gr.Error("OpenAI API Error: The mask must be a PNG image with transparency indicating the edit area. Ensure your mask was processed correctly.")
255
- elif "size" in str(e):
256
- raise gr.Error(f"OpenAI API Error: Image and mask size mismatch or invalid size. Ensure image is square if required by the model. Error: {e}")
257
- else:
258
- raise gr.Error(f"OpenAI Bad Request: {e}")
 
 
 
 
 
 
 
 
 
259
  except Exception as e:
260
  raise gr.Error(f"An unexpected error occurred: {e}")
261
  return _img_list(resp, fmt=out_fmt)
@@ -274,6 +307,10 @@ def variation_image(
274
  transparent_bg: bool,
275
  ):
276
  """Calls the OpenAI image variations endpoint."""
 
 
 
 
277
  if image_numpy is None:
278
  raise gr.Error("Please upload an image.")
279
  img_bytes = _bytes_from_numpy(image_numpy)
@@ -292,7 +329,19 @@ def variation_image(
292
  except openai.RateLimitError:
293
  raise gr.Error("Rate limit exceeded. Please try again later.")
294
  except openai.BadRequestError as e:
295
- raise gr.Error(f"OpenAI Bad Request: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
296
  except Exception as e:
297
  raise gr.Error(f"An unexpected error occurred: {e}")
298
  return _img_list(resp, fmt=out_fmt)
@@ -306,6 +355,7 @@ def build_ui():
306
  gr.Markdown(
307
  "Enter your OpenAI API key below. It's used directly for API calls and **never stored**."
308
  " This space uses the `gpt-image-1` model."
 
309
  )
310
 
311
  with gr.Accordion("🔐 API key", open=False):
@@ -317,15 +367,17 @@ def build_ui():
317
  size = gr.Dropdown(SIZE_CHOICES, value="auto", label="Size", info="API default if 'auto'.")
318
  quality = gr.Dropdown(QUALITY_CHOICES, value="auto", label="Quality", info="API default if 'auto'.")
319
  with gr.Row():
320
- out_fmt = gr.Radio(FORMAT_CHOICES, value="png", label="Format")
321
- compression = gr.Slider(0, 100, value=75, step=1, label="Compression % (JPEG/WebP)", visible=False)
322
- transparent = gr.Checkbox(False, label="Transparent background (PNG/WebP only)")
323
 
324
  def _toggle_compression(fmt):
325
  return gr.update(visible=fmt in {"jpeg", "webp"})
326
 
327
  out_fmt.change(_toggle_compression, inputs=out_fmt, outputs=compression)
328
 
 
 
329
  with gr.Tabs():
330
  # ----- Generate Tab ----- #
331
  with gr.TabItem("Generate"):
@@ -335,22 +387,20 @@ def build_ui():
335
  gallery_gen = gr.Gallery(label="Generated Images", columns=2, height="auto", preview=True)
336
  btn_gen.click(
337
  generate,
338
- inputs=[api, prompt_gen, n_slider, size, quality, out_fmt, compression, transparent],
339
  outputs=gallery_gen,
340
  api_name="generate"
341
  )
342
 
343
  # ----- Edit Tab ----- #
344
  with gr.TabItem("Edit / Inpaint"):
345
- gr.Markdown("Upload an image, then **paint the area to change** in the mask canvas below (white = edit).")
346
  with gr.Row():
347
  img_edit = gr.Image(label="Source Image", type="numpy", height=400)
348
  # Use ImageMask component for interactive painting
349
  mask_canvas = gr.ImageMask(
350
  label="Mask – Paint White Where Image Should Change",
351
  type="numpy", # Get mask as numpy array
352
- # brush=gr.Brush(colors=["#FFFFFF"], color_mode="fixed"), # Force white brush
353
- # mask_opacity=0.7 # Adjust mask visibility on image
354
  height=400
355
  )
356
  with gr.Row():
@@ -359,21 +409,21 @@ def build_ui():
359
  gallery_edit = gr.Gallery(label="Edited Images", columns=2, height="auto", preview=True)
360
  btn_edit.click(
361
  edit_image,
362
- inputs=[api, img_edit, mask_canvas, prompt_edit, n_slider, size, quality, out_fmt, compression, transparent],
363
  outputs=gallery_edit,
364
  api_name="edit"
365
  )
366
 
367
  # ----- Variations Tab ----- #
368
- with gr.TabItem("Variations"):
369
- gr.Markdown("Upload an image to generate variations.")
370
  with gr.Row():
371
  img_var = gr.Image(label="Source Image", type="numpy", height=400, scale=4)
372
  btn_var = gr.Button("Create Variations ✨", variant="primary", scale=1)
373
  gallery_var = gr.Gallery(label="Variations", columns=2, height="auto", preview=True)
374
  btn_var.click(
375
  variation_image,
376
- inputs=[api, img_var, n_slider, size, quality, out_fmt, compression, transparent],
377
  outputs=gallery_var,
378
  api_name="variations"
379
  )
@@ -382,4 +432,6 @@ def build_ui():
382
 
383
  if __name__ == "__main__":
384
  app = build_ui()
385
- app.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
  import io
 
26
  def _img_list(resp, *, fmt: str) -> List[str]:
27
  """Return list of data URLs or direct URLs depending on API response."""
28
  mime = f"image/{fmt}"
29
+ # Ensure b64_json exists and is not None/empty before using it
30
  return [
31
  f"data:{mime};base64,{d.b64_json}" if hasattr(d, "b64_json") and d.b64_json else d.url
32
  for d in resp.data
 
46
  kwargs: Dict[str, Any] = dict(
47
  model=MODEL,
48
  n=n,
49
+ # REMOVED: response_format="b64_json", # This parameter caused the BadRequestError
50
  )
51
 
52
  # Use API defaults if 'auto' is selected
 
59
  if prompt is not None:
60
  kwargs["prompt"] = prompt
61
 
62
+ # Output format specific settings (API default is png)
63
+ if out_fmt != "png":
64
  kwargs["output_format"] = out_fmt
65
 
66
  # Transparency via background parameter (png & webp only)
 
69
 
70
  # Compression for lossy formats (API expects integer 0-100)
71
  if out_fmt in {"jpeg", "webp"}:
72
+ # Ensure compression is an integer as expected by the API
73
+ kwargs["output_compression"] = int(compression)
74
 
75
  return kwargs
76
 
 
97
  except openai.AuthenticationError:
98
  raise gr.Error("Invalid OpenAI API key.")
99
  except openai.PermissionDeniedError:
100
+ raise gr.Error("Permission denied. Check your API key permissions or complete required verification for gpt-image-1.")
101
  except openai.RateLimitError:
102
  raise gr.Error("Rate limit exceeded. Please try again later.")
103
  except openai.BadRequestError as e:
104
+ # Extract the specific error message if possible
105
+ error_message = str(e)
106
+ try:
107
+ # Attempt to parse the error body if it's JSON-like
108
+ import json
109
+ body = json.loads(str(e.body)) # e.body might be bytes
110
+ if isinstance(body, dict) and 'error' in body and 'message' in body['error']:
111
+ error_message = f"OpenAI Bad Request: {body['error']['message']}"
112
+ else:
113
+ error_message = f"OpenAI Bad Request: {e}"
114
+ except:
115
+ error_message = f"OpenAI Bad Request: {e}" # Fallback
116
+ raise gr.Error(error_message)
117
  except Exception as e:
118
  raise gr.Error(f"An unexpected error occurred: {e}")
119
  return _img_list(resp, fmt=out_fmt)
 
191
 
192
  if mask_numpy is not None:
193
  # Check if the mask seems empty (all black or fully transparent)
194
+ is_empty = False
195
+ if mask_numpy.ndim == 2: # Grayscale
196
+ is_empty = np.all(mask_numpy == 0)
197
+ elif mask_numpy.shape[-1] == 4: # RGBA
198
+ is_empty = np.all(mask_numpy[:, :, 3] == 0)
199
+ elif mask_numpy.shape[-1] == 3: # RGB
200
+ is_empty = np.all(mask_numpy == 0)
201
+
202
+ if is_empty:
203
+ gr.Warning("The provided mask appears empty (all black/transparent). The API might edit the entire image or ignore the mask.")
204
+ # Send no mask when nothing was painted: the conversion below would turn an empty canvas into a fully opaque mask, i.e. one with no editable (transparent) area.
205
  mask_bytes = None
206
  else:
207
+ # Convert the mask to the format required by the API:
208
+ # A PNG image where TRANSPARENT areas indicate where the image should be edited.
209
+ # Our Gradio mask uses WHITE to indicate the edit area.
210
+ # So, we need to create an alpha channel where white pixels in the input mask become transparent (0),
211
+ # and black/other pixels become opaque (255).
212
+
213
+ if mask_numpy.ndim == 2: # Grayscale input mask
214
+ # Assume white (255) means edit -> make transparent (0 alpha)
215
+ # Assume black (0) means keep -> make opaque (255 alpha)
216
+ alpha = (mask_numpy == 0).astype(np.uint8) * 255
217
+ elif mask_numpy.shape[-1] == 4: # RGBA input mask (from gr.ImageMask)
218
+ # Only the alpha channel is inspected: any pixel with non-zero alpha counts as painted,
219
+ # whatever its RGB colour (there is no fallback that looks at the RGB values).
220
+ # gr.ImageMask often returns RGBA where painted area is white [255,255,255,255] and background is [0,0,0,0]
221
+ # We want the painted (white) area to be transparent in the final mask.
222
+ # We want the unpainted (transparent black) area to be opaque in the final mask.
223
+ alpha = (mask_numpy[:, :, 3] == 0).astype(np.uint8) * 255
224
+ elif mask_numpy.shape[-1] == 3: # RGB input mask
225
+ # Assume white [255, 255, 255] means edit -> make transparent (0 alpha)
226
+ # Assume black [0, 0, 0] or other colors mean keep -> make opaque (255 alpha)
227
+ is_white = np.all(mask_numpy == [255, 255, 255], axis=-1)
228
+ alpha = (~is_white).astype(np.uint8) * 255
229
  else:
230
  raise gr.Error("Unsupported mask format.")
231
 
232
+ # Create a single-channel L mode image (grayscale/alpha) for the mask
233
  mask_img = Image.fromarray(alpha, mode='L')
234
+
235
+ # The API expects an RGBA PNG where the alpha channel defines the mask.
236
+ # Create a black image with the calculated alpha channel.
237
+ rgba_mask = Image.new("RGBA", mask_img.size, (0, 0, 0, 0))
238
+ black_opaque = Image.new("L", mask_img.size, 0) # Black base
239
+ rgba_mask.putalpha(mask_img) # Use the calculated alpha
240
+
241
  out = io.BytesIO()
242
+ rgba_mask.save(out, format="PNG")
243
  mask_bytes = out.getvalue()
244
 
245
  # Debug: Save mask locally to check
246
+ # rgba_mask.save("debug_mask_sent_to_api.png")
247
 
248
  else:
249
+ gr.Info("No mask provided. The API will attempt to edit the image based on the prompt without a specific mask.")
250
  mask_bytes = None # Explicitly pass None if no mask is usable
251
 
252
  client = _client(api_key)
 
256
  if "prompt" not in common_args:
257
  common_args["prompt"] = prompt # Should always be there via _common_kwargs, but safeguard
258
 
259
+ # Ensure image and mask are passed correctly
260
+ api_kwargs = {
261
+ "image": img_bytes,
262
+ **common_args
263
+ }
264
+ if mask_bytes is not None:
265
+ api_kwargs["mask"] = mask_bytes
266
+
267
+ resp = client.images.edit(**api_kwargs)
268
+
269
  except openai.AuthenticationError:
270
  raise gr.Error("Invalid OpenAI API key.")
271
  except openai.PermissionDeniedError:
272
+ raise gr.Error("Permission denied. Check your API key permissions or complete required verification for gpt-image-1.")
273
  except openai.RateLimitError:
274
  raise gr.Error("Rate limit exceeded. Please try again later.")
275
  except openai.BadRequestError as e:
276
+ error_message = str(e)
277
+ try:
278
+ import json
279
+ body = json.loads(str(e.body))
280
+ if isinstance(body, dict) and 'error' in body and 'message' in body['error']:
281
+ error_message = f"OpenAI Bad Request: {body['error']['message']}"
282
+ # Add specific advice based on common mask errors
283
+ if "mask" in error_message.lower():
284
+ error_message += " (Ensure mask is a valid PNG with an alpha channel and matches the image dimensions.)"
285
+ elif "size" in error_message.lower():
286
+ error_message += " (Ensure image and mask dimensions match and are supported.)"
287
+ else:
288
+ error_message = f"OpenAI Bad Request: {e}"
289
+ except:
290
+ error_message = f"OpenAI Bad Request: {e}" # Fallback
291
+ raise gr.Error(error_message)
292
  except Exception as e:
293
  raise gr.Error(f"An unexpected error occurred: {e}")
294
  return _img_list(resp, fmt=out_fmt)
 
307
  transparent_bg: bool,
308
  ):
309
  """Calls the OpenAI image variations endpoint."""
310
+ # NOTE: Variations are only supported for DALL-E 2 according to docs.
311
+ # This might fail with gpt-image-1. Consider adding a check or using DALL-E 2.
312
+ gr.Warning("Note: Image variations are officially supported for DALL·E 2, not gpt-image-1. This may not work as expected.")
313
+
314
  if image_numpy is None:
315
  raise gr.Error("Please upload an image.")
316
  img_bytes = _bytes_from_numpy(image_numpy)
 
329
  except openai.RateLimitError:
330
  raise gr.Error("Rate limit exceeded. Please try again later.")
331
  except openai.BadRequestError as e:
332
+ error_message = str(e)
333
+ try:
334
+ import json
335
+ body = json.loads(str(e.body))
336
+ if isinstance(body, dict) and 'error' in body and 'message' in body['error']:
337
+ error_message = f"OpenAI Bad Request: {body['error']['message']}"
338
+ if "model does not support variations" in error_message.lower():
339
+ error_message += " (gpt-image-1 does not support variations, use DALL·E 2 instead)."
340
+ else:
341
+ error_message = f"OpenAI Bad Request: {e}"
342
+ except:
343
+ error_message = f"OpenAI Bad Request: {e}" # Fallback
344
+ raise gr.Error(error_message)
345
  except Exception as e:
346
  raise gr.Error(f"An unexpected error occurred: {e}")
347
  return _img_list(resp, fmt=out_fmt)
 
355
  gr.Markdown(
356
  "Enter your OpenAI API key below. It's used directly for API calls and **never stored**."
357
  " This space uses the `gpt-image-1` model."
358
+ " **Note:** `gpt-image-1` may require organization verification. Variations endpoint might not work with this model (use DALL·E 2)."
359
  )
360
 
361
  with gr.Accordion("🔐 API key", open=False):
 
367
  size = gr.Dropdown(SIZE_CHOICES, value="auto", label="Size", info="API default if 'auto'.")
368
  quality = gr.Dropdown(QUALITY_CHOICES, value="auto", label="Quality", info="API default if 'auto'.")
369
  with gr.Row():
370
+ out_fmt = gr.Radio(FORMAT_CHOICES, value="png", label="Format", scale=1)
371
+ compression = gr.Slider(0, 100, value=75, step=1, label="Compression % (JPEG/WebP)", visible=False, scale=2)
372
+ transparent = gr.Checkbox(False, label="Transparent background (PNG/WebP only)", scale=1)
373
 
374
  def _toggle_compression(fmt):
375
  return gr.update(visible=fmt in {"jpeg", "webp"})
376
 
377
  out_fmt.change(_toggle_compression, inputs=out_fmt, outputs=compression)
378
 
379
+ common_inputs = [api, n_slider, size, quality, out_fmt, compression, transparent]
380
+
381
  with gr.Tabs():
382
  # ----- Generate Tab ----- #
383
  with gr.TabItem("Generate"):
 
387
  gallery_gen = gr.Gallery(label="Generated Images", columns=2, height="auto", preview=True)
388
  btn_gen.click(
389
  generate,
390
+ inputs=[prompt_gen] + common_inputs, # Prepend specific inputs
391
  outputs=gallery_gen,
392
  api_name="generate"
393
  )
394
 
395
  # ----- Edit Tab ----- #
396
  with gr.TabItem("Edit / Inpaint"):
397
+ gr.Markdown("Upload an image, then **paint the area to change** in the mask canvas below (white = edit area). The API requires the mask and image to have the same dimensions.")
398
  with gr.Row():
399
  img_edit = gr.Image(label="Source Image", type="numpy", height=400)
400
  # Use ImageMask component for interactive painting
401
  mask_canvas = gr.ImageMask(
402
  label="Mask – Paint White Where Image Should Change",
403
  type="numpy", # Get mask as numpy array
 
 
404
  height=400
405
  )
406
  with gr.Row():
 
409
  gallery_edit = gr.Gallery(label="Edited Images", columns=2, height="auto", preview=True)
410
  btn_edit.click(
411
  edit_image,
412
+ inputs=[img_edit, mask_canvas, prompt_edit] + common_inputs, # Prepend specific inputs
413
  outputs=gallery_edit,
414
  api_name="edit"
415
  )
416
 
417
  # ----- Variations Tab ----- #
418
+ with gr.TabItem("Variations (DALL·E 2 only)"):
419
+ gr.Markdown("Upload an image to generate variations. **Note:** This endpoint is officially supported for DALL·E 2, not `gpt-image-1`. It likely won't work here.")
420
  with gr.Row():
421
  img_var = gr.Image(label="Source Image", type="numpy", height=400, scale=4)
422
  btn_var = gr.Button("Create Variations ✨", variant="primary", scale=1)
423
  gallery_var = gr.Gallery(label="Variations", columns=2, height="auto", preview=True)
424
  btn_var.click(
425
  variation_image,
426
+ inputs=[img_var] + common_inputs, # Prepend specific inputs
427
  outputs=gallery_var,
428
  api_name="variations"
429
  )
 
432
 
433
  if __name__ == "__main__":
434
  app = build_ui()
435
+ # Set the GRADIO_SHARE environment variable to "true" to create a public share link (for local runs; a hosted Space is already publicly reachable)
436
+ # Set debug=True for more detailed logs in the console
437
+ app.launch(share=os.getenv("GRADIO_SHARE") == "true", debug=True)