gpt-image-1-playground

Running

App Files Community

Zack3D committed 15 days ago

Commit

bc30d26

verified ·

1 Parent(s): 68971bf

Update app.py

Browse files

Files changed (1) hide show

app.py +123 -71

app.py CHANGED Viewed

@@ -1,22 +1,3 @@
-"""
-Gradio Space: GPT-Image-1 – BYOT playground
-Generate · Edit (paint mask!) · Variations
-==========================================
-Adds an **in-browser paint tool** for the edit / inpaint workflow so users can
-draw the mask directly instead of uploading one.
-### How mask painting works
-* Upload an image.
-* Use the *Mask* canvas to **paint the areas you’d like changed** (white =
-  editable, black = keep).
-  The new `gr.ImageMask` component captures your brush strokes.
-* The painted mask is converted to a 1‑channel PNG and sent to the
-  `images.edit()` endpoint.
-All other controls (size, quality, format, compression, n, background) stay the
-same.
-"""
 from __future__ import annotations
 import io
@@ -45,6 +26,7 @@ def _client(key: str) -> openai.OpenAI:
 def _img_list(resp, *, fmt: str) -> List[str]:
     """Return list of data URLs or direct URLs depending on API response."""
     mime = f"image/{fmt}"
     return [
         f"data:{mime};base64,{d.b64_json}" if hasattr(d, "b64_json") and d.b64_json else d.url
         for d in resp.data
@@ -64,7 +46,7 @@ def _common_kwargs(
     kwargs: Dict[str, Any] = dict(
         model=MODEL,
         n=n,
-        response_format="b64_json", # Request base64 to avoid potential URL expiry issues
     )
     # Use API defaults if 'auto' is selected
@@ -77,8 +59,8 @@ def _common_kwargs(
     if prompt is not None:
         kwargs["prompt"] = prompt
-    # Output format specific settings
-    if out_fmt != "png": # API default is png
         kwargs["output_format"] = out_fmt
     # Transparency via background parameter (png & webp only)
@@ -87,7 +69,8 @@ def _common_kwargs(
     # Compression for lossy formats (API expects integer 0-100)
     if out_fmt in {"jpeg", "webp"}:
-        kwargs["output_compression"] = compression
     return kwargs
@@ -114,11 +97,23 @@ def generate(
     except openai.AuthenticationError:
          raise gr.Error("Invalid OpenAI API key.")
     except openai.PermissionDeniedError:
-        raise gr.Error("Permission denied. Check your API key permissions.")
     except openai.RateLimitError:
         raise gr.Error("Rate limit exceeded. Please try again later.")
     except openai.BadRequestError as e:
-        raise gr.Error(f"OpenAI Bad Request: {e}")
     except Exception as e:
         raise gr.Error(f"An unexpected error occurred: {e}")
     return _img_list(resp, fmt=out_fmt)
@@ -196,38 +191,62 @@ def edit_image(
     if mask_numpy is not None:
         # Check if the mask seems empty (all black or fully transparent)
-        if np.all(mask_numpy == 0) or (mask_numpy.shape[-1] == 4 and np.all(mask_numpy[:, :, 3] == 0)):
-             gr.Warning("The provided mask appears empty. The entire image might be edited if no mask is applied by the API.")
-             # We explicitly pass None if the mask is effectively empty,
-             # letting the API decide how to handle it (might vary by model/version)
              mask_bytes = None
         else:
-            # Convert painted area (any non-black pixel or non-transparent pixel) to white, else black; 1‑channel alpha.
-            # The API expects the mask as a single alpha channel where transparency indicates the area to edit.
-            # White in our canvas means "edit", so this needs to become transparent in the mask sent to the API.
-            # Black in our canvas means "keep", so this needs to become opaque in the mask sent to the API.
-            if mask_numpy.ndim == 2: # Grayscale
-                alpha = (mask_numpy == 0).astype(np.uint8) * 255 # Black becomes opaque (255), white becomes transparent (0)
-            elif mask_numpy.shape[-1] == 4:  # RGBA (use alpha channel)
-                alpha = (mask_numpy[:, :, 3] == 0).astype(np.uint8) * 255 # Transparent becomes opaque, opaque becomes transparent
-            elif mask_numpy.shape[-1] == 3: # RGB
-                # Consider any non-black pixel as the area to edit (becomes transparent)
-                 alpha = np.all(mask_numpy == [0, 0, 0], axis=-1).astype(np.uint8) * 255
             else:
                  raise gr.Error("Unsupported mask format.")
-            # Create a single-channel L mode image (grayscale) for the mask
             mask_img = Image.fromarray(alpha, mode='L')
             out = io.BytesIO()
-            mask_img.save(out, format="PNG")
             mask_bytes = out.getvalue()
             # Debug: Save mask locally to check
-            # mask_img.save("debug_mask_sent_to_api.png")
     else:
-        gr.Warning("No mask provided or mask could not be processed. The API might edit the entire image or apply a default mask.")
         mask_bytes = None # Explicitly pass None if no mask is usable
     client = _client(api_key)
@@ -237,25 +256,39 @@ def edit_image(
         if "prompt" not in common_args:
              common_args["prompt"] = prompt # Should always be there via _common_kwargs, but safeguard
-        resp = client.images.edit(
-            image=img_bytes,
-            mask=mask_bytes, # Pass None if no mask or empty mask
-            **common_args,
-        )
     except openai.AuthenticationError:
          raise gr.Error("Invalid OpenAI API key.")
     except openai.PermissionDeniedError:
-        raise gr.Error("Permission denied. Check your API key permissions.")
     except openai.RateLimitError:
         raise gr.Error("Rate limit exceeded. Please try again later.")
     except openai.BadRequestError as e:
-        # Provide more specific feedback if possible
-        if "mask" in str(e) and "alpha channel" in str(e):
-             raise gr.Error("OpenAI API Error: The mask must be a PNG image with transparency indicating the edit area. Ensure your mask was processed correctly.")
-        elif "size" in str(e):
-             raise gr.Error(f"OpenAI API Error: Image and mask size mismatch or invalid size. Ensure image is square if required by the model. Error: {e}")
-        else:
-             raise gr.Error(f"OpenAI Bad Request: {e}")
     except Exception as e:
         raise gr.Error(f"An unexpected error occurred: {e}")
     return _img_list(resp, fmt=out_fmt)
@@ -274,6 +307,10 @@ def variation_image(
     transparent_bg: bool,
 ):
     """Calls the OpenAI image variations endpoint."""
     if image_numpy is None:
         raise gr.Error("Please upload an image.")
     img_bytes = _bytes_from_numpy(image_numpy)
@@ -292,7 +329,19 @@ def variation_image(
     except openai.RateLimitError:
         raise gr.Error("Rate limit exceeded. Please try again later.")
     except openai.BadRequestError as e:
-        raise gr.Error(f"OpenAI Bad Request: {e}")
     except Exception as e:
         raise gr.Error(f"An unexpected error occurred: {e}")
     return _img_list(resp, fmt=out_fmt)
@@ -306,6 +355,7 @@ def build_ui():
         gr.Markdown(
              "Enter your OpenAI API key below. It's used directly for API calls and **never stored**."
              " This space uses the `gpt-image-1` model."
         )
         with gr.Accordion("🔐 API key", open=False):
@@ -317,15 +367,17 @@ def build_ui():
              size = gr.Dropdown(SIZE_CHOICES, value="auto", label="Size", info="API default if 'auto'.")
              quality = gr.Dropdown(QUALITY_CHOICES, value="auto", label="Quality", info="API default if 'auto'.")
         with gr.Row():
-            out_fmt = gr.Radio(FORMAT_CHOICES, value="png", label="Format")
-            compression = gr.Slider(0, 100, value=75, step=1, label="Compression % (JPEG/WebP)", visible=False)
-            transparent = gr.Checkbox(False, label="Transparent background (PNG/WebP only)")
         def _toggle_compression(fmt):
             return gr.update(visible=fmt in {"jpeg", "webp"})
         out_fmt.change(_toggle_compression, inputs=out_fmt, outputs=compression)
         with gr.Tabs():
             # ----- Generate Tab ----- #
             with gr.TabItem("Generate"):
@@ -335,22 +387,20 @@ def build_ui():
                 gallery_gen = gr.Gallery(label="Generated Images", columns=2, height="auto", preview=True)
                 btn_gen.click(
                     generate,
-                    inputs=[api, prompt_gen, n_slider, size, quality, out_fmt, compression, transparent],
                     outputs=gallery_gen,
                     api_name="generate"
                 )
             # ----- Edit Tab ----- #
             with gr.TabItem("Edit / Inpaint"):
-                gr.Markdown("Upload an image, then **paint the area to change** in the mask canvas below (white = edit).")
                 with gr.Row():
                     img_edit = gr.Image(label="Source Image", type="numpy", height=400)
                     # Use ImageMask component for interactive painting
                     mask_canvas = gr.ImageMask(
                          label="Mask – Paint White Where Image Should Change",
                          type="numpy", # Get mask as numpy array
-                         # brush=gr.Brush(colors=["#FFFFFF"], color_mode="fixed"), # Force white brush
-                         # mask_opacity=0.7 # Adjust mask visibility on image
                          height=400
                     )
                 with gr.Row():
@@ -359,21 +409,21 @@ def build_ui():
                 gallery_edit = gr.Gallery(label="Edited Images", columns=2, height="auto", preview=True)
                 btn_edit.click(
                     edit_image,
-                    inputs=[api, img_edit, mask_canvas, prompt_edit, n_slider, size, quality, out_fmt, compression, transparent],
                     outputs=gallery_edit,
                     api_name="edit"
                 )
             # ----- Variations Tab ----- #
-            with gr.TabItem("Variations"):
-                gr.Markdown("Upload an image to generate variations.")
                 with gr.Row():
                     img_var = gr.Image(label="Source Image", type="numpy", height=400, scale=4)
                     btn_var = gr.Button("Create Variations ✨", variant="primary", scale=1)
                 gallery_var = gr.Gallery(label="Variations", columns=2, height="auto", preview=True)
                 btn_var.click(
                     variation_image,
-                    inputs=[api, img_var, n_slider, size, quality, out_fmt, compression, transparent],
                     outputs=gallery_var,
                     api_name="variations"
                 )
@@ -382,4 +432,6 @@ def build_ui():
 if __name__ == "__main__":
     app = build_ui()
-    app.launch()

 from __future__ import annotations
 import io
 def _img_list(resp, *, fmt: str) -> List[str]:
     """Return list of data URLs or direct URLs depending on API response."""
     mime = f"image/{fmt}"
+    # Ensure b64_json exists and is not None/empty before using it
     return [
         f"data:{mime};base64,{d.b64_json}" if hasattr(d, "b64_json") and d.b64_json else d.url
         for d in resp.data
     kwargs: Dict[str, Any] = dict(
         model=MODEL,
         n=n,
+        # REMOVED: response_format="b64_json", # This parameter caused the BadRequestError
     )
     # Use API defaults if 'auto' is selected
     if prompt is not None:
         kwargs["prompt"] = prompt
+    # Output format specific settings (API default is png)
+    if out_fmt != "png":
         kwargs["output_format"] = out_fmt
     # Transparency via background parameter (png & webp only)
     # Compression for lossy formats (API expects integer 0-100)
     if out_fmt in {"jpeg", "webp"}:
+        # Ensure compression is an integer as expected by the API
+        kwargs["output_compression"] = int(compression)
     return kwargs
     except openai.AuthenticationError:
          raise gr.Error("Invalid OpenAI API key.")
     except openai.PermissionDeniedError:
+        raise gr.Error("Permission denied. Check your API key permissions or complete required verification for gpt-image-1.")
     except openai.RateLimitError:
         raise gr.Error("Rate limit exceeded. Please try again later.")
     except openai.BadRequestError as e:
+        # Extract the specific error message if possible
+        error_message = str(e)
+        try:
+            # Attempt to parse the error body if it's JSON-like
+            import json
+            body = json.loads(str(e.body)) # e.body might be bytes
+            if isinstance(body, dict) and 'error' in body and 'message' in body['error']:
+                error_message = f"OpenAI Bad Request: {body['error']['message']}"
+            else:
+                 error_message = f"OpenAI Bad Request: {e}"
+        except:
+             error_message = f"OpenAI Bad Request: {e}" # Fallback
+        raise gr.Error(error_message)
     except Exception as e:
         raise gr.Error(f"An unexpected error occurred: {e}")
     return _img_list(resp, fmt=out_fmt)
     if mask_numpy is not None:
         # Check if the mask seems empty (all black or fully transparent)
+        is_empty = False
+        if mask_numpy.ndim == 2: # Grayscale
+            is_empty = np.all(mask_numpy == 0)
+        elif mask_numpy.shape[-1] == 4: # RGBA
+            is_empty = np.all(mask_numpy[:, :, 3] == 0)
+        elif mask_numpy.shape[-1] == 3: # RGB
+            is_empty = np.all(mask_numpy == 0)
+        if is_empty:
+             gr.Warning("The provided mask appears empty (all black/transparent). The API might edit the entire image or ignore the mask.")
+             # Pass None if the mask is effectively empty, as per API docs (transparent areas are edited)
              mask_bytes = None
         else:
+            # Convert the mask to the format required by the API:
+            # A PNG image where TRANSPARENT areas indicate where the image should be edited.
+            # Our Gradio mask uses WHITE to indicate the edit area.
+            # So, we need to create an alpha channel where white pixels in the input mask become transparent (0),
+            # and black/other pixels become opaque (255).
+            if mask_numpy.ndim == 2: # Grayscale input mask
+                # Assume white (255) means edit -> make transparent (0 alpha)
+                # Assume black (0) means keep -> make opaque (255 alpha)
+                alpha = (mask_numpy == 0).astype(np.uint8) * 255
+            elif mask_numpy.shape[-1] == 4:  # RGBA input mask (from gr.ImageMask)
+                # Use the alpha channel directly if it exists and seems meaningful,
+                # otherwise, treat non-black RGB as edit area.
+                # gr.ImageMask often returns RGBA where painted area is white [255,255,255,255] and background is [0,0,0,0]
+                # We want the painted (white) area to be transparent in the final mask.
+                # We want the unpainted (transparent black) area to be opaque in the final mask.
+                alpha = (mask_numpy[:, :, 3] == 0).astype(np.uint8) * 255
+            elif mask_numpy.shape[-1] == 3: # RGB input mask
+                # Assume white [255, 255, 255] means edit -> make transparent (0 alpha)
+                # Assume black [0, 0, 0] or other colors mean keep -> make opaque (255 alpha)
+                is_white = np.all(mask_numpy == [255, 255, 255], axis=-1)
+                alpha = (~is_white).astype(np.uint8) * 255
             else:
                  raise gr.Error("Unsupported mask format.")
+            # Create a single-channel L mode image (grayscale/alpha) for the mask
             mask_img = Image.fromarray(alpha, mode='L')
+            # The API expects an RGBA PNG where the alpha channel defines the mask.
+            # Create a black image with the calculated alpha channel.
+            rgba_mask = Image.new("RGBA", mask_img.size, (0, 0, 0, 0))
+            black_opaque = Image.new("L", mask_img.size, 0) # Black base
+            rgba_mask.putalpha(mask_img) # Use the calculated alpha
             out = io.BytesIO()
+            rgba_mask.save(out, format="PNG")
             mask_bytes = out.getvalue()
             # Debug: Save mask locally to check
+            # rgba_mask.save("debug_mask_sent_to_api.png")
     else:
+        gr.Info("No mask provided. The API will attempt to edit the image based on the prompt without a specific mask.")
         mask_bytes = None # Explicitly pass None if no mask is usable
     client = _client(api_key)
         if "prompt" not in common_args:
              common_args["prompt"] = prompt # Should always be there via _common_kwargs, but safeguard
+        # Ensure image and mask are passed correctly
+        api_kwargs = {
+             "image": img_bytes,
+             **common_args
+        }
+        if mask_bytes is not None:
+             api_kwargs["mask"] = mask_bytes
+        resp = client.images.edit(**api_kwargs)
     except openai.AuthenticationError:
          raise gr.Error("Invalid OpenAI API key.")
     except openai.PermissionDeniedError:
+        raise gr.Error("Permission denied. Check your API key permissions or complete required verification for gpt-image-1.")
     except openai.RateLimitError:
         raise gr.Error("Rate limit exceeded. Please try again later.")
     except openai.BadRequestError as e:
+        error_message = str(e)
+        try:
+            import json
+            body = json.loads(str(e.body))
+            if isinstance(body, dict) and 'error' in body and 'message' in body['error']:
+                error_message = f"OpenAI Bad Request: {body['error']['message']}"
+                # Add specific advice based on common mask errors
+                if "mask" in error_message.lower():
+                     error_message += " (Ensure mask is a valid PNG with an alpha channel and matches the image dimensions.)"
+                elif "size" in error_message.lower():
+                     error_message += " (Ensure image and mask dimensions match and are supported.)"
+            else:
+                 error_message = f"OpenAI Bad Request: {e}"
+        except:
+             error_message = f"OpenAI Bad Request: {e}" # Fallback
+        raise gr.Error(error_message)
     except Exception as e:
         raise gr.Error(f"An unexpected error occurred: {e}")
     return _img_list(resp, fmt=out_fmt)
     transparent_bg: bool,
 ):
     """Calls the OpenAI image variations endpoint."""
+    # NOTE: Variations are only supported for DALL-E 2 according to docs.
+    # This might fail with gpt-image-1. Consider adding a check or using DALL-E 2.
+    gr.Warning("Note: Image variations are officially supported for DALL·E 2, not gpt-image-1. This may not work as expected.")
     if image_numpy is None:
         raise gr.Error("Please upload an image.")
     img_bytes = _bytes_from_numpy(image_numpy)
     except openai.RateLimitError:
         raise gr.Error("Rate limit exceeded. Please try again later.")
     except openai.BadRequestError as e:
+        error_message = str(e)
+        try:
+            import json
+            body = json.loads(str(e.body))
+            if isinstance(body, dict) and 'error' in body and 'message' in body['error']:
+                 error_message = f"OpenAI Bad Request: {body['error']['message']}"
+                 if "model does not support variations" in error_message.lower():
+                      error_message += " (gpt-image-1 does not support variations, use DALL·E 2 instead)."
+            else:
+                 error_message = f"OpenAI Bad Request: {e}"
+        except:
+             error_message = f"OpenAI Bad Request: {e}" # Fallback
+        raise gr.Error(error_message)
     except Exception as e:
         raise gr.Error(f"An unexpected error occurred: {e}")
     return _img_list(resp, fmt=out_fmt)
         gr.Markdown(
              "Enter your OpenAI API key below. It's used directly for API calls and **never stored**."
              " This space uses the `gpt-image-1` model."
+             " **Note:** `gpt-image-1` may require organization verification. Variations endpoint might not work with this model (use DALL·E 2)."
         )
         with gr.Accordion("🔐 API key", open=False):
              size = gr.Dropdown(SIZE_CHOICES, value="auto", label="Size", info="API default if 'auto'.")
              quality = gr.Dropdown(QUALITY_CHOICES, value="auto", label="Quality", info="API default if 'auto'.")
         with gr.Row():
+            out_fmt = gr.Radio(FORMAT_CHOICES, value="png", label="Format", scale=1)
+            compression = gr.Slider(0, 100, value=75, step=1, label="Compression % (JPEG/WebP)", visible=False, scale=2)
+            transparent = gr.Checkbox(False, label="Transparent background (PNG/WebP only)", scale=1)
         def _toggle_compression(fmt):
             return gr.update(visible=fmt in {"jpeg", "webp"})
         out_fmt.change(_toggle_compression, inputs=out_fmt, outputs=compression)
+        common_inputs = [api, n_slider, size, quality, out_fmt, compression, transparent]
         with gr.Tabs():
             # ----- Generate Tab ----- #
             with gr.TabItem("Generate"):
                 gallery_gen = gr.Gallery(label="Generated Images", columns=2, height="auto", preview=True)
                 btn_gen.click(
                     generate,
+                    inputs=[prompt_gen] + common_inputs, # Prepend specific inputs
                     outputs=gallery_gen,
                     api_name="generate"
                 )
             # ----- Edit Tab ----- #
             with gr.TabItem("Edit / Inpaint"):
+                gr.Markdown("Upload an image, then **paint the area to change** in the mask canvas below (white = edit area). The API requires the mask and image to have the same dimensions.")
                 with gr.Row():
                     img_edit = gr.Image(label="Source Image", type="numpy", height=400)
                     # Use ImageMask component for interactive painting
                     mask_canvas = gr.ImageMask(
                          label="Mask – Paint White Where Image Should Change",
                          type="numpy", # Get mask as numpy array
                          height=400
                     )
                 with gr.Row():
                 gallery_edit = gr.Gallery(label="Edited Images", columns=2, height="auto", preview=True)
                 btn_edit.click(
                     edit_image,
+                    inputs=[img_edit, mask_canvas, prompt_edit] + common_inputs, # Prepend specific inputs
                     outputs=gallery_edit,
                     api_name="edit"
                 )
             # ----- Variations Tab ----- #
+            with gr.TabItem("Variations (DALL·E 2 only)"):
+                gr.Markdown("Upload an image to generate variations. **Note:** This endpoint is officially supported for DALL·E 2, not `gpt-image-1`. It likely won't work here.")
                 with gr.Row():
                     img_var = gr.Image(label="Source Image", type="numpy", height=400, scale=4)
                     btn_var = gr.Button("Create Variations ✨", variant="primary", scale=1)
                 gallery_var = gr.Gallery(label="Variations", columns=2, height="auto", preview=True)
                 btn_var.click(
                     variation_image,
+                    inputs=[img_var] + common_inputs, # Prepend specific inputs
                     outputs=gallery_var,
                     api_name="variations"
                 )
 if __name__ == "__main__":
     app = build_ui()
+    # Set share=True to create a public link (useful for Spaces)
+    # Set debug=True for more detailed logs in the console
+    app.launch(share=os.getenv("GRADIO_SHARE") == "true", debug=True)