gpt-image-1-playground

Running

App Files Files Community

Zack3D committed 15 days ago

Commit

68971bf

verified ·

1 Parent(s): 9047431

Update app.py

Browse files

Files changed (1) hide show

app.py +182 -51

app.py CHANGED Viewed

@@ -6,9 +6,9 @@ Adds an **in-browser paint tool** for the edit / inpaint workflow so users can
 draw the mask directly instead of uploading one.
 ### How mask painting works
-* Upload an image.
 * Use the *Mask* canvas to **paint the areas you’d like changed** (white =
-  editable, black = keep).
   The new `gr.ImageMask` component captures your brush strokes.
 * The painted mask is converted to a 1‑channel PNG and sent to the
   `images.edit()` endpoint.
@@ -35,6 +35,7 @@ FORMAT_CHOICES = ["png", "jpeg", "webp"]
 def _client(key: str) -> openai.OpenAI:
     api_key = key.strip() or os.getenv("OPENAI_API_KEY", "")
     if not api_key:
         raise gr.Error("Please enter your OpenAI API key (never stored)")
@@ -45,7 +46,7 @@ def _img_list(resp, *, fmt: str) -> List[str]:
     """Return list of data URLs or direct URLs depending on API response."""
     mime = f"image/{fmt}"
     return [
-        f"data:{mime};base64,{d.b64_json}" if hasattr(d, "b64_json") else d.url
         for d in resp.data
     ]
@@ -58,27 +59,35 @@ def _common_kwargs(
     out_fmt: str,
     compression: int,
     transparent_bg: bool,
-):
     """Prepare keyword arguments for Images API based on latest OpenAI spec."""
     kwargs: Dict[str, Any] = dict(
         model=MODEL,
         n=n,
-        size=size,
-        quality=quality,
-        output_format=out_fmt,
     )
     # Prompt is optional for variations
     if prompt is not None:
         kwargs["prompt"] = prompt
     # Transparency via background parameter (png & webp only)
-    if transparent_bg:
         kwargs["background"] = "transparent"
-    # Compression for lossy formats
     if out_fmt in {"jpeg", "webp"}:
-        kwargs["output_compression"] = f"{compression}%"
     return kwargs
@@ -95,11 +104,23 @@ def generate(
     compression: int,
     transparent_bg: bool,
 ):
     client = _client(api_key)
     try:
-        resp = client.images.generate(**_common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg))
     except Exception as e:
-        raise gr.Error(f"OpenAI error: {e}")
     return _img_list(resp, fmt=out_fmt)
@@ -120,20 +141,34 @@ def _extract_mask_array(mask_value: Union[np.ndarray, Dict[str, Any], None]) ->
     # If we already have a numpy array (ImageMask with type="numpy")
     if isinstance(mask_value, np.ndarray):
-        return mask_value
-    # If it's an EditorValue dict coming from ImageEditor/ImageMask with type="numpy"
-    if isinstance(mask_value, dict):
         # Prefer the composite (all layers merged) if present
         comp = mask_value.get("composite")
-        if comp is not None:
-            return np.asarray(comp)
         # Fallback to the topmost layer
-        layers = mask_value.get("layers")
-        if layers:
-            return np.asarray(layers[-1])
-    # Unknown format – ignore
-    return None
 def edit_image(
@@ -148,31 +183,81 @@ def edit_image(
     compression: int,
     transparent_bg: bool,
 ):
     if image_numpy is None:
         raise gr.Error("Please upload an image.")
     img_bytes = _bytes_from_numpy(image_numpy)
     mask_bytes: Optional[bytes] = None
     mask_numpy = _extract_mask_array(mask_value)
     if mask_numpy is not None:
-        # Convert painted area (any non-zero pixel) to white, else black; 1‑channel.
-        if mask_numpy.shape[-1] == 4:  # RGBA (has alpha channel)
-            alpha = mask_numpy[:, :, 3]
-        else:  # RGB or grayscale
-            alpha = np.any(mask_numpy != 0, axis=-1).astype(np.uint8) * 255
-        bw = np.stack([alpha] * 3, axis=-1)  # 3‑channel white/black
-        mask_bytes = _bytes_from_numpy(bw)
     client = _client(api_key)
     try:
         resp = client.images.edit(
             image=img_bytes,
-            mask=mask_bytes,
-            **_common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg),
         )
     except Exception as e:
-        raise gr.Error(f"OpenAI error: {e}")
     return _img_list(resp, fmt=out_fmt)
@@ -188,17 +273,28 @@ def variation_image(
     compression: int,
     transparent_bg: bool,
 ):
     if image_numpy is None:
         raise gr.Error("Please upload an image.")
     img_bytes = _bytes_from_numpy(image_numpy)
     client = _client(api_key)
     try:
         resp = client.images.variations(
             image=img_bytes,
-            **_common_kwargs(None, n, size, quality, out_fmt, compression, transparent_bg),
         )
     except Exception as e:
-        raise gr.Error(f"OpenAI error: {e}")
     return _img_list(resp, fmt=out_fmt)
@@ -206,18 +302,24 @@ def variation_image(
 def build_ui():
     with gr.Blocks(title="GPT-Image-1 (BYOT)") as demo:
-        gr.Markdown("""# GPT-Image-1 Playground 🖼️🔑\nGenerate • Edit (paint mask) • Variations""")
         with gr.Accordion("🔐 API key", open=False):
             api = gr.Textbox(label="OpenAI API key", type="password", placeholder="sk-…")
         # Common controls
-        n_slider = gr.Slider(1, 10, value=1, step=1, label="Number of images (n)")
-        size = gr.Dropdown(SIZE_CHOICES, value="auto", label="Size")
-        quality = gr.Dropdown(QUALITY_CHOICES, value="auto", label="Quality")
-        out_fmt = gr.Radio(FORMAT_CHOICES, value="png", label="Format")
-        compression = gr.Slider(0, 100, value=75, step=1, label="Compression (JPEG/WebP)")
-        transparent = gr.Checkbox(False, label="Transparent background (PNG/WebP only)")
         def _toggle_compression(fmt):
             return gr.update(visible=fmt in {"jpeg", "webp"})
@@ -227,28 +329,57 @@ def build_ui():
         with gr.Tabs():
             # ----- Generate Tab ----- #
             with gr.TabItem("Generate"):
-                prompt_gen = gr.Textbox(label="Prompt", lines=2, placeholder="A photorealistic ginger cat astronaut on Mars")
-                btn_gen = gr.Button("Generate 🚀")
-                gallery_gen = gr.Gallery(columns=2, height="auto")
                 btn_gen.click(
                     generate,
                     inputs=[api, prompt_gen, n_slider, size, quality, out_fmt, compression, transparent],
                     outputs=gallery_gen,
                 )
             # ----- Edit Tab ----- #
             with gr.TabItem("Edit / Inpaint"):
-                img_edit = gr.Image(label="Image", type="numpy")
-                mask_canvas = gr.ImageMask(label="Mask – paint white where the image should change", type="numpy")
-                prompt_edit = gr.Textbox(label="Edit prompt", lines=2, placeholder="Replace the sky with a starry night")
-                btn_edit = gr.Button("Edit 🖌️")
-                gallery_edit = gr.Gallery(columns=2, height="auto")
                 btn_edit.click(
                     edit_image,
                     inputs=[api, img_edit, mask_canvas, prompt_edit, n_slider, size, quality, out_fmt, compression, transparent],
                     outputs=gallery_edit,
                 )
             # ----- Variations Tab ----- #
             with gr.TabItem("Variations"):
-                img_var =

 draw the mask directly instead of uploading one.
 ### How mask painting works
+* Upload an image.
 * Use the *Mask* canvas to **paint the areas you’d like changed** (white =
+  editable, black = keep).
   The new `gr.ImageMask` component captures your brush strokes.
 * The painted mask is converted to a 1‑channel PNG and sent to the
   `images.edit()` endpoint.
 def _client(key: str) -> openai.OpenAI:
+    """Initializes the OpenAI client with the provided API key."""
     api_key = key.strip() or os.getenv("OPENAI_API_KEY", "")
     if not api_key:
         raise gr.Error("Please enter your OpenAI API key (never stored)")
     """Return list of data URLs or direct URLs depending on API response."""
     mime = f"image/{fmt}"
     return [
+        f"data:{mime};base64,{d.b64_json}" if hasattr(d, "b64_json") and d.b64_json else d.url
         for d in resp.data
     ]
     out_fmt: str,
     compression: int,
     transparent_bg: bool,
+) -> Dict[str, Any]:
     """Prepare keyword arguments for Images API based on latest OpenAI spec."""
     kwargs: Dict[str, Any] = dict(
         model=MODEL,
         n=n,
+        response_format="b64_json", # Request base64 to avoid potential URL expiry issues
     )
+    # Use API defaults if 'auto' is selected
+    if size != "auto":
+        kwargs["size"] = size
+    if quality != "auto":
+        kwargs["quality"] = quality
     # Prompt is optional for variations
     if prompt is not None:
         kwargs["prompt"] = prompt
+    # Output format specific settings
+    if out_fmt != "png": # API default is png
+        kwargs["output_format"] = out_fmt
     # Transparency via background parameter (png & webp only)
+    if transparent_bg and out_fmt in {"png", "webp"}:
         kwargs["background"] = "transparent"
+    # Compression for lossy formats (API expects integer 0-100)
     if out_fmt in {"jpeg", "webp"}:
+        kwargs["output_compression"] = compression
     return kwargs
     compression: int,
     transparent_bg: bool,
 ):
+    """Calls the OpenAI image generation endpoint."""
+    if not prompt:
+        raise gr.Error("Please enter a prompt.")
     client = _client(api_key)
     try:
+        common_args = _common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg)
+        resp = client.images.generate(**common_args)
+    except openai.AuthenticationError:
+         raise gr.Error("Invalid OpenAI API key.")
+    except openai.PermissionDeniedError:
+        raise gr.Error("Permission denied. Check your API key permissions.")
+    except openai.RateLimitError:
+        raise gr.Error("Rate limit exceeded. Please try again later.")
+    except openai.BadRequestError as e:
+        raise gr.Error(f"OpenAI Bad Request: {e}")
     except Exception as e:
+        raise gr.Error(f"An unexpected error occurred: {e}")
     return _img_list(resp, fmt=out_fmt)
     # If we already have a numpy array (ImageMask with type="numpy")
     if isinstance(mask_value, np.ndarray):
+        mask_arr = mask_value
+    # If it's an EditorValue dict coming from ImageEditor/ImageMask whose entries are numpy arrays or PIL images
+    elif isinstance(mask_value, dict):
         # Prefer the composite (all layers merged) if present
         comp = mask_value.get("composite")
+        if comp is not None and isinstance(comp, (Image.Image, np.ndarray)):
+             mask_arr = np.array(comp) if isinstance(comp, Image.Image) else comp
+        # Fallback to the mask if present (often from ImageMask)
+        elif mask_value.get("mask") is not None and isinstance(mask_value["mask"], (Image.Image, np.ndarray)):
+             mask_arr = np.array(mask_value["mask"]) if isinstance(mask_value["mask"], Image.Image) else mask_value["mask"]
         # Fallback to the topmost layer
+        elif mask_value.get("layers"):
+            top_layer = mask_value["layers"][-1]
+            if isinstance(top_layer, (Image.Image, np.ndarray)):
+                 mask_arr = np.array(top_layer) if isinstance(top_layer, Image.Image) else top_layer
+            else:
+                 return None # Cannot process layer format
+        else:
+            return None # No usable image data found in dict
+    else:
+        # Unknown format – ignore
+        return None
+    # Ensure mask_arr is a numpy array now
+    if not isinstance(mask_arr, np.ndarray):
+        return None # Should not happen after above checks, but safeguard
+    return mask_arr
 def edit_image(
     compression: int,
     transparent_bg: bool,
 ):
+    """Calls the OpenAI image edit endpoint."""
     if image_numpy is None:
         raise gr.Error("Please upload an image.")
+    if not prompt:
+        raise gr.Error("Please enter an edit prompt.")
     img_bytes = _bytes_from_numpy(image_numpy)
     mask_bytes: Optional[bytes] = None
     mask_numpy = _extract_mask_array(mask_value)
     if mask_numpy is not None:
+        # Check if the mask seems empty (all black or fully transparent)
+        if np.all(mask_numpy == 0) or (mask_numpy.shape[-1] == 4 and np.all(mask_numpy[:, :, 3] == 0)):
+             gr.Warning("The provided mask appears empty. The entire image might be edited if no mask is applied by the API.")
+             # We explicitly pass None if the mask is effectively empty,
+             # letting the API decide how to handle it (might vary by model/version)
+             mask_bytes = None
+        else:
+            # Build a single-channel array: painted pixels (non-black, or non-transparent for RGBA) become 0, everything else becomes 255.
+            # The API expects the mask as a single alpha channel where transparency indicates the area to edit.
+            # White in our canvas means "edit", so this needs to become transparent in the mask sent to the API.
+            # Black in our canvas means "keep", so this needs to become opaque in the mask sent to the API.
+            if mask_numpy.ndim == 2: # Grayscale
+                alpha = (mask_numpy == 0).astype(np.uint8) * 255 # Black becomes opaque (255), white becomes transparent (0)
+            elif mask_numpy.shape[-1] == 4:  # RGBA (use alpha channel)
+                alpha = (mask_numpy[:, :, 3] == 0).astype(np.uint8) * 255 # Transparent becomes opaque, opaque becomes transparent
+            elif mask_numpy.shape[-1] == 3: # RGB
+                # Consider any non-black pixel as the area to edit (becomes transparent)
+                 alpha = np.all(mask_numpy == [0, 0, 0], axis=-1).astype(np.uint8) * 255
+            else:
+                 raise gr.Error("Unsupported mask format.")
+            # Create a single-channel L mode image (grayscale) for the mask
+            mask_img = Image.fromarray(alpha, mode='L')
+            out = io.BytesIO()
+            mask_img.save(out, format="PNG")
+            mask_bytes = out.getvalue()
+            # Debug: Save mask locally to check
+            # mask_img.save("debug_mask_sent_to_api.png")
+    else:
+        gr.Warning("No mask provided or mask could not be processed. The API might edit the entire image or apply a default mask.")
+        mask_bytes = None # Explicitly pass None if no mask is usable
     client = _client(api_key)
     try:
+        common_args = _common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg)
+        # The edit endpoint requires the prompt
+        if "prompt" not in common_args:
+             common_args["prompt"] = prompt # Should always be there via _common_kwargs, but safeguard
         resp = client.images.edit(
             image=img_bytes,
+            mask=mask_bytes, # Pass None if no mask or empty mask
+            **common_args,
         )
+    except openai.AuthenticationError:
+         raise gr.Error("Invalid OpenAI API key.")
+    except openai.PermissionDeniedError:
+        raise gr.Error("Permission denied. Check your API key permissions.")
+    except openai.RateLimitError:
+        raise gr.Error("Rate limit exceeded. Please try again later.")
+    except openai.BadRequestError as e:
+        # Provide more specific feedback if possible
+        if "mask" in str(e) and "alpha channel" in str(e):
+             raise gr.Error("OpenAI API Error: The mask must be a PNG image with transparency indicating the edit area. Ensure your mask was processed correctly.")
+        elif "size" in str(e):
+             raise gr.Error(f"OpenAI API Error: Image and mask size mismatch or invalid size. Ensure image is square if required by the model. Error: {e}")
+        else:
+             raise gr.Error(f"OpenAI Bad Request: {e}")
     except Exception as e:
+        raise gr.Error(f"An unexpected error occurred: {e}")
     return _img_list(resp, fmt=out_fmt)
     compression: int,
     transparent_bg: bool,
 ):
+    """Calls the OpenAI image variations endpoint."""
     if image_numpy is None:
         raise gr.Error("Please upload an image.")
     img_bytes = _bytes_from_numpy(image_numpy)
     client = _client(api_key)
     try:
+        # Prompt is None for variations
+        common_args = _common_kwargs(None, n, size, quality, out_fmt, compression, transparent_bg)
         resp = client.images.variations(
             image=img_bytes,
+            **common_args,
         )
+    except openai.AuthenticationError:
+         raise gr.Error("Invalid OpenAI API key.")
+    except openai.PermissionDeniedError:
+        raise gr.Error("Permission denied. Check your API key permissions.")
+    except openai.RateLimitError:
+        raise gr.Error("Rate limit exceeded. Please try again later.")
+    except openai.BadRequestError as e:
+        raise gr.Error(f"OpenAI Bad Request: {e}")
     except Exception as e:
+        raise gr.Error(f"An unexpected error occurred: {e}")
     return _img_list(resp, fmt=out_fmt)
 def build_ui():
     with gr.Blocks(title="GPT-Image-1 (BYOT)") as demo:
+        gr.Markdown("""# GPT-Image-1 Playground 🖼️🔑\nGenerate • Edit (paint mask!) • Variations""")
+        gr.Markdown(
+             "Enter your OpenAI API key below. It's used directly for API calls and **never stored**."
+             " This space uses the `gpt-image-1` model."
+        )
         with gr.Accordion("🔐 API key", open=False):
             api = gr.Textbox(label="OpenAI API key", type="password", placeholder="sk-…")
         # Common controls
+        with gr.Row():
+             n_slider = gr.Slider(1, 4, value=1, step=1, label="Number of images (n)", info="Max 4 for this demo.") # Limit n for stability/cost
+             size = gr.Dropdown(SIZE_CHOICES, value="auto", label="Size", info="API default if 'auto'.")
+             quality = gr.Dropdown(QUALITY_CHOICES, value="auto", label="Quality", info="API default if 'auto'.")
+        with gr.Row():
+            out_fmt = gr.Radio(FORMAT_CHOICES, value="png", label="Format")
+            compression = gr.Slider(0, 100, value=75, step=1, label="Compression % (JPEG/WebP)", visible=False)
+            transparent = gr.Checkbox(False, label="Transparent background (PNG/WebP only)")
         def _toggle_compression(fmt):
             return gr.update(visible=fmt in {"jpeg", "webp"})
         with gr.Tabs():
             # ----- Generate Tab ----- #
             with gr.TabItem("Generate"):
+                with gr.Row():
+                    prompt_gen = gr.Textbox(label="Prompt", lines=3, placeholder="A photorealistic ginger cat astronaut on Mars", scale=4)
+                    btn_gen = gr.Button("Generate 🚀", variant="primary", scale=1)
+                gallery_gen = gr.Gallery(label="Generated Images", columns=2, height="auto", preview=True)
                 btn_gen.click(
                     generate,
                     inputs=[api, prompt_gen, n_slider, size, quality, out_fmt, compression, transparent],
                     outputs=gallery_gen,
+                    api_name="generate"
                 )
             # ----- Edit Tab ----- #
             with gr.TabItem("Edit / Inpaint"):
+                gr.Markdown("Upload an image, then **paint the area to change** in the mask canvas below (white = edit).")
+                with gr.Row():
+                    img_edit = gr.Image(label="Source Image", type="numpy", height=400)
+                    # Use ImageMask component for interactive painting
+                    mask_canvas = gr.ImageMask(
+                         label="Mask – Paint White Where Image Should Change",
+                         type="numpy", # Get mask as numpy array
+                         # brush=gr.Brush(colors=["#FFFFFF"], color_mode="fixed"), # Force white brush
+                         # mask_opacity=0.7 # Adjust mask visibility on image
+                         height=400
+                    )
+                with gr.Row():
+                    prompt_edit = gr.Textbox(label="Edit prompt", lines=2, placeholder="Replace the sky with a starry night", scale=4)
+                    btn_edit = gr.Button("Edit 🖌️", variant="primary", scale=1)
+                gallery_edit = gr.Gallery(label="Edited Images", columns=2, height="auto", preview=True)
                 btn_edit.click(
                     edit_image,
                     inputs=[api, img_edit, mask_canvas, prompt_edit, n_slider, size, quality, out_fmt, compression, transparent],
                     outputs=gallery_edit,
+                    api_name="edit"
                 )
             # ----- Variations Tab ----- #
             with gr.TabItem("Variations"):
+                gr.Markdown("Upload an image to generate variations.")
+                with gr.Row():
+                    img_var = gr.Image(label="Source Image", type="numpy", height=400, scale=4)
+                    btn_var = gr.Button("Create Variations ✨", variant="primary", scale=1)
+                gallery_var = gr.Gallery(label="Variations", columns=2, height="auto", preview=True)
+                btn_var.click(
+                    variation_image,
+                    inputs=[api, img_var, n_slider, size, quality, out_fmt, compression, transparent],
+                    outputs=gallery_var,
+                    api_name="variations"
+                )
+    return demo
+if __name__ == "__main__":
+    app = build_ui()
+    app.launch()