# gpt-image-1-playground

# Running

# File size: 15,962 Bytes

"""
Gradio Space: GPT-Image-1 – BYOT playground
Generate · Edit (paint mask!) · Variations
==========================================
Adds an **in-browser paint tool** for the edit / inpaint workflow so users can
draw the mask directly instead of uploading one.

### How mask painting works
* Upload an image.
* Use the *Mask* canvas to **paint the areas you’d like changed** (white =
  editable, black = keep).
  The new `gr.ImageMask` component captures your brush strokes.
* The painted mask is converted to a 1‑channel PNG and sent to the
  `images.edit()` endpoint.

All other controls (size, quality, format, compression, n, background) stay the
same.
"""

from __future__ import annotations

import io
import os
from typing import List, Optional, Union, Dict, Any

import gradio as gr
import numpy as np
from PIL import Image
import openai

# Model identifier sent as ``model`` in the Images API request arguments.
MODEL = "gpt-image-1"
# Choices shown in the UI controls. "auto" is a UI-only value: when it is
# selected, the corresponding request field is omitted so the API default applies.
SIZE_CHOICES = ["auto", "1024x1024", "1536x1024", "1024x1536"]
QUALITY_CHOICES = ["auto", "low", "medium", "high"]
# Output formats selectable in the UI; "png" is treated as the API default.
FORMAT_CHOICES = ["png", "jpeg", "webp"]


def _client(key: str) -> openai.OpenAI:
    """Build an OpenAI client from the user-supplied key or the environment.

    Args:
        key: API key typed into the UI. A blank (or ``None``) value falls back
            to the ``OPENAI_API_KEY`` environment variable.

    Returns:
        An ``openai.OpenAI`` client bound to the resolved key.

    Raises:
        gr.Error: If neither the argument nor the environment provides a key.
    """
    # Guard against None so a missing value reaches the friendly error below
    # instead of failing with AttributeError on ``.strip()``.
    api_key = (key or "").strip() or os.getenv("OPENAI_API_KEY", "").strip()
    if not api_key:
        raise gr.Error("Please enter your OpenAI API key (never stored)")
    return openai.OpenAI(api_key=api_key)


def _img_list(resp, *, fmt: str) -> List[str]:
    """Return list of data URLs or direct URLs depending on API response."""
    mime = f"image/{fmt}"
    return [
        f"data:{mime};base64,{d.b64_json}" if hasattr(d, "b64_json") and d.b64_json else d.url
        for d in resp.data
    ]


def _common_kwargs(
    prompt: Optional[str],
    n: int,
    size: str,
    quality: str,
    out_fmt: str,
    compression: int,
    transparent_bg: bool,
) -> Dict[str, Any]:
    """Assemble the keyword arguments shared by the Images API calls.

    Args:
        prompt: Text prompt; omitted from the result when ``None``.
        n: Number of images to request.
        size: Image size, or ``"auto"`` to leave the API default in place.
        quality: Quality level, or ``"auto"`` to leave the API default in place.
        out_fmt: Output format (``"png"``, ``"jpeg"`` or ``"webp"``).
        compression: Compression level 0-100, sent for JPEG/WebP only.
        transparent_bg: Request a transparent background (PNG/WebP only).

    Returns:
        A dict suitable for ``**``-expansion into an Images API call.
    """
    # NOTE: ``response_format`` is deliberately not sent. Per the OpenAI Images
    # API reference it is not supported for gpt-image-1, which always returns
    # base64-encoded images; sending it gets the request rejected.
    # UI sliders may deliver numeric values as floats, so coerce to int.
    kwargs: Dict[str, Any] = {"model": MODEL, "n": int(n)}

    # "auto" means: do not send the field, let the API pick its default.
    if size != "auto":
        kwargs["size"] = size
    if quality != "auto":
        kwargs["quality"] = quality

    if prompt is not None:
        kwargs["prompt"] = prompt

    # png is treated as the API default, so it is only sent for other formats.
    if out_fmt != "png":
        kwargs["output_format"] = out_fmt

    # Transparency is only meaningful for formats with an alpha channel.
    if transparent_bg and out_fmt in {"png", "webp"}:
        kwargs["background"] = "transparent"

    # Compression applies to the lossy formats only (integer 0-100).
    if out_fmt in {"jpeg", "webp"}:
        kwargs["output_compression"] = int(compression)

    return kwargs


# ---------- Generate ---------- #

def generate(
    api_key: str,
    prompt: str,
    n: int,
    size: str,
    quality: str,
    out_fmt: str,
    compression: int,
    transparent_bg: bool,
):
    """Generate images from a text prompt via the OpenAI image endpoint.

    Args:
        api_key: OpenAI API key entered in the UI (may be blank; see ``_client``).
        prompt: Text description of the desired image; must not be blank.
        n: Number of images to request.
        size: Image size or ``"auto"``.
        quality: Quality level or ``"auto"``.
        out_fmt: Output format; also used to label the returned data URLs.
        compression: Compression level for JPEG/WebP output.
        transparent_bg: Request a transparent background.

    Returns:
        A list of image sources (data URLs or plain URLs) for the gallery.

    Raises:
        gr.Error: On a blank prompt, a missing key, or any API failure.
    """
    # Reject whitespace-only prompts locally instead of spending an API call.
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a prompt.")

    client = _client(api_key)
    request_args = _common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg)

    # Only the network call sits inside the try; each handler chains the
    # original exception so the server log keeps the real cause.
    try:
        resp = client.images.generate(**request_args)
    except openai.AuthenticationError as e:
        raise gr.Error("Invalid OpenAI API key.") from e
    except openai.PermissionDeniedError as e:
        raise gr.Error("Permission denied. Check your API key permissions.") from e
    except openai.RateLimitError as e:
        raise gr.Error("Rate limit exceeded. Please try again later.") from e
    except openai.BadRequestError as e:
        raise gr.Error(f"OpenAI Bad Request: {e}") from e
    except Exception as e:  # UI boundary: surface anything else as a message
        raise gr.Error(f"An unexpected error occurred: {e}") from e
    return _img_list(resp, fmt=out_fmt)


# ---------- Edit / Inpaint ---------- #

def _bytes_from_numpy(arr: np.ndarray) -> bytes:
    """Encode an image array as PNG and return the encoded bytes.

    The array is cast to ``uint8`` before being handed to Pillow.
    """
    pixels = arr.astype(np.uint8)
    with io.BytesIO() as buffer:
        Image.fromarray(pixels).save(buffer, format="PNG")
        return buffer.getvalue()


def _extract_mask_array(mask_value: Union[np.ndarray, Dict[str, Any], None]) -> Optional[np.ndarray]:
    """Handle ImageMask / ImageEditor return formats and extract a numpy mask array."""
    if mask_value is None:
        return None

    # If we already have a numpy array (ImageMask with type="numpy")
    if isinstance(mask_value, np.ndarray):
        mask_arr = mask_value
    # If it's an EditorValue dict coming from ImageEditor/ImageMask with type="file" or "pil"
    elif isinstance(mask_value, dict):
        # Prefer the composite (all layers merged) if present
        comp = mask_value.get("composite")
        if comp is not None and isinstance(comp, (Image.Image, np.ndarray)):
             mask_arr = np.array(comp) if isinstance(comp, Image.Image) else comp
        # Fallback to the mask if present (often from ImageMask)
        elif mask_value.get("mask") is not None and isinstance(mask_value["mask"], (Image.Image, np.ndarray)):
             mask_arr = np.array(mask_value["mask"]) if isinstance(mask_value["mask"], Image.Image) else mask_value["mask"]
        # Fallback to the topmost layer
        elif mask_value.get("layers"):
            top_layer = mask_value["layers"][-1]
            if isinstance(top_layer, (Image.Image, np.ndarray)):
                 mask_arr = np.array(top_layer) if isinstance(top_layer, Image.Image) else top_layer
            else:
                 return None # Cannot process layer format
        else:
            return None # No usable image data found in dict
    else:
        # Unknown format – ignore
        return None

    # Ensure mask_arr is a numpy array now
    if not isinstance(mask_arr, np.ndarray):
        return None # Should not happen after above checks, but safeguard

    return mask_arr


def _mask_png_for_api(mask_numpy: np.ndarray, height: int, width: int) -> Optional[bytes]:
    """Encode a painted mask as the RGBA PNG used for the edit request.

    Painted pixels (non-zero alpha for RGBA input, any non-black pixel for
    grayscale/RGB input) become fully transparent, which marks them as the
    area to edit; every other pixel stays opaque. The mask is resized to
    ``width`` x ``height`` when its own size differs.

    Returns:
        PNG bytes, or ``None`` when nothing at all is painted.

    Raises:
        gr.Error: If the array is not 2-D, RGB, or RGBA.
    """
    if mask_numpy.ndim == 2:  # grayscale
        painted = mask_numpy != 0
    elif mask_numpy.ndim == 3 and mask_numpy.shape[-1] == 4:  # RGBA: alpha marks strokes
        painted = mask_numpy[:, :, 3] != 0
    elif mask_numpy.ndim == 3 and mask_numpy.shape[-1] == 3:  # RGB
        painted = np.any(mask_numpy != 0, axis=-1)
    else:
        raise gr.Error("Unsupported mask format.")

    if not painted.any():
        return None

    # The edit region is conveyed through the alpha channel, so the mask has
    # to be a real RGBA image; a single-channel "L" PNG carries no alpha.
    rgba = np.zeros((*painted.shape, 4), dtype=np.uint8)
    rgba[..., 3] = np.where(painted, 0, 255)
    mask_img = Image.fromarray(rgba)

    # Image and mask must share dimensions; NEAREST keeps the mask binary.
    if mask_img.size != (width, height):
        gr.Warning("The mask size differs from the source image; the mask was resized to match.")
        mask_img = mask_img.resize((width, height), Image.NEAREST)

    out = io.BytesIO()
    mask_img.save(out, format="PNG")
    return out.getvalue()


def edit_image(
    api_key: str,
    image_numpy: np.ndarray,
    mask_value: Optional[Union[np.ndarray, Dict[str, Any]]],
    prompt: str,
    n: int,
    size: str,
    quality: str,
    out_fmt: str,
    compression: int,
    transparent_bg: bool,
):
    """Edit / inpaint an uploaded image via the OpenAI image edit endpoint.

    Args:
        api_key: OpenAI API key entered in the UI (may be blank; see ``_client``).
        image_numpy: Source image as a numpy array.
        mask_value: Value of the mask canvas (array, editor dict, or ``None``).
        prompt: Description of the desired edit; must not be blank.
        n: Number of images to request.
        size: Image size or ``"auto"``.
        quality: Quality level or ``"auto"``.
        out_fmt: Output format; also used to label the returned data URLs.
        compression: Compression level for JPEG/WebP output.
        transparent_bg: Request a transparent background.

    Returns:
        A list of image sources (data URLs or plain URLs) for the gallery.

    Raises:
        gr.Error: On missing image/prompt/key, an unsupported mask, or any
            API failure.
    """
    if image_numpy is None:
        raise gr.Error("Please upload an image.")
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter an edit prompt.")

    img_bytes = _bytes_from_numpy(image_numpy)

    mask_bytes: Optional[bytes] = None
    mask_numpy = _extract_mask_array(mask_value)
    if mask_numpy is None:
        gr.Warning("No mask provided or mask could not be processed. The API might edit the entire image or apply a default mask.")
    else:
        img_height, img_width = image_numpy.shape[:2]
        mask_bytes = _mask_png_for_api(mask_numpy, img_height, img_width)
        if mask_bytes is None:
            gr.Warning("The provided mask appears empty. The entire image might be edited if no mask is applied by the API.")

    client = _client(api_key)
    request_args = _common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg)
    # (filename, content, content-type) tuples give the multipart upload an
    # explicit image/png type; bare bytes carry neither a name nor a type.
    request_args["image"] = ("image.png", img_bytes, "image/png")
    # Only include ``mask`` when there is one: the SDK expects file content
    # for this field, not None.
    if mask_bytes is not None:
        request_args["mask"] = ("mask.png", mask_bytes, "image/png")

    try:
        resp = client.images.edit(**request_args)
    except openai.AuthenticationError as e:
        raise gr.Error("Invalid OpenAI API key.") from e
    except openai.PermissionDeniedError as e:
        raise gr.Error("Permission denied. Check your API key permissions.") from e
    except openai.RateLimitError as e:
        raise gr.Error("Rate limit exceeded. Please try again later.") from e
    except openai.BadRequestError as e:
        # Give more specific feedback for the common mask/size failures.
        detail = str(e)
        if "mask" in detail and "alpha channel" in detail:
            raise gr.Error("OpenAI API Error: The mask must be a PNG image with transparency indicating the edit area. Ensure your mask was processed correctly.") from e
        if "size" in detail:
            raise gr.Error(f"OpenAI API Error: Image and mask size mismatch or invalid size. Ensure image is square if required by the model. Error: {e}") from e
        raise gr.Error(f"OpenAI Bad Request: {e}") from e
    except Exception as e:  # UI boundary: surface anything else as a message
        raise gr.Error(f"An unexpected error occurred: {e}") from e
    return _img_list(resp, fmt=out_fmt)


# ---------- Variations ---------- #

def variation_image(
    api_key: str,
    image_numpy: np.ndarray,
    n: int,
    size: str,
    quality: str,
    out_fmt: str,
    compression: int,
    transparent_bg: bool,
):
    """Create variations of an uploaded image via the OpenAI variations endpoint.

    The SDK method is ``client.images.create_variation``. Per the OpenAI API
    reference that endpoint only supports ``dall-e-2`` and accepts just
    ``image``, ``n``, ``size`` and ``response_format``, so the request is
    built here rather than from the gpt-image-1 option set.

    Args:
        api_key: OpenAI API key entered in the UI (may be blank; see ``_client``).
        image_numpy: Source image as a numpy array.
        n: Number of variations to request.
        size: Requested size; forwarded only when it is one of the sizes the
            variations endpoint accepts, otherwise the API default applies.
        quality: Unused; kept so the UI wiring stays uniform across tabs.
        out_fmt: Unused; variations are returned and labelled as PNG.
        compression: Unused; kept for a uniform signature.
        transparent_bg: Unused; kept for a uniform signature.

    Returns:
        A list of image sources (data URLs or plain URLs) for the gallery.

    Raises:
        gr.Error: On a missing image/key or any API failure.
    """
    if image_numpy is None:
        raise gr.Error("Please upload an image.")
    img_bytes = _bytes_from_numpy(image_numpy)
    client = _client(api_key)

    request_args: Dict[str, Any] = {
        "model": "dall-e-2",
        # Tuple form gives the upload an explicit filename and image/png type.
        "image": ("image.png", img_bytes, "image/png"),
        "n": int(n),
        # Request base64 so results do not depend on short-lived URLs.
        "response_format": "b64_json",
    }
    if size in {"256x256", "512x512", "1024x1024"}:
        request_args["size"] = size

    gr.Warning(
        "Variations are only available with the dall-e-2 model; quality, format, "
        "compression and background settings are ignored."
    )

    try:
        resp = client.images.create_variation(**request_args)
    except openai.AuthenticationError as e:
        raise gr.Error("Invalid OpenAI API key.") from e
    except openai.PermissionDeniedError as e:
        raise gr.Error("Permission denied. Check your API key permissions.") from e
    except openai.RateLimitError as e:
        raise gr.Error("Rate limit exceeded. Please try again later.") from e
    except openai.BadRequestError as e:
        raise gr.Error(f"OpenAI Bad Request: {e}") from e
    except Exception as e:  # UI boundary: surface anything else as a message
        raise gr.Error(f"An unexpected error occurred: {e}") from e
    return _img_list(resp, fmt="png")


# ---------- UI ---------- #

def build_ui():
    """Assemble the Gradio Blocks interface and return it without launching.

    Layout, top to bottom: header text, a collapsed API-key field, the option
    controls shared by every tab, then three tabs (Generate, Edit / Inpaint,
    Variations) whose buttons call ``generate``, ``edit_image`` and
    ``variation_image`` respectively.
    """
    with gr.Blocks(title="GPT-Image-1 (BYOT)") as demo:
        gr.Markdown("# GPT-Image-1 Playground 🖼️🔑\nGenerate • Edit (paint mask!) • Variations")
        gr.Markdown(
            "Enter your OpenAI API key below. It's used directly for API calls and **never stored**."
            " This space uses the `gpt-image-1` model."
        )

        with gr.Accordion("🔐 API key", open=False):
            key_box = gr.Textbox(label="OpenAI API key", type="password", placeholder="sk-…")

        # Option controls shared by all three tabs.
        with gr.Row():
            # n is capped at 4 in this demo to limit cost.
            count_slider = gr.Slider(
                1,
                4,
                value=1,
                step=1,
                label="Number of images (n)",
                info="Max 4 for this demo.",
            )
            size_dd = gr.Dropdown(SIZE_CHOICES, value="auto", label="Size", info="API default if 'auto'.")
            quality_dd = gr.Dropdown(QUALITY_CHOICES, value="auto", label="Quality", info="API default if 'auto'.")
        with gr.Row():
            format_radio = gr.Radio(FORMAT_CHOICES, value="png", label="Format")
            compression_slider = gr.Slider(
                0,
                100,
                value=75,
                step=1,
                label="Compression % (JPEG/WebP)",
                visible=False,
            )
            transparent_box = gr.Checkbox(False, label="Transparent background (PNG/WebP only)")

        def _toggle_compression(fmt):
            # The compression slider only applies to the lossy formats.
            is_lossy = fmt in {"jpeg", "webp"}
            return gr.update(visible=is_lossy)

        format_radio.change(_toggle_compression, inputs=format_radio, outputs=compression_slider)

        # Trailing inputs every handler receives, in handler-signature order.
        shared_inputs = [
            count_slider,
            size_dd,
            quality_dd,
            format_radio,
            compression_slider,
            transparent_box,
        ]

        with gr.Tabs():
            # Generate tab
            with gr.TabItem("Generate"):
                with gr.Row():
                    gen_prompt = gr.Textbox(
                        label="Prompt",
                        lines=3,
                        placeholder="A photorealistic ginger cat astronaut on Mars",
                        scale=4,
                    )
                    gen_button = gr.Button("Generate 🚀", variant="primary", scale=1)
                gen_gallery = gr.Gallery(label="Generated Images", columns=2, height="auto", preview=True)
                gen_button.click(
                    generate,
                    inputs=[key_box, gen_prompt, *shared_inputs],
                    outputs=gen_gallery,
                    api_name="generate",
                )

            # Edit / Inpaint tab
            with gr.TabItem("Edit / Inpaint"):
                gr.Markdown("Upload an image, then **paint the area to change** in the mask canvas below (white = edit).")
                with gr.Row():
                    edit_source = gr.Image(label="Source Image", type="numpy", height=400)
                    # Interactive canvas on which the user paints the mask.
                    edit_mask = gr.ImageMask(
                        label="Mask – Paint White Where Image Should Change",
                        type="numpy",
                        height=400,
                    )
                with gr.Row():
                    edit_prompt = gr.Textbox(
                        label="Edit prompt",
                        lines=2,
                        placeholder="Replace the sky with a starry night",
                        scale=4,
                    )
                    edit_button = gr.Button("Edit 🖌️", variant="primary", scale=1)
                edit_gallery = gr.Gallery(label="Edited Images", columns=2, height="auto", preview=True)
                edit_button.click(
                    edit_image,
                    inputs=[key_box, edit_source, edit_mask, edit_prompt, *shared_inputs],
                    outputs=edit_gallery,
                    api_name="edit",
                )

            # Variations tab
            with gr.TabItem("Variations"):
                gr.Markdown("Upload an image to generate variations.")
                with gr.Row():
                    var_source = gr.Image(label="Source Image", type="numpy", height=400, scale=4)
                    var_button = gr.Button("Create Variations ✨", variant="primary", scale=1)
                var_gallery = gr.Gallery(label="Variations", columns=2, height="auto", preview=True)
                var_button.click(
                    variation_image,
                    inputs=[key_box, var_source, *shared_inputs],
                    outputs=var_gallery,
                    api_name="variations",
                )

    return demo

if __name__ == "__main__":
    # Build the Blocks app and start the Gradio server when run as a script.
    app = build_ui()
    app.launch()