from __future__ import annotations
import io
import os
from typing import List, Optional, Union, Dict, Any
import gradio as gr
import numpy as np
from PIL import Image
import openai
# --- Constants and helper functions --- #
MODEL = "gpt-image-1"
SIZE_CHOICES = ["auto", "1024x1024", "1536x1024", "1024x1536"]
QUALITY_CHOICES = ["auto", "low", "medium", "high"]
FORMAT_CHOICES = ["png", "jpeg", "webp"]
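# gpt-image-1 renders square (1024x1024), landscape (1536x1024), and portrait (1024x1536)
# images; "auto" leaves size and quality to the API defaults.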
def _client(key: str) -> openai.OpenAI:
"""Initializes the OpenAI client with the provided API key."""
api_key = key.strip() or os.getenv("OPENAI_API_KEY", "")
if not api_key:
raise gr.Error("Please enter your OpenAI API key (never stored)")
return openai.OpenAI(api_key=api_key)
def _img_list(resp, *, fmt: str) -> List[str]:
"""Return list of data URLs or direct URLs depending on API response."""
mime = f"image/{fmt}"
# Ensure b64_json exists and is not None/empty before using it
return [
f"data:{mime};base64,{d.b64_json}" if hasattr(d, "b64_json") and d.b64_json else d.url
for d in resp.data
]
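# Illustrative return value handed to a Gradio gallery (base64 payload truncated):
#   ["data:image/png;base64,iVBORw0KGgoAAAANSUhEUg...", ...]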
def _common_kwargs(
prompt: Optional[str],
n: int,
size: str,
quality: str,
out_fmt: str,
compression: int,
transparent_bg: bool,
) -> Dict[str, Any]:
"""Prepare keyword arguments for Images API based on latest OpenAI spec."""
kwargs: Dict[str, Any] = dict(
model=MODEL,
n=n,
)
if size != "auto":
kwargs["size"] = size
if quality != "auto":
kwargs["quality"] = quality
if prompt is not None:
kwargs["prompt"] = prompt
if out_fmt != "png":
kwargs["output_format"] = out_fmt
if transparent_bg and out_fmt in {"png", "webp"}:
kwargs["background"] = "transparent"
if out_fmt in {"jpeg", "webp"}:
kwargs["output_compression"] = int(compression)
return kwargs
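# Illustrative example of the kwargs this helper builds:
#   _common_kwargs("a red fox", 2, "1024x1024", "high", "webp", 80, True)
#   -> {"model": "gpt-image-1", "n": 2, "size": "1024x1024", "quality": "high",
#       "prompt": "a red fox", "output_format": "webp", "background": "transparent",
#       "output_compression": 80}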
# --- API call functions --- #
# ---------- Generate ---------- #
def generate(
api_key: str,
prompt: str,
n: int,
size: str,
quality: str,
out_fmt: str,
compression: int,
transparent_bg: bool,
):
"""Calls the OpenAI image generation endpoint."""
if not prompt:
raise gr.Error("Please enter a prompt.")
client = _client(api_key) # API key used here
try:
common_args = _common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg)
resp = client.images.generate(**common_args)
except openai.AuthenticationError:
raise gr.Error("Invalid OpenAI API key.")
except openai.PermissionDeniedError:
raise gr.Error("Permission denied. Check your API key permissions or complete required verification for gpt-image-1.")
except openai.RateLimitError:
raise gr.Error("Rate limit exceeded. Please try again later.")
    except openai.BadRequestError as e:
        # openai-python v1.x puts the parsed error payload on e.body (usually a dict).
        error_message = f"OpenAI Bad Request: {e}"
        body = getattr(e, "body", None)
        if isinstance(body, dict):
            detail = body.get("message")
            if not detail and isinstance(body.get("error"), dict):
                detail = body["error"].get("message")
            if detail:
                error_message = f"OpenAI Bad Request: {detail}"
        raise gr.Error(error_message)
except Exception as e:
raise gr.Error(f"An unexpected error occurred: {e}")
return _img_list(resp, fmt=out_fmt)
# ---------- Edit / Inpaint ---------- #
def _bytes_from_numpy(arr: np.ndarray) -> bytes:
"""Convert RGBA/RGB uint8 numpy array to PNG bytes."""
img = Image.fromarray(arr.astype(np.uint8))
out = io.BytesIO()
img.save(out, format="PNG")
return out.getvalue()
def _extract_mask_array(mask_value: Union[np.ndarray, Dict[str, Any], None]) -> Optional[np.ndarray]:
"""Handle ImageMask / ImageEditor return formats and extract a numpy mask array."""
if mask_value is None: return None
if isinstance(mask_value, np.ndarray): return mask_value
if isinstance(mask_value, dict):
comp = mask_value.get("composite")
if comp is not None and isinstance(comp, (Image.Image, np.ndarray)):
return np.array(comp) if isinstance(comp, Image.Image) else comp
elif mask_value.get("mask") is not None and isinstance(mask_value["mask"], (Image.Image, np.ndarray)):
return np.array(mask_value["mask"]) if isinstance(mask_value["mask"], Image.Image) else mask_value["mask"]
elif mask_value.get("layers"):
top_layer = mask_value["layers"][-1]
if isinstance(top_layer, (Image.Image, np.ndarray)):
return np.array(top_layer) if isinstance(top_layer, Image.Image) else top_layer
return None
def edit_image(
api_key: str,
image_numpy: np.ndarray,
mask_value: Optional[Union[np.ndarray, Dict[str, Any]]],
prompt: str,
n: int,
size: str,
quality: str,
out_fmt: str,
compression: int,
transparent_bg: bool,
):
"""Calls the OpenAI image edit endpoint."""
if image_numpy is None: raise gr.Error("Please upload an image.")
if not prompt: raise gr.Error("Please enter an edit prompt.")
img_bytes = _bytes_from_numpy(image_numpy)
mask_bytes: Optional[bytes] = None
mask_numpy = _extract_mask_array(mask_value)
if mask_numpy is not None:
        # Treat an all-black / fully transparent canvas as "no mask drawn".
        is_empty = False
        if mask_numpy.ndim == 2:
            is_empty = np.all(mask_numpy == 0)
        elif mask_numpy.shape[-1] == 4:
            is_empty = np.all(mask_numpy[:, :, 3] == 0)
        elif mask_numpy.shape[-1] == 3:
            is_empty = np.all(mask_numpy == 0)
        if is_empty:
            gr.Warning("Mask appears empty. The API may edit the entire image or ignore the mask.")
            mask_bytes = None
else:
            # The API edits regions where the mask is transparent (alpha == 0), so
            # painted pixels get alpha 0 and untouched pixels stay opaque.
            if mask_numpy.ndim == 2:
                alpha = (mask_numpy == 0).astype(np.uint8) * 255
            elif mask_numpy.shape[-1] == 4:
                alpha = (mask_numpy[:, :, 3] == 0).astype(np.uint8) * 255
            elif mask_numpy.shape[-1] == 3:
                is_white = np.all(mask_numpy == [255, 255, 255], axis=-1)
                alpha = (~is_white).astype(np.uint8) * 255
            else:
                raise gr.Error("Unsupported mask format.")
            mask_img = Image.fromarray(alpha, mode="L")
            rgba_mask = Image.new("RGBA", mask_img.size, (0, 0, 0, 0))
            rgba_mask.putalpha(mask_img)
out = io.BytesIO()
rgba_mask.save(out, format="PNG")
mask_bytes = out.getvalue()
else:
gr.Info("No mask provided. Editing without specific mask.")
mask_bytes = None
client = _client(api_key) # API key used here
try:
common_args = _common_kwargs(prompt, n, size, quality, out_fmt, compression, transparent_bg)
api_kwargs = {"image": img_bytes, **common_args}
if mask_bytes is not None: api_kwargs["mask"] = mask_bytes
resp = client.images.edit(**api_kwargs)
except openai.AuthenticationError:
raise gr.Error("Invalid OpenAI API key.")
except openai.PermissionDeniedError:
raise gr.Error("Permission denied. Check API key permissions/verification.")
except openai.RateLimitError:
raise gr.Error("Rate limit exceeded.")
    except openai.BadRequestError as e:
        error_message = f"OpenAI Bad Request: {e}"
        body = getattr(e, "body", None)
        if isinstance(body, dict):
            detail = body.get("message")
            if not detail and isinstance(body.get("error"), dict):
                detail = body["error"].get("message")
            if detail:
                error_message = f"OpenAI Bad Request: {detail}"
        if "mask" in error_message.lower():
            error_message += " (Check mask format/dimensions.)"
        elif "size" in error_message.lower():
            error_message += " (Check image/mask dimensions.)"
        raise gr.Error(error_message)
except Exception as e:
raise gr.Error(f"An unexpected error occurred: {e}")
return _img_list(resp, fmt=out_fmt)
# ---------- Variations ---------- #
def variation_image(
api_key: str,
image_numpy: np.ndarray,
n: int,
size: str,
quality: str,
out_fmt: str,
compression: int,
transparent_bg: bool,
):
"""Calls the OpenAI image variations endpoint."""
gr.Warning("Note: Variations may not work with gpt-image-1 (use DALL·E 2).")
if image_numpy is None: raise gr.Error("Please upload an image.")
img_bytes = _bytes_from_numpy(image_numpy)
client = _client(api_key) # API key used here
    try:
        common_args = _common_kwargs(None, n, size, quality, out_fmt, compression, transparent_bg)
        # The v1 SDK method is images.create_variation; it only accepts DALL·E-2-era
        # parameters, so drop the gpt-image-1-only kwargs before calling.
        common_args = {k: v for k, v in common_args.items() if k in {"model", "n", "size"}}
        resp = client.images.create_variation(image=("image.png", img_bytes, "image/png"), **common_args)
except openai.AuthenticationError:
raise gr.Error("Invalid OpenAI API key.")
except openai.PermissionDeniedError:
raise gr.Error("Permission denied.")
except openai.RateLimitError:
raise gr.Error("Rate limit exceeded.")
    except openai.BadRequestError as e:
        error_message = f"OpenAI Bad Request: {e}"
        body = getattr(e, "body", None)
        if isinstance(body, dict):
            detail = body.get("message")
            if not detail and isinstance(body.get("error"), dict):
                detail = body["error"].get("message")
            if detail:
                error_message = f"OpenAI Bad Request: {detail}"
        if "variation" in error_message.lower():
            error_message += " (gpt-image-1 does not support variations.)"
        raise gr.Error(error_message)
except Exception as e:
raise gr.Error(f"An unexpected error occurred: {e}")
return _img_list(resp, fmt=out_fmt)
# ---------- UI ---------- #
def build_ui():
with gr.Blocks(title="GPT-Image-1 (BYOT)") as demo:
gr.Markdown("""# GPT-Image-1 Playground 🖼️🔑\nGenerate • Edit (paint mask!) • Variations""")
gr.Markdown(
"Enter your OpenAI API key below. It's used directly for API calls and **never stored**."
" This space uses the `gpt-image-1` model."
" **Note:** `gpt-image-1` may require organization verification. Variations endpoint might not work with this model (use DALL·E 2)."
)
with gr.Accordion("🔐 API key", open=False):
# API key input component
api = gr.Textbox(label="OpenAI API key", type="password", placeholder="sk-…")
# Common controls
with gr.Row():
n_slider = gr.Slider(1, 4, value=1, step=1, label="Number of images (n)", info="Max 4 for this demo.")
size = gr.Dropdown(SIZE_CHOICES, value="auto", label="Size", info="API default if 'auto'.")
quality = gr.Dropdown(QUALITY_CHOICES, value="auto", label="Quality", info="API default if 'auto'.")
with gr.Row():
out_fmt = gr.Radio(FORMAT_CHOICES, value="png", label="Format", scale=1)
compression = gr.Slider(0, 100, value=75, step=1, label="Compression % (JPEG/WebP)", visible=False, scale=2)
transparent = gr.Checkbox(False, label="Transparent background (PNG/WebP only)", scale=1)
def _toggle_compression(fmt):
return gr.update(visible=fmt in {"jpeg", "webp"})
out_fmt.change(_toggle_compression, inputs=out_fmt, outputs=compression)
# Define the list of common controls *excluding* the API key
common_controls = [n_slider, size, quality, out_fmt, compression, transparent]
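        # Gradio passes `inputs` positionally, so this order must match the trailing
        # parameters of generate(), edit_image() and variation_image().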
with gr.Tabs():
# ----- Generate Tab ----- #
with gr.TabItem("Generate"):
with gr.Row():
prompt_gen = gr.Textbox(label="Prompt", lines=3, placeholder="A photorealistic ginger cat astronaut on Mars", scale=4)
btn_gen = gr.Button("Generate 🚀", variant="primary", scale=1)
gallery_gen = gr.Gallery(label="Generated Images", columns=2, height="auto", preview=True)
btn_gen.click(
generate,
inputs=[api, prompt_gen] + common_controls, # API key first
outputs=gallery_gen,
api_name="generate"
)
# ----- Edit Tab ----- #
with gr.TabItem("Edit / Inpaint"):
gr.Markdown("Upload an image, then **paint the area to change** in the mask canvas below (white = edit area). The API requires the mask and image to have the same dimensions.")
with gr.Row():
img_edit = gr.Image(label="Source Image", type="numpy", height=400)
mask_canvas = gr.ImageMask(
label="Mask – Paint White Where Image Should Change",
type="numpy",
height=400
)
with gr.Row():
prompt_edit = gr.Textbox(label="Edit prompt", lines=2, placeholder="Replace the sky with a starry night", scale=4)
btn_edit = gr.Button("Edit 🖌️", variant="primary", scale=1)
gallery_edit = gr.Gallery(label="Edited Images", columns=2, height="auto", preview=True)
btn_edit.click(
edit_image,
inputs=[api, img_edit, mask_canvas, prompt_edit] + common_controls, # API key first
outputs=gallery_edit,
api_name="edit"
)
# ----- Variations Tab ----- #
with gr.TabItem("Variations (DALL·E 2 only)"):
gr.Markdown("Upload an image to generate variations. **Note:** This endpoint is officially supported for DALL·E 2, not `gpt-image-1`. It likely won't work here.")
with gr.Row():
img_var = gr.Image(label="Source Image", type="numpy", height=400, scale=4)
btn_var = gr.Button("Create Variations ✨", variant="primary", scale=1)
gallery_var = gr.Gallery(label="Variations", columns=2, height="auto", preview=True)
btn_var.click(
variation_image,
inputs=[api, img_var] + common_controls, # API key first
outputs=gallery_var,
api_name="variations"
)
return demo
if __name__ == "__main__":
app = build_ui()
app.launch(share=os.getenv("GRADIO_SHARE") == "true", debug=True)
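# To try this locally (assuming gradio, openai, numpy and pillow are installed):
#   OPENAI_API_KEY=sk-... python app.py
# Setting GRADIO_SHARE=true additionally creates a public share link.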