# Source: uploaded by taesiri ("Upload 2 files", commit 37696a6, verified).
import os
import gradio as gr
from google import genai
from google.genai import types
import tempfile
import uuid
from pathlib import Path
# Build the shared Gemini client once, at import time. When GOOGLE_API_KEY is
# unset or empty, `client` stays None and no client object is constructed.
client = (
    genai.Client(api_key=os.environ.get("GOOGLE_API_KEY"))
    if os.environ.get("GOOGLE_API_KEY")
    else None
)
def save_binary_file(file_name, data):
    """Write ``data`` to ``file_name`` in binary mode and return ``file_name``.

    An existing file at that location is truncated and overwritten.
    """
    with open(file_name, mode="wb") as out_file:
        out_file.write(data)
    return file_name
def process_image_with_gemini(image, instruction) -> "tuple[str | None, str, str]":
    """Edit ``image`` with Gemini according to ``instruction``.

    The input image and any generated image are stored under
    ``output_gemini/request_<id>/`` so every request keeps its own artifacts.

    Args:
        image: PIL image to edit. It is converted to RGB when necessary so it
            can be written as JPEG.
        instruction: Natural-language description of the desired edit.

    Returns:
        ``(edited_image_path, response_text, status_message)``.
        ``edited_image_path`` is ``None`` when the model produced no image or
        the request failed. ``response_text`` is the concatenated text streamed
        back by the model (empty string when there is none, and always empty
        on error). ``status_message`` is ``"Success"``, ``"No image generated"``
        or an error description.

    Raises:
        Exceptions raised while creating the request folder or saving the input
        image propagate to the caller. Exceptions raised while talking to the
        API are caught and reported through ``status_message`` instead.
    """
    # A short random id per request keeps requests from overwriting each
    # other's files.
    request_folder = Path("output_gemini") / f"request_{uuid.uuid4().hex[:8]}"
    request_folder.mkdir(parents=True, exist_ok=True)

    # JPEG cannot store alpha or palette images; normalise to RGB first so
    # saving "input.jpg" does not fail for e.g. RGBA PNG uploads.
    if getattr(image, "mode", "RGB") != "RGB":
        image = image.convert("RGB")
    input_image_path = request_folder / "input.jpg"
    image.save(input_image_path)

    try:
        # Upload the saved input directly; a second temporary copy is not needed.
        uploaded_file = client.files.upload(file=str(input_image_path))

        model = "gemini-2.0-flash-exp-image-generation"
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded_file.uri,
                        mime_type="image/jpeg",
                    ),
                    types.Part.from_text(text=instruction),
                ],
            ),
        ]
        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_modalities=[
                "image",
                "text",
            ],
            safety_settings=[
                types.SafetySetting(
                    category="HARM_CATEGORY_CIVIC_INTEGRITY",
                    threshold="OFF",  # Off
                ),
            ],
            response_mime_type="text/plain",
        )

        text_fragments = []
        edited_image_path = None
        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        ):
            candidates = chunk.candidates
            if (
                not candidates
                or not candidates[0].content
                or not candidates[0].content.parts
            ):
                continue

            # Inspect every part of the chunk, not only the first one.
            for part in candidates[0].content.parts:
                # Test the attribute's value, not merely its presence: an
                # attribute that exists but is None must not be dereferenced.
                inline_data = getattr(part, "inline_data", None)
                if inline_data is not None and inline_data.data:
                    # NOTE(review): the payload is stored as "edited.jpg"
                    # whatever mime type the model reports — confirm that
                    # downstream consumers sniff the content.
                    edited_image_path = request_folder / "edited.jpg"
                    save_binary_file(str(edited_image_path), inline_data.data)
                    continue

                text = getattr(part, "text", None)
                if text:
                    text_fragments.append(text)

        response_text = "".join(text_fragments)
        if edited_image_path is not None and edited_image_path.exists():
            return str(edited_image_path), response_text, "Success"
        return None, response_text, "No image generated"
    except Exception as e:
        # Boundary handler: the UI expects a status tuple, never an exception.
        error_message = str(e)
        if (
            "RESOURCE_EXHAUSTED" in error_message
            or "rate limit" in error_message.lower()
        ):
            return None, "", "Rate limit exceeded. Please try again later."
        return None, "", f"Error: {error_message}"
def process_image(image, instruction):
    """Validate the request, then delegate the edit to Gemini.

    Args:
        image: Input PIL image, or None when nothing was uploaded.
        instruction: Text instructions for editing.

    Returns:
        Tuple of (output_image_path, response_text, status_message). On any
        validation failure or unexpected exception the path is None, the
        response text is empty and the status message explains the problem.
    """
    # Checks run in a fixed order: image first, then instruction, then client.
    problem = None
    if image is None:
        problem = "Please upload an image."
    elif not (instruction and instruction.strip()):
        problem = "Please provide an instruction."
    elif client is None:
        problem = "Error: Google API key not found. Please set the GOOGLE_API_KEY environment variable."

    if problem is not None:
        return None, "", problem

    try:
        outcome = process_image_with_gemini(image, instruction)
    except Exception as exc:
        return None, "", f"Unexpected error: {exc!s}"
    return outcome
# Declarative Gradio layout. Components are created in the order they should
# appear; `app` is the Blocks object launched at the bottom of the file.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost — confirm it matches the intended layout.
with gr.Blocks(title="Gemini Image Editor") as app:
    with gr.Column():
        # Page header and short description.
        gr.Markdown("# 🖼️ Gemini Image Editor")
        gr.Markdown(
            "Upload an image and provide instructions for Gemini to edit it. The AI will generate a new image based on your instructions."
        )
        with gr.Row():
            # Left column: user inputs (image is handed to the callback as a PIL image).
            with gr.Column():
                input_image = gr.Image(type="pil", label="Upload Image")
                instruction = gr.Textbox(
                    label="Editing Instructions",
                    placeholder="Describe the edits you want to make...",
                    lines=3,
                )
                submit_btn = gr.Button("✨ Process Image", variant="primary")
            # Right column: read-only results filled in by process_image.
            with gr.Column():
                output_image = gr.Image(label="Edited Image")
                response_text = gr.Textbox(
                    label="Gemini's Response", lines=3, interactive=False
                )
                status = gr.Textbox(label="Status", interactive=False)
        # The three outputs map, in order, onto the tuple returned by
        # process_image: (image path, response text, status message).
        submit_btn.click(
            fn=process_image,
            inputs=[input_image, instruction],
            outputs=[output_image, response_text, status],
        )
        # Sample instructions only (no example images); they target the
        # instruction textbox.
        gr.Markdown("### Sample Instructions (upload your own image and try these)")
        sample_instructions = gr.Examples(
            examples=[
                "Make the sky more blue and add birds flying",
                "Convert this to a watercolor painting style",
                "Add a sunset effect to this image",
                "Turn this into a night scene with stars",
                "Make this look like it was taken in winter with snow",
            ],
            inputs=instruction,
        )
        # Footer notes shown below the examples.
        gr.Markdown(
            """
### Notes
- Processing may take up to 30 seconds
- If you need to duplicate this space, just remember to set the Google API key as an environment variable
""",
            elem_classes="footer",
        )
# Launch the app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    print("Starting Gemini Image Editor...")
    # NOTE(review): ssr_mode=True presumably enables Gradio's server-side
    # rendering — confirm the installed Gradio version supports this argument.
    app.launch(ssr_mode=True)