|
import os |
|
import gradio as gr |
|
from google import genai |
|
from google.genai import types |
|
import tempfile |
|
import uuid |
|
from pathlib import Path |
|
|
|
|
|
# Build the Gemini client only when an API key is configured; otherwise leave
# ``client`` as None so callers can detect the missing key.
client = (
    genai.Client(api_key=os.environ["GOOGLE_API_KEY"])
    if os.environ.get("GOOGLE_API_KEY")
    else None
)
|
|
|
|
|
def save_binary_file(file_name, data):
    """Write ``data`` to ``file_name`` in binary mode and return the name.

    Args:
        file_name: Destination path; the file is opened with mode ``"wb"``,
            so existing content is replaced.
        data: Bytes-like content to write.

    Returns:
        The ``file_name`` argument, unchanged.
    """
    with open(file_name, mode="wb") as sink:
        sink.write(data)
    return file_name
|
|
|
|
|
def process_image_with_gemini(image, instruction) -> "tuple[str | None, str, str]":
    """Edit an image with Gemini and save the result to disk.

    The input image is written as ``input.jpg`` to a per-request folder under
    ``output_gemini``, uploaded through the module-level ``client``, and sent
    together with ``instruction`` to the image-generation model. Image data
    returned by the model is written to ``edited.jpg`` in the same folder;
    text parts are concatenated into the response text.

    Args:
        image: Input PIL image (must expose ``mode``, ``convert`` and ``save``).
        instruction: Text describing the requested edit.

    Returns:
        ``(edited_image_path, response_text, status)``. ``edited_image_path``
        is ``None`` when the model produced no image or an error occurred.
        ``status`` is ``"Success"``, ``"No image generated"``, a rate-limit
        notice, or ``"Error: <message>"``. Exceptions raised while saving,
        uploading or streaming are reported through ``status`` rather than
        propagated.
    """
    # A unique folder per request keeps files of different requests apart.
    request_folder = Path("output_gemini") / f"request_{uuid.uuid4().hex[:8]}"

    try:
        request_folder.mkdir(parents=True, exist_ok=True)

        # JPEG cannot store an alpha channel or a palette, so images in modes
        # such as RGBA or P are converted first; otherwise the save raises.
        input_image_path = request_folder / "input.jpg"
        if image.mode not in ("RGB", "L"):
            image = image.convert("RGB")
        image.save(input_image_path)

        # Upload the copy that was just written instead of saving the image a
        # second time into a temporary directory.
        uploaded = client.files.upload(file=str(input_image_path))

        model = "gemini-2.0-flash-exp-image-generation"
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded.uri,
                        mime_type="image/jpeg",
                    ),
                    types.Part.from_text(text=instruction),
                ],
            ),
        ]
        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_modalities=[
                "image",
                "text",
            ],
            safety_settings=[
                types.SafetySetting(
                    category="HARM_CATEGORY_CIVIC_INTEGRITY",
                    threshold="OFF",
                ),
            ],
            response_mime_type="text/plain",
        )

        text_fragments = []
        edited_image_path = None

        stream = client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        )
        for chunk in stream:
            candidates = chunk.candidates
            if not candidates or not candidates[0].content:
                continue

            # A chunk may carry several parts, so look at each of them rather
            # than only the first one.
            for part in candidates[0].content.parts or []:
                # The SDK's Part objects define both ``inline_data`` and
                # ``text`` and leave the unused one as None, so the values are
                # tested; hasattr() would be true for both on every part.
                inline_data = getattr(part, "inline_data", None)
                if inline_data is not None and inline_data.data:
                    edited_image_path = request_folder / "edited.jpg"
                    save_binary_file(str(edited_image_path), inline_data.data)
                    continue

                text = getattr(part, "text", None)
                if text:
                    text_fragments.append(text)

        response_text = "".join(text_fragments)

        if edited_image_path and edited_image_path.exists():
            return str(edited_image_path), response_text, "Success"
        return None, response_text, "No image generated"

    except Exception as e:
        # Boundary for the UI: every failure becomes a status message.
        error_message = str(e)
        if (
            "RESOURCE_EXHAUSTED" in error_message
            or "rate limit" in error_message.lower()
        ):
            return None, "", "Rate limit exceeded. Please try again later."
        return None, "", f"Error: {error_message}"
|
|
|
|
|
def process_image(image, instruction):
    """Validate a request and forward it to Gemini for editing.

    Checks are made in this order: an image was supplied, the instruction is
    not empty or whitespace-only, and the module-level ``client`` exists.
    The first failing check determines the status message.

    Args:
        image: Input PIL image, or ``None`` when nothing was uploaded.
        instruction: Text instructions for editing.

    Returns:
        Tuple ``(output_image_path, response_text, status_message)``. On a
        validation failure or an unexpected exception the first two items are
        ``None`` and ``""``.
    """
    problem = None
    if image is None:
        problem = "Please upload an image."
    elif not (instruction and instruction.strip()):
        problem = "Please provide an instruction."
    elif client is None:
        problem = "Error: Google API key not found. Please set the GOOGLE_API_KEY environment variable."

    if problem is not None:
        return None, "", problem

    try:
        outcome = process_image_with_gemini(image, instruction)
    except Exception as exc:
        return None, "", f"Unexpected error: {str(exc)}"
    return outcome
|
|
|
|
|
# Gradio UI definition for the image editor.
# NOTE(review): the nesting below was reconstructed from source whose
# indentation had been flattened - confirm it against the intended layout.
with gr.Blocks(title="Gemini Image Editor") as app:
    with gr.Column():
        # Page heading and short usage description.
        gr.Markdown("# 🖼️ Gemini Image Editor")
        gr.Markdown(
            "Upload an image and provide instructions for Gemini to edit it. The AI will generate a new image based on your instructions."
        )

        with gr.Row():
            # Input side: the image to edit, the instruction text, and the
            # button that triggers processing.
            with gr.Column():
                input_image = gr.Image(type="pil", label="Upload Image")
                instruction = gr.Textbox(
                    label="Editing Instructions",
                    placeholder="Describe the edits you want to make...",
                    lines=3,
                )
                submit_btn = gr.Button("✨ Process Image", variant="primary")

            # Output side: one component per element of the tuple returned by
            # process_image (image path, response text, status message).
            with gr.Column():
                output_image = gr.Image(label="Edited Image")
                response_text = gr.Textbox(
                    label="Gemini's Response", lines=3, interactive=False
                )
                status = gr.Textbox(label="Status", interactive=False)

        # Wire the button to process_image; outputs are listed in the same
        # order as the returned tuple.
        submit_btn.click(
            fn=process_image,
            inputs=[input_image, instruction],
            outputs=[output_image, response_text, status],
        )

        # Example instructions that feed the instruction textbox.
        gr.Markdown("### Sample Instructions (upload your own image and try these)")
        sample_instructions = gr.Examples(
            examples=[
                "Make the sky more blue and add birds flying",
                "Convert this to a watercolor painting style",
                "Add a sunset effect to this image",
                "Turn this into a night scene with stars",
                "Make this look like it was taken in winter with snow",
            ],
            inputs=instruction,
        )

        # Footer notes shown below the examples.
        gr.Markdown(
            """
            ### Notes
            - Processing may take up to 30 seconds
            - If you need to duplicate this space, just remember to set the Google API key as an environment variable
            """,
            elem_classes="footer",
        )
|
|
|
|
|
|
|
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    print("Starting Gemini Image Editor...")
    # ssr_mode=True is passed straight through to Gradio's launch().
    app.launch(ssr_mode=True)
|
|