"""Gradio front end that sends an image plus an instruction to Gemini for editing."""

import mimetypes
import os
import tempfile
import uuid
from pathlib import Path
from typing import Optional

import gradio as gr
from google import genai
from google.genai import types

# Initialize the Gemini client globally; it stays None when no API key is set
# so the UI can report the missing key instead of failing at import time.
client = None
if os.environ.get("GOOGLE_API_KEY"):
    client = genai.Client(api_key=os.environ.get("GOOGLE_API_KEY"))


def save_binary_file(file_name, data):
    """Write ``data`` (bytes) to ``file_name`` and return ``file_name``."""
    with open(file_name, "wb") as f:
        f.write(data)
    return file_name


def process_image_with_gemini(image, instruction) -> tuple[Optional[str], str, str]:
    """Send ``image`` and ``instruction`` to Gemini and collect the reply.

    The input image and any generated image are stored under
    ``output_gemini/request_<id>/``.

    Args:
        image: Input PIL image.
        instruction: Text instructions for editing.

    Returns:
        Tuple ``(edited_image_path, response_text, status_message)``.
        ``edited_image_path`` is ``None`` when no image was produced or when
        the API call failed; ``status_message`` then explains why.
    """
    # Create output directory if it doesn't exist
    output_dir = Path("output_gemini")
    output_dir.mkdir(exist_ok=True)

    # Generate a unique folder for this request
    request_id = f"request_{uuid.uuid4().hex[:8]}"
    request_folder = output_dir / request_id
    request_folder.mkdir(exist_ok=True)

    # JPEG cannot store alpha or palette data, so normalise such images
    # before saving (relevant when a caller passes a non-RGB PIL image).
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")

    # Save the input image to the request folder
    input_image_path = request_folder / "input.jpg"
    image.save(input_image_path)

    try:
        # Upload the already-saved input file; a second temporary copy of the
        # same JPEG is unnecessary.
        files = [
            client.files.upload(file=str(input_image_path)),
        ]
        model = "gemini-2.0-flash-exp-image-generation"
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=files[0].uri,
                        mime_type="image/jpeg",
                    ),
                    types.Part.from_text(text=instruction),
                ],
            ),
        ]
        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_modalities=[
                "image",
                "text",
            ],
            safety_settings=[
                types.SafetySetting(
                    category="HARM_CATEGORY_CIVIC_INTEGRITY",
                    threshold="OFF",  # Off
                ),
            ],
            response_mime_type="text/plain",
        )

        text_fragments = []
        edited_image_path = None

        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        ):
            if (
                not chunk.candidates
                or not chunk.candidates[0].content
                or not chunk.candidates[0].content.parts
            ):
                continue

            # A chunk may carry several parts, so inspect each of them.
            for part in chunk.candidates[0].content.parts:
                # ``inline_data`` and ``text`` are optional fields that exist
                # on every part, so test their values rather than hasattr().
                inline_data = getattr(part, "inline_data", None)
                if inline_data is not None and inline_data.data:
                    # Name the file after the returned MIME type; fall back
                    # to .jpg when the type is missing or unknown.
                    extension = (
                        mimetypes.guess_extension(inline_data.mime_type or "")
                        or ".jpg"
                    )
                    edited_image_path = request_folder / f"edited{extension}"
                    save_binary_file(str(edited_image_path), inline_data.data)
                    continue

                text = getattr(part, "text", None)
                if text:
                    text_fragments.append(text)

        response_text = "".join(text_fragments)

        if edited_image_path and edited_image_path.exists():
            return str(edited_image_path), response_text, "Success"
        return None, response_text, "No image generated"

    except Exception as e:
        # UI boundary: turn any API failure into a status message.
        error_message = str(e)
        if (
            "RESOURCE_EXHAUSTED" in error_message
            or "rate limit" in error_message.lower()
        ):
            return None, "", "Rate limit exceeded. Please try again later."
        return None, "", f"Error: {error_message}"


def process_image(image, instruction):
    """Process an image with Gemini based on given instructions.

    Validates the inputs and the client before delegating to
    ``process_image_with_gemini``.

    Args:
        image: Input PIL image
        instruction: Text instructions for editing

    Returns:
        Tuple containing (output_image_path, response_text, status_message)
    """
    if image is None:
        return None, "", "Please upload an image."

    if not instruction or not instruction.strip():
        return None, "", "Please provide an instruction."

    if client is None:
        return (
            None,
            "",
            "Error: Google API key not found. Please set the GOOGLE_API_KEY environment variable.",
        )

    try:
        return process_image_with_gemini(image, instruction)
    except Exception as e:
        return None, "", f"Unexpected error: {str(e)}"


with gr.Blocks(title="Gemini Image Editor") as app:
    with gr.Column():
        gr.Markdown("# 🖼️ Gemini Image Editor")
        gr.Markdown(
            "Upload an image and provide instructions for Gemini to edit it. The AI will generate a new image based on your instructions."
        )

        with gr.Row():
            with gr.Column():
                input_image = gr.Image(type="pil", label="Upload Image")
                instruction = gr.Textbox(
                    label="Editing Instructions",
                    placeholder="Describe the edits you want to make...",
                    lines=3,
                )
                submit_btn = gr.Button("✨ Process Image", variant="primary")

            with gr.Column():
                output_image = gr.Image(label="Edited Image")
                response_text = gr.Textbox(
                    label="Gemini's Response", lines=3, interactive=False
                )
                status = gr.Textbox(label="Status", interactive=False)

        submit_btn.click(
            fn=process_image,
            inputs=[input_image, instruction],
            outputs=[output_image, response_text, status],
        )

        # Add sample instructions (without example images)
        gr.Markdown("### Sample Instructions (upload your own image and try these)")
        sample_instructions = gr.Examples(
            examples=[
                "Make the sky more blue and add birds flying",
                "Convert this to a watercolor painting style",
                "Add a sunset effect to this image",
                "Turn this into a night scene with stars",
                "Make this look like it was taken in winter with snow",
            ],
            inputs=instruction,
        )

        gr.Markdown(
            """
            ### Notes
            - Processing may take up to 30 seconds
            - If you need to duplicate this space, just remember to set the Google API key as an environment variable
            """,
            elem_classes="footer",
        )

# Launch the app
if __name__ == "__main__":
    print("Starting Gemini Image Editor...")
    app.launch(ssr_mode=True)