# Spaces: taesiri/Gemini-Text-based-Image-Editor · Running · File size: 7,192 Bytes · 37696a6

import os
import gradio as gr
from google import genai
from google.genai import types
import tempfile
import uuid
from pathlib import Path

# Build the shared Gemini client once at import time. When GOOGLE_API_KEY is
# unset or empty, ``client`` stays None so callers can detect the missing key.
client = (
    genai.Client(api_key=os.environ.get("GOOGLE_API_KEY"))
    if os.environ.get("GOOGLE_API_KEY")
    else None
)


def save_binary_file(file_name, data):
    """Write ``data`` to ``file_name`` in binary mode and return ``file_name``.

    The file is opened with mode ``"wb"``, so an existing file at that path
    is truncated and overwritten.
    """
    with open(file_name, mode="wb") as sink:
        sink.write(data)
    return file_name


def process_image_with_gemini(image, instruction) -> tuple[str | None, str, str]:
    """Send ``image`` and ``instruction`` to Gemini and store the edited image.

    A per-request folder ``output_gemini/request_<8 hex chars>/`` is created.
    The input is saved there as ``input.jpg`` and, if the model returns image
    data, the result is saved as ``edited.jpg``.

    Args:
        image: Input PIL image.
        instruction: Text instructions for editing.

    Returns:
        Tuple ``(edited_image_path, response_text, status_message)``.
        ``edited_image_path`` is ``None`` when no image was produced or when
        the API call failed; ``response_text`` is the concatenation of all
        text parts streamed back by the model.

    Errors raised while uploading or generating are caught and reported
    through ``status_message``; errors while creating the request folder or
    saving the input image propagate to the caller.
    """
    # Create output directory if it doesn't exist
    output_dir = Path("output_gemini")
    output_dir.mkdir(exist_ok=True)

    # Generate a unique ID for this request
    request_id = f"request_{uuid.uuid4().hex[:8]}"
    request_folder = output_dir / request_id
    request_folder.mkdir(exist_ok=True)

    # JPEG cannot store alpha or palette modes (e.g. RGBA, P); convert first
    # so saving as .jpg does not fail on such inputs.
    if image.mode not in ("RGB", "L", "CMYK"):
        image = image.convert("RGB")

    # Save the input image to the request folder. This same file is uploaded
    # below, so no second temporary JPEG copy is needed.
    input_image_path = request_folder / "input.jpg"
    image.save(input_image_path)

    try:
        # Upload the saved input file to the Gemini API using the global client
        uploaded_file = client.files.upload(file=str(input_image_path))

        model = "gemini-2.0-flash-exp-image-generation"
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded_file.uri,
                        mime_type="image/jpeg",
                    ),
                    types.Part.from_text(text=instruction),
                ],
            ),
        ]
        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_modalities=[
                "image",
                "text",
            ],
            safety_settings=[
                types.SafetySetting(
                    category="HARM_CATEGORY_CIVIC_INTEGRITY",
                    threshold="OFF",  # Off
                ),
            ],
            response_mime_type="text/plain",
        )

        text_fragments = []
        edited_image_path = None

        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        ):
            candidates = chunk.candidates
            if (
                not candidates
                or not candidates[0].content
                or not candidates[0].content.parts
            ):
                continue

            # Inspect every part of the chunk, not only the first one.
            for part in candidates[0].content.parts:
                # Test the attribute *value*, not its existence: a part may
                # expose both ``inline_data`` and ``text`` with the unused one
                # set to None, so ``hasattr`` cannot tell them apart.
                inline_data = getattr(part, "inline_data", None)
                if inline_data is not None and inline_data.data:
                    # NOTE(review): the bytes are written as-is under a .jpg
                    # name; the returned data may use another image format.
                    edited_image_path = request_folder / "edited.jpg"
                    save_binary_file(str(edited_image_path), inline_data.data)
                    continue

                text = getattr(part, "text", None)
                if text:
                    text_fragments.append(text)

        response_text = "".join(text_fragments)

        if edited_image_path and edited_image_path.exists():
            return str(edited_image_path), response_text, "Success"
        return None, response_text, "No image generated"

    except Exception as e:
        # Boundary handler: surface any API failure as a status message
        # instead of raising.
        error_message = str(e)
        if (
            "RESOURCE_EXHAUSTED" in error_message
            or "rate limit" in error_message.lower()
        ):
            return None, "", "Rate limit exceeded. Please try again later."
        return None, "", f"Error: {error_message}"


def process_image(image, instruction):
    """Validate the request, then delegate the edit to Gemini.

    Args:
        image: Input PIL image, or None when nothing was uploaded.
        instruction: Text instructions for editing.

    Returns:
        Tuple containing (output_image_path, response_text, status_message).
        On any validation failure or unexpected exception the first two
        items are ``None`` and ``""`` and the third explains the problem.
    """
    # Checks run in a fixed order; the first one that fails decides the message.
    if image is None:
        failure = "Please upload an image."
    elif not instruction or not instruction.strip():
        failure = "Please provide an instruction."
    elif client is None:
        failure = "Error: Google API key not found. Please set the GOOGLE_API_KEY environment variable."
    else:
        failure = None

    if failure is not None:
        return None, "", failure

    try:
        outcome = process_image_with_gemini(image, instruction)
    except Exception as exc:
        return None, "", f"Unexpected error: {exc!s}"
    return outcome


# Declarative Gradio UI. Components are laid out in the order they are
# created inside the nested context managers, so statement order matters here.
with gr.Blocks(title="Gemini Image Editor") as app:
    with gr.Column():
        # Page heading and short usage blurb
        gr.Markdown("# 🖼️ Gemini Image Editor")
        gr.Markdown(
            "Upload an image and provide instructions for Gemini to edit it. The AI will generate a new image based on your instructions."
        )

        with gr.Row():
            # Left column: user inputs (image is requested as a PIL object)
            with gr.Column():
                input_image = gr.Image(type="pil", label="Upload Image")
                instruction = gr.Textbox(
                    label="Editing Instructions",
                    placeholder="Describe the edits you want to make...",
                    lines=3,
                )
                submit_btn = gr.Button("✨ Process Image", variant="primary")

            # Right column: read-only results
            with gr.Column():
                output_image = gr.Image(label="Edited Image")
                response_text = gr.Textbox(
                    label="Gemini's Response", lines=3, interactive=False
                )
                status = gr.Textbox(label="Status", interactive=False)

        # The three outputs line up with process_image's 3-tuple return:
        # (output_image_path, response_text, status_message)
        submit_btn.click(
            fn=process_image,
            inputs=[input_image, instruction],
            outputs=[output_image, response_text, status],
        )

        # Sample instructions only (no example images); each entry is offered
        # as a preset value for the instruction textbox.
        gr.Markdown("### Sample Instructions (upload your own image and try these)")
        sample_instructions = gr.Examples(
            examples=[
                "Make the sky more blue and add birds flying",
                "Convert this to a watercolor painting style",
                "Add a sunset effect to this image",
                "Turn this into a night scene with stars",
                "Make this look like it was taken in winter with snow",
            ],
            inputs=instruction,
        )

        # Footer notes shown at the bottom of the page
        gr.Markdown(
            """
            ### Notes
            - Processing may take up to 30 seconds
            - If you need to duplicate this space, just remember to set the Google API key as an environment variable
            """,
            elem_classes="footer",
        )


# Launch the app only when this file is executed as a script, not on import.
# NOTE(review): ssr_mode=True is passed straight to launch(); presumably it
# enables Gradio's server-side rendering — confirm against the installed version.
if __name__ == "__main__":
    print("Starting Gemini Image Editor...")
    app.launch(ssr_mode=True)