File size: 7,192 Bytes
37696a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
import os
import gradio as gr
from google import genai
from google.genai import types
import tempfile
import uuid
from pathlib import Path

# Shared Gemini client, created once at import time. It stays ``None`` when
# GOOGLE_API_KEY is missing or empty so callers can report a friendly error.
client = (
    genai.Client(api_key=os.environ.get("GOOGLE_API_KEY"))
    if os.environ.get("GOOGLE_API_KEY")
    else None
)


def save_binary_file(file_name, data):
    """Write ``data`` to ``file_name`` in binary mode.

    Any existing file at that location is overwritten.

    Args:
        file_name: Destination path handed to ``open``.
        data: Bytes-like payload to write.

    Returns:
        ``file_name``, unchanged, so the call can be used inline.
    """
    with open(file_name, mode="wb") as sink:
        sink.write(data)
    return file_name


def process_image_with_gemini(image, instruction) -> "tuple[str | None, str, str]":
    """Edit ``image`` with Gemini according to ``instruction``.

    The input is written to a fresh ``output_gemini/request_<id>/input.jpg``,
    uploaded through the module-level ``client`` and sent, together with the
    instruction, to the image-generation model. Image data streamed back is
    written to ``edited.jpg`` in the same folder; streamed text is
    concatenated in arrival order.

    Args:
        image: Image object providing ``save`` (and, for non-RGB inputs,
            ``convert``); the UI passes a PIL image.
        instruction: Text describing the desired edit.

    Returns:
        ``(edited_image_path, response_text, status)``. The path is ``None``
        when no image came back or the request failed; ``status`` is
        ``"Success"``, ``"No image generated"``, a rate-limit notice, or
        ``"Error: ..."``.

    Raises:
        Exception: Failures while creating the request folder or saving the
            input image propagate. Errors raised while talking to the API
            are caught and reported through ``status`` instead.
    """
    # Every request gets its own folder so inputs/outputs never collide.
    output_dir = Path("output_gemini")
    output_dir.mkdir(exist_ok=True)
    request_folder = output_dir / f"request_{uuid.uuid4().hex[:8]}"
    request_folder.mkdir(exist_ok=True)

    # The file is written (and declared to the API) as JPEG, which cannot
    # hold alpha or palette data, so normalise such images to RGB first.
    if getattr(image, "mode", "RGB") not in ("RGB", "L"):
        image = image.convert("RGB")
    input_image_path = request_folder / "input.jpg"
    image.save(input_image_path)

    try:
        # Upload the copy we just saved; a second temporary copy is not needed.
        uploaded = client.files.upload(file=str(input_image_path))

        model = "gemini-2.0-flash-exp-image-generation"
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded.uri,
                        mime_type="image/jpeg",
                    ),
                    types.Part.from_text(text=instruction),
                ],
            ),
        ]
        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_modalities=[
                "image",
                "text",
            ],
            safety_settings=[
                types.SafetySetting(
                    category="HARM_CATEGORY_CIVIC_INTEGRITY",
                    threshold="OFF",  # Off
                ),
            ],
            response_mime_type="text/plain",
        )

        text_fragments = []
        edited_image_path = None

        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        ):
            candidates = chunk.candidates
            if (
                not candidates
                or not candidates[0].content
                or not candidates[0].content.parts
            ):
                continue

            # Look at every part of the chunk, not only the first one.
            for part in candidates[0].content.parts:
                # A part can carry the attribute with a None value, so test
                # the value itself rather than mere attribute existence.
                inline_data = getattr(part, "inline_data", None)
                if inline_data is not None and inline_data.data:
                    # NOTE(review): the payload is stored as "edited.jpg"
                    # whatever its real format is; confirm whether the
                    # extension should follow inline_data.mime_type.
                    edited_image_path = request_folder / "edited.jpg"
                    save_binary_file(str(edited_image_path), inline_data.data)
                    continue

                text = getattr(part, "text", None)
                if text:
                    text_fragments.append(text)

        response_text = "".join(text_fragments)
        if edited_image_path and edited_image_path.exists():
            return str(edited_image_path), response_text, "Success"
        return None, response_text, "No image generated"

    except Exception as e:
        # Boundary handler: the UI expects a status string, not a traceback.
        error_message = str(e)
        if (
            "RESOURCE_EXHAUSTED" in error_message
            or "rate limit" in error_message.lower()
        ):
            return None, "", "Rate limit exceeded. Please try again later."
        return None, "", f"Error: {error_message}"


def process_image(image, instruction):
    """Validate the UI inputs and run the Gemini edit.

    Args:
        image: Input PIL image, or ``None`` when nothing was uploaded.
        instruction: Text instructions for editing.

    Returns:
        Tuple ``(output_image_path, response_text, status_message)``. On a
        validation failure or unexpected exception the path is ``None``, the
        response text is empty and the status explains the problem.
    """
    # Find the first validation problem, checked in the same order the user
    # would fix them: image, then instruction, then server configuration.
    if image is None:
        problem = "Please upload an image."
    elif not (instruction and instruction.strip()):
        problem = "Please provide an instruction."
    elif client is None:
        problem = "Error: Google API key not found. Please set the GOOGLE_API_KEY environment variable."
    else:
        problem = None

    if problem is not None:
        return None, "", problem

    try:
        outcome = process_image_with_gemini(image, instruction)
    except Exception as exc:
        return None, "", f"Unexpected error: {str(exc)}"
    return outcome


# Build the Gradio interface. Components are laid out in the order they are
# created inside the nested context managers, so statement order matters here.
with gr.Blocks(title="Gemini Image Editor") as app:
    with gr.Column():
        # Page header and short description.
        gr.Markdown("# 🖼️ Gemini Image Editor")
        gr.Markdown(
            "Upload an image and provide instructions for Gemini to edit it. The AI will generate a new image based on your instructions."
        )

        with gr.Row():
            # Left column: user inputs.
            with gr.Column():
                # type="pil" makes Gradio hand process_image a PIL image.
                input_image = gr.Image(type="pil", label="Upload Image")
                instruction = gr.Textbox(
                    label="Editing Instructions",
                    placeholder="Describe the edits you want to make...",
                    lines=3,
                )
                submit_btn = gr.Button("✨ Process Image", variant="primary")

            # Right column: results, matching process_image's 3-tuple.
            with gr.Column():
                output_image = gr.Image(label="Edited Image")
                response_text = gr.Textbox(
                    label="Gemini's Response", lines=3, interactive=False
                )
                status = gr.Textbox(label="Status", interactive=False)

        # Wire the button to the handler; outputs are filled positionally
        # from (output_image_path, response_text, status_message).
        submit_btn.click(
            fn=process_image,
            inputs=[input_image, instruction],
            outputs=[output_image, response_text, status],
        )

        # Sample instructions only (no example images); they are bound to
        # the instruction textbox.
        gr.Markdown("### Sample Instructions (upload your own image and try these)")
        sample_instructions = gr.Examples(
            examples=[
                "Make the sky more blue and add birds flying",
                "Convert this to a watercolor painting style",
                "Add a sunset effect to this image",
                "Turn this into a night scene with stars",
                "Make this look like it was taken in winter with snow",
            ],
            inputs=instruction,
        )

        # Footer notes shown below the examples.
        gr.Markdown(
            """
            ### Notes
            - Processing may take up to 30 seconds
            - If you need to duplicate this space, just remember to set the Google API key as an environment variable
            """,
            elem_classes="footer",
        )


# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    print("Starting Gemini Image Editor...")
    # ssr_mode is forwarded to Gradio's launch(); see the Gradio docs for
    # what it enables.
    app.launch(ssr_mode=True)