Spaces:

taesiri
/

Gemini-Text-based-Image-Editor

Running

App Files Files Community

taesiri committed on Mar 18

Commit

37696a6

verified ·

1 Parent(s): fd6bc5b

Upload 2 files

Browse files

Files changed (2) hide show

app.py +205 -0
requirements.txt +2 -0

app.py ADDED Viewed

	@@ -0,0 +1,205 @@

+import os
+import gradio as gr
+from google import genai
+from google.genai import types
+import tempfile
+import uuid
+from pathlib import Path
+# Initialize the Gemini client globally
+client = None
+if os.environ.get("GOOGLE_API_KEY"):
+    client = genai.Client(api_key=os.environ.get("GOOGLE_API_KEY"))
+def save_binary_file(file_name, data):
+    with open(file_name, "wb") as f:
+        f.write(data)
+    return file_name
+def process_image_with_gemini(image, instruction) -> tuple[str, str, str]:
+    # Create output directory if it doesn't exist
+    output_dir = Path("output_gemini")
+    output_dir.mkdir(exist_ok=True)
+    # Generate a unique ID for this request
+    request_id = f"request_{uuid.uuid4().hex[:8]}"
+    request_folder = output_dir / request_id
+    request_folder.mkdir(exist_ok=True)
+    # Save the input image to the request folder
+    input_image_path = request_folder / "input.jpg"
+    image.save(input_image_path)
+    try:
+        # Create a temporary directory that will be automatically cleaned up
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Save the image to a temporary file
+            temp_image_path = Path(temp_dir) / "temp_input_image.jpg"
+            image.save(temp_image_path)
+            # Upload the temporary file to Gemini API using global client
+            files = [
+                client.files.upload(file=str(temp_image_path)),
+            ]
+            model = "gemini-2.0-flash-exp-image-generation"
+            contents = [
+                types.Content(
+                    role="user",
+                    parts=[
+                        types.Part.from_uri(
+                            file_uri=files[0].uri,
+                            mime_type="image/jpeg",
+                        ),
+                        types.Part.from_text(text=instruction),
+                    ],
+                ),
+            ]
+            generate_content_config = types.GenerateContentConfig(
+                temperature=1,
+                top_p=0.95,
+                top_k=40,
+                max_output_tokens=8192,
+                response_modalities=[
+                    "image",
+                    "text",
+                ],
+                safety_settings=[
+                    types.SafetySetting(
+                        category="HARM_CATEGORY_CIVIC_INTEGRITY",
+                        threshold="OFF",  # Off
+                    ),
+                ],
+                response_mime_type="text/plain",
+            )
+            response_text = ""
+            edited_image_path = None
+            for chunk in client.models.generate_content_stream(
+                model=model,
+                contents=contents,
+                config=generate_content_config,
+            ):
+                if (
+                    not chunk.candidates
+                    or not chunk.candidates[0].content
+                    or not chunk.candidates[0].content.parts
+                ):
+                    continue
+                # Handle image response
+                if hasattr(chunk.candidates[0].content.parts[0], "inline_data"):
+                    # Save the generated image
+                    edited_image_path = request_folder / "edited.jpg"
+                    save_binary_file(
+                        str(edited_image_path),
+                        chunk.candidates[0].content.parts[0].inline_data.data,
+                    )
+                # Handle text response
+                elif hasattr(chunk.candidates[0].content.parts[0], "text"):
+                    response_text += chunk.candidates[0].content.parts[0].text
+            # Simplify the return statement and ensure consistent types
+            if edited_image_path and edited_image_path.exists():
+                return str(edited_image_path), response_text or "", "Success"
+            return None, response_text or "", "No image generated"
+    except Exception as e:
+        error_message = str(e)
+        if (
+            "RESOURCE_EXHAUSTED" in error_message
+            or "rate limit" in error_message.lower()
+        ):
+            return None, "", "Rate limit exceeded. Please try again later."
+        return None, "", f"Error: {error_message}"
+def process_image(image, instruction):
+    """Process an image with Gemini based on given instructions.
+    Args:
+        image: Input PIL image
+        instruction: Text instructions for editing
+    Returns:
+        Tuple containing (output_image_path, response_text, status_message)
+    """
+    if image is None:
+        return None, "", "Please upload an image."
+    if not instruction or instruction.strip() == "":
+        return None, "", "Please provide an instruction."
+    if client is None:
+        return (
+            None,
+            "",
+            "Error: Google API key not found. Please set the GOOGLE_API_KEY environment variable.",
+        )
+    try:
+        return process_image_with_gemini(image, instruction)
+    except Exception as e:
+        return None, "", f"Unexpected error: {str(e)}"
+# Build the Gradio UI. Components are declared inside nested context managers,
+# so the statement order and nesting below define the page structure.
+with gr.Blocks(title="Gemini Image Editor") as app:
+    with gr.Column():
+        # Page heading and a short description of what the app does.
+        gr.Markdown("# 🖼️ Gemini Image Editor")
+        gr.Markdown(
+            "Upload an image and provide instructions for Gemini to edit it. The AI will generate a new image based on your instructions."
+        )
+        with gr.Row():
+            # First column: user inputs (image, instruction text, submit button).
+            with gr.Column():
+                input_image = gr.Image(type="pil", label="Upload Image")
+                instruction = gr.Textbox(
+                    label="Editing Instructions",
+                    placeholder="Describe the edits you want to make...",
+                    lines=3,
+                )
+                submit_btn = gr.Button("✨ Process Image", variant="primary")
+            # Second column: results (edited image, model text, status line).
+            with gr.Column():
+                output_image = gr.Image(label="Edited Image")
+                response_text = gr.Textbox(
+                    label="Gemini's Response", lines=3, interactive=False
+                )
+                status = gr.Textbox(label="Status", interactive=False)
+        # Clicking the button calls process_image with the image and the
+        # instruction; its three return values fill the three output widgets.
+        submit_btn.click(
+            fn=process_image,
+            inputs=[input_image, instruction],
+            outputs=[output_image, response_text, status],
+        )
+        # Text-only examples bound to the instruction box; no example images
+        # are bundled, so the user still has to upload their own image.
+        gr.Markdown("### Sample Instructions (upload your own image and try these)")
+        sample_instructions = gr.Examples(
+            examples=[
+                "Make the sky more blue and add birds flying",
+                "Convert this to a watercolor painting style",
+                "Add a sunset effect to this image",
+                "Turn this into a night scene with stars",
+                "Make this look like it was taken in winter with snow",
+            ],
+            inputs=instruction,
+        )
+        # Footer notes shown to the user.
+        gr.Markdown(
+            """
+            ### Notes
+            - Processing may take up to 30 seconds
+            - If you need to duplicate this space, just remember to set the Google API key as an environment variable
+            """,
+            elem_classes="footer",
+        )
+# Launch the app only when this file is run as a script (not when imported).
+if __name__ == "__main__":
+    print("Starting Gemini Image Editor...")
+    app.launch(ssr_mode=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ google-genai
2	+ gradio