Spaces:

rdesai2
/

LoRACaptioner

Running

File size: 25,339 Bytes

import gradio as gr
import os
import zipfile
from io import BytesIO
import time
import tempfile
from pathlib import Path
import shutil

from main import process_images
from prompt import optimize_prompt

# Maximum number of images handled per upload. The UI pre-builds this many
# image/caption rows, and load_captioning() truncates longer uploads to it.
MAX_IMAGES = 30

# ------- File Operations -------

def create_download_file(image_paths, captions):
    """Build an in-memory zip archive of images and their caption files.

    Each image is stored under its original file name, extension included:
    the bytes are copied verbatim and never re-encoded, so renaming every
    entry to ``.png`` would mislabel JPEG/WebP/etc. data. The caption is
    stored next to it as ``<stem>.txt``.

    Args:
        image_paths: Paths of the image files to include.
        captions: Caption strings aligned index-by-index with ``image_paths``.
            Surplus items on either side are ignored (``zip`` semantics).

    Returns:
        bytes: The raw content of the zip archive.

    Raises:
        OSError: If an image file cannot be opened or read.
    """
    zip_io = BytesIO()
    with zipfile.ZipFile(zip_io, 'w') as zip_file:
        for image_path, caption in zip(image_paths, captions):
            # Keep the real extension so the entry name matches its content.
            img_name = os.path.basename(image_path)
            caption_name = f"{os.path.splitext(img_name)[0]}.txt"

            # Add image to zip
            with open(image_path, 'rb') as img_file:
                zip_file.writestr(img_name, img_file.read())

            # Add caption to zip
            zip_file.writestr(caption_name, caption)

    # The archive's central directory is written when the ZipFile closes,
    # so the buffer is only complete after the with-block.
    return zip_io.getvalue()

def process_uploaded_images(image_paths, batch_by_category=False):
    """Caption the given images by running ``process_images`` on temp copies.

    The images are copied, keeping their file names, into a temporary input
    directory; ``process_images`` is then called with that directory and a
    temporary output directory, and the ``<stem>.txt`` files found in the
    output directory are read back. Both directories are removed on return.

    Args:
        image_paths: Paths of the uploaded image files.
        batch_by_category: Forwarded to ``process_images`` as ``batch_images``.

    Returns:
        list[str]: One caption per entry of ``image_paths``, in the same
        order; ``""`` where no caption file was found.
    """
    with tempfile.TemporaryDirectory() as temp_input_dir, \
            tempfile.TemporaryDirectory() as temp_output_dir:
        input_dir = Path(temp_input_dir)
        output_dir = Path(temp_output_dir)

        # Keep original filename to preserve categorization
        for path in image_paths:
            shutil.copy2(path, input_dir / os.path.basename(path))

        # Process the images using main.py's function
        process_images(temp_input_dir, temp_output_dir, batch_images=batch_by_category)

        # Collect the captions while the output directory still exists.
        captions = []
        for orig_path in image_paths:
            stem = os.path.splitext(os.path.basename(orig_path))[0]
            caption_path = output_dir / f"{stem}.txt"

            # Missing caption file -> empty string, so the result stays
            # aligned with image_paths.
            if caption_path.exists():
                captions.append(caption_path.read_text(encoding='utf-8'))
            else:
                captions.append("")

        return captions

# ------- UI Interaction Functions -------

def load_captioning(files):
    """Validate an upload and compute the UI state that goes with it.

    Returns a tuple of ``6 + MAX_IMAGES`` items: the accepted image paths,
    then updates for the captioning area, caption button, download button,
    download output and status text, then one visibility update per row.
    """
    def rejected(status_message):
        # Nothing usable was uploaded: hide/disable everything and report why.
        return (
            [],
            gr.update(visible=False),
            gr.update(interactive=False),
            gr.update(interactive=False),
            gr.update(visible=False),
            gr.update(value=status_message),
            *[gr.update(visible=False) for _ in range(MAX_IMAGES)],
        )

    if not files:
        return rejected("Upload images to begin")

    # Keep only files whose name ends in a known image extension.
    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp')
    image_paths = [candidate for candidate in files if candidate.lower().endswith(image_suffixes)]

    if not image_paths:
        gr.Warning("Please upload at least one image")
        return rejected("No valid images found")

    if len(image_paths) > MAX_IMAGES:
        gr.Warning(f"Only the first {MAX_IMAGES} images will be processed")
        image_paths = image_paths[:MAX_IMAGES]

    # One row per accepted image is shown; the remaining rows stay hidden.
    accepted = len(image_paths)
    row_updates = [gr.update(visible=(slot < accepted)) for slot in range(MAX_IMAGES)]

    return (
        image_paths,  # stored_image_paths
        gr.update(visible=True),  # captioning_area
        gr.update(interactive=True),  # caption_btn
        gr.update(interactive=False),  # download_btn - stays disabled until captioning is done
        gr.update(visible=False),  # download_output
        gr.update(value=f"{accepted} images ready for captioning"),  # status_text
        *row_updates  # image_rows
    )

def update_images(image_paths):
    """Build one value update per image slot.

    Slots that have an uploaded image get its path; the rest are cleared.
    """
    print(f"Updating images with paths: {image_paths}")
    available = len(image_paths)
    return [
        gr.update(value=image_paths[slot] if slot < available else None)
        for slot in range(MAX_IMAGES)
    ]

def update_caption_labels(image_paths):
    """Build one label update per caption slot.

    Slots that have an image are labelled with its file name; the rest get
    an empty label.
    """
    labels = [os.path.basename(path) for path in image_paths[:MAX_IMAGES]]
    # Pad with blanks so exactly MAX_IMAGES updates are produced.
    labels.extend([""] * (MAX_IMAGES - len(labels)))
    return [gr.update(label=text) for text in labels]

def run_captioning(image_paths, batch_category):
    """Caption the stored images and build the resulting UI updates.

    Failures are reported through the status text and a toast rather than by
    raising, so the function always returns a full set of updates.

    Args:
        image_paths: Paths of the images to caption.
        batch_category: Passed to ``process_uploaded_images`` as its
            ``batch_by_category`` argument.

    Returns:
        list: ``MAX_IMAGES`` caption-textbox updates followed by a single
        status-text update.
    """
    if not image_paths:
        cleared = [gr.update(value="") for _ in range(MAX_IMAGES)]
        return cleared + [gr.update(value="No images to process")]

    try:
        print(f"Starting captioning for {len(image_paths)} images, batch_by_category={batch_category}")
        captions = process_uploaded_images(image_paths, batch_category)

        # Count valid captions
        valid_captions = sum(1 for c in captions if c and c.strip())
        print(f"Generated {valid_captions} valid captions out of {len(captions)} images")

        if valid_captions < len(captions):
            gr.Warning(f"{len(captions) - valid_captions} images could not be captioned properly")
            status = gr.update(value=f"✅ Captioning complete - {valid_captions}/{len(captions)} successful")
        else:
            gr.Info("Captioning complete!")
            status = gr.update(value="✅ Captioning complete")

        print("Sample captions:", captions[:2])
    except Exception as e:
        # UI boundary: catch everything so the user gets a status message
        # instead of a broken event.
        print(f"Error in captioning: {str(e)}")
        # gr.Error is an exception class, so merely constructing it shows
        # nothing. Raising it would stop the caption button's event chain
        # before the step that re-enables the button, so show the failure
        # as a toast instead and carry on with empty captions.
        gr.Warning(f"Captioning failed: {str(e)}")
        captions = [""] * len(image_paths)  # Use empty strings
        status = gr.update(value=f"❌ Error: {str(e)}")

    # One update per textbox; slots without a usable caption are blanked.
    caption_updates = [
        gr.update(value=captions[i] if i < len(captions) and captions[i] else "")
        for i in range(MAX_IMAGES)
    ]

    print(f"Returning {len(caption_updates)} caption updates")
    return caption_updates + [status]

def update_batch_setting(value):
    """Pass the checkbox value through unchanged.

    Used as an event handler that copies the checkbox state into the
    ``batch_by_category`` state component.
    """
    return value

def create_zip_from_ui(image_paths, *captions_list):
    """Write a zip of the images and their current UI captions to disk.

    Images whose caption box is empty are left out. Image and caption are
    filtered as a pair, so a blank caption in the middle of the list can
    never shift later captions onto the wrong image.

    Args:
        image_paths: Paths of the uploaded images.
        *captions_list: Current caption textbox values, in the same order as
            ``image_paths``. Surplus values (unused rows) are ignored.

    Returns:
        str | None: Path of the written zip file, or ``None`` (after a
        warning toast) when there is nothing to package.
    """
    captioned = [
        (path, caption)
        for path, caption in zip(image_paths or [], captions_list)
        if caption
    ]

    if not captioned:
        gr.Warning("No images to download")
        return None

    kept_paths, kept_captions = zip(*captioned)

    # Create zip file
    zip_data = create_download_file(kept_paths, kept_captions)
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    zip_filename = f"image_captions_{timestamp}.zip"

    # The timestamp is only unique to the second; a fresh private directory
    # stops two requests in the same second from overwriting each other's
    # archive while keeping the same download file name.
    zip_path = os.path.join(tempfile.mkdtemp(), zip_filename)

    # Write the zip data to the temporary file
    with open(zip_path, "wb") as f:
        f.write(zip_data)

    # Return the path to the temporary file
    return zip_path

def process_upload(files, image_rows, image_components, caption_components):
    """Combine every UI update triggered by a new upload into one flat list.

    The list holds the output of ``load_captioning`` (paths, area/button/
    status updates, row visibility), followed by the image value updates and
    then the caption label updates. The three component-list parameters are
    accepted but not used here.
    """
    upload_state = load_captioning(files)
    image_paths = upload_state[0]

    # List displays evaluate left to right, so images are updated before labels.
    return [
        *upload_state,
        *update_images(image_paths),
        *update_caption_labels(image_paths),
    ]

def on_captioning_start():
    """Return the updates shown while captioning runs.

    The first update sets the "processing" status text; the second makes the
    caption button non-interactive.
    """
    status_update = gr.update(value="⏳ Processing captions... please wait")
    button_update = gr.update(interactive=False)
    return status_update, button_update

def on_captioning_complete():
    """Re-enable the caption button and enable downloads after captioning.

    Returns:
        tuple: Updates for (status text, caption button, download button).
        The status update is deliberately empty: ``run_captioning`` has
        already written the real outcome (success, partial success, error or
        "No images to process"), and replacing it with a blanket success
        message would hide failures from the user.
    """
    return gr.update(), gr.update(interactive=True), gr.update(interactive=True)

# ------- UI Style Definitions -------

def get_css_styles():
    """Return the app's custom CSS as one HTML string.

    The rules are wrapped in a ``<style>`` element; build_ui() passes the
    returned string to ``gr.HTML``. The selectors target the ``elem_id`` /
    ``elem_classes`` values assigned by the component-creation functions
    (e.g. ``#left-column``, ``.image-caption-row``, ``.category-info``).
    """
    return """
    <style>
    /* Unified styling for the two-column layout */
    #left-column, #right-column {
        padding: 10px;
        align-self: flex-start;
    }
    
    /* Force columns to align at the top */
    .gradio-row {
        align-items: flex-start !important;
    }
    
    /* File upload styling */
    .file-types-info {
        margin-top: 0px;
        font-size: 0.9em;
        color: #666;
    }
    
    .file-upload-container {
        width: 100%;
        max-width: 100%;
    }
    
    .file-upload-container .file-preview {
        max-height: 180px;
        overflow-y: auto;
    }
    
    /* Image and caption rows styling */
    .image-caption-row {
        margin-bottom: 10px;
        padding: 5px;
        border-bottom: 1px solid #eee;
    }
    
    /* Make thumbnails same size */
    .image-thumbnail {
        height: 100%;
        width: 100%;
        object-fit: contain;
    }
    
    /* Center the image thumbnails */
    #left-column, .image-caption-row > div:first-child {
        display: flex;
        justify-content: center;
        align-items: center;
    }
    
    /* Ensure the image container itself is centered */
    .image-thumbnail img, .image-thumbnail > div {
        margin: 0 auto;
    }
    
    /* Caption text areas */
    .caption-area {
        height: 100%;
        display: flex;
        flex-direction: column;
    }
    
    /* Download section */
    .download-section {
        margin-top: 10px;
    }
    
    /* Category info */
    .category-info {
        font-size: 0.9em;
        color: #555;
        background-color: #f8f9fa;
        padding: 8px;
        border-radius: 4px;
        margin-bottom: 10px;
        border-left: 3px solid #4CAF50;
    }
    
    /* Tab styling */
    .tabs {
        margin-top: 20px;
    }
    
    /* Prompt optimization tab styling */
    .optimization-status {
        margin-top: 10px;
        padding: 8px;
        border-radius: 4px;
        background-color: #f8f9fa;
    }
    
    /* Input/output boxes for prompt optimization */
    .prompt-box {
        margin-bottom: 15px;
    }
    
    /* Make optimize button stand out */
    .optimize-btn {
        margin-top: 10px;
        margin-bottom: 15px;
    }
    </style>
    """

# ------- UI Component Creation -------

def create_upload_area():
    """Build the left-hand column: heading, file-type hint and file uploader.

    Returns the column and the multi-file upload component.
    """
    uploader_options = dict(
        file_count="multiple",
        label="Drop your files here",
        file_types=["image"],
        type="filepath",
        height=220,
        elem_classes="file-upload-container",
    )

    with gr.Column(scale=1, elem_id="left-column") as upload_column:
        gr.Markdown("### Upload your images", elem_id="upload-heading")
        gr.Markdown(
            "Only .png, .jpg, .jpeg, and .webp files are supported",
            elem_id="file-types-info",
            elem_classes="file-types-info",
        )
        image_upload = gr.File(**uploader_options)

    return upload_column, image_upload

def create_config_area():
    """Build the right-hand column: batch option, help note, buttons, status.

    Returns the column, the batch checkbox, the caption and download
    buttons, the (initially hidden) download file output and the status text.
    """
    with gr.Column(scale=1.5, elem_id="right-column") as config_column:
        gr.Markdown("### Configuration")

        batch_category_checkbox = gr.Checkbox(
            value=False,
            label="Batch process by category",
            info="Caption similar images together",
        )

        # Help text explaining how file names map to categories.
        categorization_note = """
        **Note about categorization:**
        - Images are grouped by the part of the filename before the last underscore
        - For example: "character_pose_1.png" and "character_pose_2.png" share the category "character_pose"
        - When using "Batch process by category", similar images are captioned together for more consistent results
        """
        gr.Markdown(categorization_note, elem_classes=["category-info"])

        # Both buttons start disabled.
        caption_btn = gr.Button("Caption Images", variant="primary", interactive=False)
        download_btn = gr.Button("Download Images + Captions", variant="secondary", interactive=False)
        download_output = gr.File(label="Download Zip", visible=False)
        status_text = gr.Markdown("Upload images to begin", visible=True)

    return (
        config_column,
        batch_category_checkbox,
        caption_btn,
        download_btn,
        download_output,
        status_text,
    )

def create_captioning_area():
    """Build the hidden captioning area with MAX_IMAGES image/caption rows.

    Returns the enclosing column plus three parallel lists: the rows, the
    image components and the caption textboxes.
    """
    image_rows, image_components, caption_components = [], [], []

    with gr.Column(visible=False) as captioning_area:
        # Header row: one heading above each of the two columns.
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### Your Images", elem_id="images-heading")
            with gr.Column(scale=1.5):
                gr.Markdown("### Your Captions", elem_id="captions-heading")

        # Every row is created up front and starts hidden.
        for number in range(1, MAX_IMAGES + 1):
            with gr.Row(visible=False, elem_classes=["image-caption-row"]) as img_row:
                # Thumbnail on the left
                with gr.Column(scale=1):
                    thumbnail = gr.Image(
                        label=f"Image {number}",
                        type="filepath",
                        show_label=False,
                        height=200,
                        width=200,
                        elem_classes=["image-thumbnail"],
                    )
                # Editable caption on the right
                with gr.Column(scale=1.5):
                    caption_box = gr.Textbox(
                        label=f"Caption {number}",
                        lines=3,
                        elem_classes=["caption-area"],
                    )

            image_rows.append(img_row)
            image_components.append(thumbnail)
            caption_components.append(caption_box)

    return captioning_area, image_rows, image_components, caption_components

def setup_event_handlers(
    image_upload, stored_image_paths, captioning_area, caption_btn, download_btn, 
    download_output, status_text, image_rows, image_components, caption_components,
    batch_category_checkbox, batch_by_category, shared_captions=None
):
    """Set up all event handlers for the image-captioning tab.

    Wires four interactions:
      * upload change -> process_upload, updating state, buttons, status,
        row visibility, image values and caption labels;
      * checkbox change -> copies its value into ``batch_by_category``;
      * caption button click -> on_captioning_start, then run_captioning,
        then on_captioning_complete, chained with ``.success()``; when
        ``shared_captions`` is given, one more ``.success()`` step stores the
        non-blank captions in it;
      * download button click -> create_zip_from_ui, then reveals
        ``download_output``, then shows an info toast.

    Args:
        image_upload: File upload component whose change starts processing.
        stored_image_paths: State holding the accepted image paths.
        captioning_area: Container shown once images are loaded.
        caption_btn: Button that starts captioning.
        download_btn: Button that builds the zip.
        download_output: File component that receives the zip path.
        status_text: Component that displays the status message.
        image_rows: Row containers, one per image slot.
        image_components: Image components, one per slot.
        caption_components: Caption textboxes, one per slot.
        batch_category_checkbox: Checkbox for the batch-by-category option.
        batch_by_category: State mirroring the checkbox value.
        shared_captions: Optional state that receives the generated captions
            for use elsewhere in the app; skipped when ``None``.
    """
    # Combined outputs for the upload function
    upload_outputs = [
        stored_image_paths,
        captioning_area,
        caption_btn,
        download_btn,
        download_output,
        status_text,
        *image_rows
    ]
    
    # Order must match the list returned by process_upload:
    # upload outputs, then image value updates, then caption label updates.
    combined_outputs = upload_outputs + image_components + caption_components
    
    # Set up upload handler
    # (the three component lists are forwarded, but process_upload does not use them)
    image_upload.change(
        lambda files: process_upload(files, image_rows, image_components, caption_components),
        inputs=[image_upload],
        outputs=combined_outputs
    )
    
    # Set up batch category checkbox
    batch_category_checkbox.change(
        update_batch_setting,
        inputs=[batch_category_checkbox],
        outputs=[batch_by_category]
    )
    
    # Set up captioning button chain:
    # start (status + disable button) -> run captioning -> complete (re-enable)
    caption_chain = caption_btn.click(
        on_captioning_start,
        inputs=None,
        outputs=[status_text, caption_btn]
    ).success(
        run_captioning,
        inputs=[stored_image_paths, batch_by_category],
        outputs=caption_components + [status_text]
    ).success(
        on_captioning_complete,
        inputs=None,
        outputs=[status_text, caption_btn, download_btn]
    )
    
    # If shared_captions is provided, add an additional handler to update it
    if shared_captions is not None:
        def extract_valid_captions(*caption_values):
            # Keep only captions that contain something other than whitespace.
            return [c for c in caption_values if c and c.strip()]
        
        caption_chain.success(
            extract_valid_captions,
            inputs=caption_components,
            outputs=[shared_captions]
        )
    
    # Set up download button: build the zip, reveal the file component,
    # then point the user at it with a toast.
    download_btn.click(
        create_zip_from_ui,
        inputs=[stored_image_paths] + caption_components,
        outputs=[download_output]
    ).then(
        lambda: gr.update(visible=True, elem_classes=["download-section"]),
        inputs=None,
        outputs=[download_output]
    ).then(
        lambda: gr.Info("Click the Download button that appeared to save your zip file"),
        inputs=None,
        outputs=None
    )

# ------- Prompt Optimization UI -------

def create_prompt_optimization_ui():
    """Create the UI components for the prompt optimization tab.

    The left column collects captions (uploaded ``.txt`` files, manual text,
    or captions pulled in from the image-captioning tab); the right column
    holds the prompt input, the optimize button, the result and a status line.

    Returns:
        tuple: ``(left_column, right_column, captions_upload,
        manual_captions, use_generated_captions, user_prompt, optimize_btn,
        optimized_prompt, optimization_status)``.
    """
    with gr.Column(scale=1) as left_column:
        # Left side for caption input
        gr.Markdown("### Upload Captions")
        gr.Markdown("Upload caption files (.txt) or enter captions manually", elem_classes="file-types-info")

        captions_upload = gr.File(
            file_count="multiple",
            label="Upload caption files",
            file_types=[".txt"],
            type="filepath",
            elem_classes="file-upload-container",
            height=220
        )

        manual_captions = gr.Textbox(
            label="Or enter captions manually",
            lines=5,
            placeholder="Enter captions here, one per line",
            elem_classes="prompt-box"
        )

        # This button copies the captions generated in the image-captioning
        # tab into the manual box, so the label names that source (it
        # previously read "Use Captions from Manual Entry").
        use_generated_captions = gr.Button("Use Captions from Image Captioning Tab", variant="secondary")

    with gr.Column(scale=1) as right_column:
        # Right side for prompt input and output
        gr.Markdown("### Optimize Prompt")
        gr.Markdown("\n- Craft prompts that match the style of your training captions\n- Enter a simple prompt and receive an optimized version\n", elem_classes=["category-info"])

        user_prompt = gr.Textbox(
            label="Enter your prompt",
            lines=3,
            placeholder="Enter the prompt you want to optimize",
            elem_classes="prompt-box"
        )

        optimize_btn = gr.Button("Optimize Prompt", variant="primary", elem_classes="optimize-btn")

        optimized_prompt = gr.Textbox(
            label="Optimized Prompt",
            lines=5,
            placeholder="Optimized prompt will appear here",
            elem_classes="prompt-box"
        )

        optimization_status = gr.Markdown("Enter a prompt and upload captions to begin", elem_classes="optimization-status")

    return (
        left_column, right_column, captions_upload, manual_captions,
        use_generated_captions, user_prompt, optimize_btn,
        optimized_prompt, optimization_status
    )

def run_optimization(prompt, caption_files, manual_caption_text):
    """Optimize ``prompt`` against a set of reference captions.

    Manually entered captions (one per line) take precedence; uploaded
    ``.txt`` files are only read when the manual text is blank. Returns an
    ``(optimized_prompt, status_message)`` pair, with an empty first element
    whenever validation or optimization fails.
    """
    if not (prompt and prompt.strip()):
        return "", "Please enter a prompt to optimize"

    reference_captions = []
    manual_text = manual_caption_text or ""

    if manual_text.strip():
        # Manual entry: each non-blank line is one caption.
        for raw_line in manual_text.split("\n"):
            cleaned = raw_line.strip()
            if cleaned:
                reference_captions.append(cleaned)
    elif caption_files:
        # Uploaded files: each existing .txt file contributes its whole text.
        for file_path in caption_files:
            if not (os.path.exists(file_path) and file_path.lower().endswith('.txt')):
                continue
            with open(file_path, 'r', encoding='utf-8') as handle:
                text = handle.read().strip()
            if text:
                reference_captions.append(text)

    if not reference_captions:
        return "", "Please upload caption files or enter captions manually"

    try:
        # Delegate to optimize_prompt from prompt.py
        optimized = optimize_prompt(prompt, captions_list=reference_captions)
    except Exception as e:
        return "", f"❌ Error optimizing prompt: {str(e)}"
    return optimized, "✅ Prompt optimization complete"

def setup_prompt_optimization_handlers(
    captions_upload, manual_captions, use_generated_captions,
    user_prompt, optimize_btn, optimized_prompt,
    optimization_status, shared_captions
):
    """Set up the event handlers for the prompt optimization tab.

    Args:
        captions_upload: File component holding uploaded caption files.
        manual_captions: Textbox for manually entered captions.
        use_generated_captions: Button that copies the shared captions into
            ``manual_captions``.
        user_prompt: Textbox holding the prompt to optimize.
        optimize_btn: Button that starts the optimization.
        optimized_prompt: Textbox that receives the optimized prompt.
        optimization_status: Component that receives the status message.
        shared_captions: State holding the list of generated captions.
    """
    def fill_with_shared_captions(captions_list):
        # With nothing to copy, warn via a toast and leave the textbox as it
        # is. Writing the message into the textbox would make
        # run_optimization treat that sentence as a real caption.
        if not captions_list:
            gr.Warning("No captions available. Generate captions in the Image Captioning tab first.")
            return gr.update()
        return "\n".join(captions_list)

    # Connect button to fill manual captions area
    use_generated_captions.click(
        fill_with_shared_captions,
        inputs=[shared_captions],
        outputs=[manual_captions]
    )

    # Connect the optimize button to the optimization function
    optimize_btn.click(
        run_optimization,
        inputs=[user_prompt, captions_upload, manual_captions],
        outputs=[optimized_prompt, optimization_status]
    )

# ------- Main Application -------

def build_ui():
    """Assemble the full two-tab Gradio app and return the Blocks object.

    The "Image Captioning" and "Prompt Optimization" tabs share one state
    object through which generated captions flow from the first tab to the
    second. The custom CSS is appended last as an HTML component.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Image Auto-captioner for LoRA Training")

        gr.Markdown("""Check out the [code](https://github.com/RishiDesai/LoRACaptioner)
                    and see my [blog post](https://rishidesai.github.io/posts/character-lora/) for more information.""")

        # Captions generated in the first tab, readable from the second.
        shared_captions = gr.State([])

        with gr.Tabs():
            with gr.TabItem("Image Captioning"):
                # Per-session state: accepted image paths and the batch option.
                stored_image_paths = gr.State([])
                batch_by_category = gr.State(False)

                # Upload column and configuration column side by side.
                with gr.Row():
                    _, image_upload = create_upload_area()
                    config_parts = create_config_area()

                # Drop the column itself; keep the interactive components.
                (
                    batch_category_checkbox, caption_btn, download_btn,
                    download_output, status_text,
                ) = config_parts[1:]

                # Image/caption rows, hidden until images are uploaded.
                captioning_area, image_rows, image_components, caption_components = create_captioning_area()

                setup_event_handlers(
                    image_upload=image_upload,
                    stored_image_paths=stored_image_paths,
                    captioning_area=captioning_area,
                    caption_btn=caption_btn,
                    download_btn=download_btn,
                    download_output=download_output,
                    status_text=status_text,
                    image_rows=image_rows,
                    image_components=image_components,
                    caption_components=caption_components,
                    batch_category_checkbox=batch_category_checkbox,
                    batch_by_category=batch_by_category,
                    shared_captions=shared_captions,
                )

            with gr.TabItem("Prompt Optimization"):
                with gr.Row():
                    prompt_parts = create_prompt_optimization_ui()

                # The first two items are the layout columns; the remaining
                # seven line up with the handler's leading parameters.
                setup_prompt_optimization_handlers(*prompt_parts[2:], shared_captions)

        # Add CSS styling
        gr.HTML(get_css_styles())

    return demo

# Launch the app when run directly; importing this module only defines the
# functions above and starts nothing.
if __name__ == "__main__":
    demo = build_ui()
    # NOTE(review): share=True presumably asks Gradio for a public share link -
    # confirm it is wanted/supported where this app is hosted.
    demo.launch(share=True)