Rishi Desai committed · Commit ab00f6b
Parent(s): 8373fd9

some clean up

Files changed:
- caption.py  +42 -46
- main.py     +25 -29
- prompt.py   +18 -26
caption.py CHANGED

@@ -2,12 +2,9 @@ import base64
 import io
 import os
 from together import Together
-from PIL import Image
-from dotenv import load_dotenv

-load_dotenv()

-def
+def get_system_prompt():
     return """Automated Image Captioning (for LoRA Training)

 Role: You are an expert AI captioning system generating precise, structured descriptions for character images optimized for LoRA model training in Stable Diffusion and Flux.1-dev.

@@ -58,10 +55,12 @@ tr1gger photorealistic, long trench coat and combat boots, walking, determined,
 REMEMBER: Your response must be a single line starting with "tr1gger" and following the exact format above. No additional text, formatting, or explanations are allowed.
 """

+
 class CaptioningError(Exception):
     """Exception raised for errors in the captioning process."""
     pass

+
 def images_to_base64(images):
     """Convert a list of PIL images to base64 encoded strings."""
     image_strings = []

@@ -72,15 +71,17 @@ def images_to_base64(images):
         image_strings.append(img_str)
     return image_strings

+
 def get_together_client():
     """Initialize and return the Together API client."""
     api_key = os.environ.get("TOGETHER_API_KEY")
     if not api_key:
-        raise ValueError("TOGETHER_API_KEY
+        raise ValueError("TOGETHER_API_KEY not set!")
     return Together(api_key=api_key)

-
-
+
+def extract_caption(line):
+    """Extract caption from a line of text."""
     if "tr1gger" in line:
         # If caption doesn't start with tr1gger but contains it, extract just that part
         if not line.startswith("tr1gger"):

@@ -88,10 +89,11 @@ def extract_trigger_caption(line):
             return line
     return ""

+
 def caption_single_image(client, img_str):
     """Process and caption a single image."""
     messages = [
-        {"role": "system", "content":
+        {"role": "system", "content": get_system_prompt()},
         {
             "role": "user",
             "content": [

@@ -100,85 +102,81 @@ def caption_single_image(client, img_str):
             ]
         }
     ]
-
+
     # Request caption for the image using Llama 4 Maverick
     response = client.chat.completions.create(
         model="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
         messages=messages
     )
-
-    # Extract caption from the response
+
     full_response = response.choices[0].message.content.strip()
-
-    # Look for the trigger line in the response
     caption = ""
     for line in full_response.splitlines():
-        caption =
+        caption = extract_caption(line)
         if caption:
             break
-
-    # Check if caption is valid
+
     if not caption:
         error_msg = "Failed to extract a valid caption (containing 'tr1gger') from the response"
         error_msg += f"\n\nActual response:\n{full_response}"
         raise CaptioningError(error_msg)
-
+
     return caption

-
+
+def caption_image_batch(client, image_strings, category):
     """Process and caption multiple images in a single batch request."""
     # Create a content array with all images
-    content = [{"type": "text",
-
-
+    content = [{"type": "text",
+                "text": f"Here is the batch of images for {category}. "
+                        f"Caption each image on a separate line."}]
+
     for i, img_str in enumerate(image_strings):
         content.append({"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}"}})
-        content.append({"type": "text", "text": f"Image {i+1}"})
-
-    # Send the batch request
+        content.append({"type": "text", "text": f"Image {i + 1}"})
+
     messages = [
-        {"role": "system", "content":
+        {"role": "system", "content": get_system_prompt()},
         {"role": "user", "content": content}
     ]
-
     response = client.chat.completions.create(
         model="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
         messages=messages
     )
-
     return process_batch_response(response, image_strings)

+
 def process_batch_response(response, image_strings):
     """Process the API response from a batch request and extract captions."""
-    # Parse the response to extract captions for each image
     full_response = response.choices[0].message.content.strip()
     lines = full_response.splitlines()
-
+
     # Extract captions from the response
     image_count = len(image_strings)
-    captions = [""] * image_count
-
+    captions = [""] * image_count
+
     # Extract lines that start with or contain "tr1gger"
-
-
+    caption_lines = [line for line in lines if "tr1gger" in line]
+
     # Assign captions to images
     for i in range(image_count):
-        if i < len(
-            caption =
+        if i < len(caption_lines):
+            caption = extract_caption(caption_lines[i])
             captions[i] = caption
-
+
     validate_batch_captions(captions, image_count, full_response)
     return captions

+
 def validate_batch_captions(captions, image_count, full_response):
     """Validate captions extracted from a batch response."""
     # Check if all captions are empty or don't contain the trigger word
     valid_captions = [c for c in captions if c and "tr1gger" in c]
     if not valid_captions:
-        error_msg = "Failed to parse any valid captions from batch response.
+        error_msg = "Failed to parse any valid captions from batch response."
         error_msg += f"\n\nActual response:\n{full_response}"
         raise CaptioningError(error_msg)
-
+
     # Check if some captions are missing
     if len(valid_captions) < image_count:
         missing_count = image_count - len(valid_captions)

@@ -186,24 +184,22 @@ def validate_batch_captions(captions, image_count, full_response):
         error_msg = f"Failed to parse captions for {missing_count} of {image_count} images in batch mode"
         error_msg += "\n\nMalformed captions:"
         for idx, caption in invalid_captions:
-            error_msg += f"\nImage {idx+1}: '{caption}'"
+            error_msg += f"\nImage {idx + 1}: '{caption}'"
         raise CaptioningError(error_msg)

+
 def caption_images(images, category=None, batch_mode=False):
     """Caption a list of images, either individually or in batch mode."""
-    # Convert PIL images to base64 encoded strings
     image_strings = images_to_base64(images)
-
-    # Initialize the API client
+
     client = get_together_client()
-
-    # Process images based on the mode
+
     if batch_mode and category:
-        return
+        return caption_image_batch(client, image_strings, category)
     else:
-        # Process each image individually
         return [caption_single_image(client, img_str) for img_str in image_strings]

+
 def extract_captions(file_path):
     captions = []
     with open(file_path, 'r') as file:
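For reference, a minimal usage sketch of the refactored caption.py helpers (not part of the commit; the file names are placeholders, and it assumes TOGETHER_API_KEY is set in the environment):

# Hypothetical usage sketch for the refactored caption.py.
from PIL import Image
from caption import caption_images

# Callers still pass PIL images; caption.py itself no longer imports PIL or dotenv.
images = [Image.open(p).convert("RGB") for p in ["hero_001.png", "hero_002.png"]]

# Individual mode: one request per image, each returning a single "tr1gger ..." line.
captions = caption_images(images)

# Batch mode: one request for the whole category, parsed by process_batch_response.
batch_captions = caption_images(images, category="hero", batch_mode=True)

print("\n".join(captions))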
main.py CHANGED

@@ -6,34 +6,38 @@ from pathlib import Path
 from PIL import Image
 from caption import caption_images

+
 def is_image_file(filename):
     """Check if a file is an allowed image type."""
     allowed_extensions = ['.png', '.jpg', '.jpeg', '.webp']
     return any(filename.lower().endswith(ext) for ext in allowed_extensions)

+
 def is_unsupported_image(filename):
     """Check if a file is an image but not of an allowed type."""
     unsupported_extensions = ['.bmp', '.gif', '.tiff', '.tif', '.ico', '.svg']
     return any(filename.lower().endswith(ext) for ext in unsupported_extensions)

+
 def is_text_file(filename):
     """Check if a file is a text file."""
     return filename.lower().endswith('.txt')

+
 def validate_input_directory(input_dir):
     """Validate that the input directory only contains allowed image formats."""
     input_path = Path(input_dir)
-
+
     unsupported_files = []
     text_files = []
-
+
     for file_path in input_path.iterdir():
         if file_path.is_file():
             if is_unsupported_image(file_path.name):
                 unsupported_files.append(file_path.name)
             elif is_text_file(file_path.name):
                 text_files.append(file_path.name)
-
+
     if unsupported_files:
         print("Error: Unsupported image formats detected.")
         print("Only .png, .jpg, .jpeg, and .webp files are allowed.")

@@ -41,13 +45,14 @@ def validate_input_directory(input_dir):
         for file in unsupported_files:
             print(f" - {file}")
         sys.exit(1)
-
+
     if text_files:
         print("Warning: Text files detected in the input directory.")
         print("The following text files will be overwritten:")
         for file in text_files:
             print(f" - {file}")

+
 def collect_images_by_category(input_path):
     """Collect all valid images and group them by category."""
     images_by_category = {}

@@ -56,28 +61,27 @@ def collect_images_by_category(input_path):
     for file_path in input_path.iterdir():
         if file_path.is_file() and is_image_file(file_path.name):
             try:
-                # Load the image
                 image = Image.open(file_path).convert("RGB")
-
+
                 # Determine the category from the filename
                 category = file_path.stem.rsplit('_', 1)[0]
-
+
                 # Add image to the appropriate category
                 if category not in images_by_category:
                     images_by_category[category] = []
                     image_paths_by_category[category] = []
-
+
                 images_by_category[category].append(image)
                 image_paths_by_category[category].append(file_path)
             except Exception as e:
                 print(f"Error loading {file_path.name}: {e}")
-
+
     return images_by_category, image_paths_by_category

+
 def process_by_category(images_by_category, image_paths_by_category, input_path, output_path):
     """Process images in batches by category."""
     processed_count = 0
-
     for category, images in images_by_category.items():
         image_paths = image_paths_by_category[category]
         try:

@@ -87,35 +91,31 @@ def process_by_category(images_by_category, image_paths_by_category, input_path,
             processed_count += len(images)
         except Exception as e:
             print(f"Error generating captions for category '{category}': {e}")
-
     return processed_count

+
 def process_all_at_once(images_by_category, image_paths_by_category, input_path, output_path):
     """Process all images at once."""
     all_images = [img for imgs in images_by_category.values() for img in imgs]
     all_image_paths = [path for paths in image_paths_by_category.values() for path in paths]
     processed_count = 0
-
     try:
         captions = caption_images(all_images, batch_mode=False)
         write_captions(all_image_paths, captions, input_path, output_path)
         processed_count += len(all_images)
     except Exception as e:
         print(f"Error generating captions: {e}")
-
     return processed_count

+
 def process_images(input_dir, output_dir, fix_outfit=False, batch_images=False):
     """Process all images in the input directory and generate captions."""
     input_path = Path(input_dir)
     output_path = Path(output_dir) if output_dir else input_path
-
-    # Validate the input directory first
+
     validate_input_directory(input_dir)
-
-    # Create output directory if it doesn't exist
     os.makedirs(output_path, exist_ok=True)
-
+
     # Collect images by category
     images_by_category, image_paths_by_category = collect_images_by_category(input_path)

@@ -127,7 +127,6 @@ def process_images(input_dir, output_dir, fix_outfit=False, batch_images=False):
         print("No valid images found to process.")
         return

-    # Process images based on batch setting
     if batch_images:
         processed_count = process_by_category(images_by_category, image_paths_by_category, input_path, output_path)
     else:

@@ -135,6 +134,7 @@ def process_images(input_dir, output_dir, fix_outfit=False, batch_images=False):

     print(f"\nProcessing complete. {processed_count} images were captioned.")

+
 def write_captions(image_paths, captions, input_path, output_path):
     """Helper function to write captions to files."""
     for file_path, caption in zip(image_paths, captions):

@@ -143,37 +143,33 @@ def write_captions(image_paths, captions, input_path, output_path):
             caption_filename = file_path.stem + ".txt"
             caption_path = input_path / caption_filename

-            # Write caption to file
             with open(caption_path, 'w', encoding='utf-8') as f:
                 f.write(caption)

             # If output directory is different from input, copy files
             if output_path != input_path:
-                # Copy image to output directory
                 shutil.copy2(file_path, output_path / file_path.name)
-                # Copy caption to output directory
                 shutil.copy2(caption_path, output_path / caption_filename)
-
             print(f"Processed {file_path.name} → {caption_filename}")
         except Exception as e:
             print(f"Error processing {file_path.name}: {e}")

+
 def main():
     parser = argparse.ArgumentParser(description='Generate captions for images using GPT-4o.')
     parser.add_argument('--input', type=str, required=True, help='Directory containing images')
     parser.add_argument('--output', type=str, help='Directory to save images and captions (defaults to input directory)')
     parser.add_argument('--fix_outfit', action='store_true', help='Flag to indicate if character has one outfit')
     parser.add_argument('--batch_images', action='store_true', help='Flag to indicate if images should be processed in batches')
-
+
     args = parser.parse_args()
-
-    # Validate input directory
+
     if not os.path.isdir(args.input):
         print(f"Error: Input directory '{args.input}' does not exist.")
         return
-
-    # Process images
+
     process_images(args.input, args.output, args.fix_outfit, args.batch_images)

+
 if __name__ == "__main__":
-    main()
+    main()
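As a usage note, main() still funnels everything through process_images after parsing its four flags; the sketch below mirrors that call (not part of the diff; the directory paths are placeholders and TOGETHER_API_KEY must be set):

# Hypothetical call mirroring `python main.py --input ./images --output ./dataset --batch_images`.
from main import process_images

process_images(
    input_dir="./images",    # only .png/.jpg/.jpeg/.webp files are accepted
    output_dir="./dataset",  # optional; defaults to the input directory
    fix_outfit=False,        # mirrors the --fix_outfit flag
    batch_images=True,       # --batch_images: group by filename prefix and caption per category
)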
prompt.py CHANGED

@@ -1,23 +1,18 @@
 import os
 import argparse
 from pathlib import Path
-from caption import
+from caption import get_system_prompt, get_together_client, extract_captions
+

 def optimize_prompt(user_prompt, captions_dir=None, captions_list=None):
-    """
-    Optimize a user prompt to follow the same format as training captions.
+    """Optimize a user prompt to follow the same format as training captions.

     Args:
-        user_prompt (str): The simple user prompt to optimize
+        user_prompt (str): The simple user prompt to optimize
         captions_dir (str, optional): Directory containing caption .txt files
         captions_list (list, optional): List of captions to use instead of loading from files
-
-    Returns:
-        str: The optimized prompt following the training format
     """
-    # Get captions either from directory or provided list
     all_captions = []
-
     if captions_list:
         all_captions = captions_list
     elif captions_dir:

@@ -26,19 +21,18 @@ def optimize_prompt(user_prompt, captions_dir=None, captions_list=None):
         for file_path in captions_path.glob("*.txt"):
             captions = extract_captions(file_path)
             all_captions.extend(captions)
-
+
     if not all_captions:
-        raise ValueError("
-
+        raise ValueError("Please provide either caption files or a list of captions!")
+
     # Concatenate all captions with newlines
     captions_text = "\n".join(all_captions)
-
+
     client = get_together_client()
-
     messages = [
-        {"role": "system", "content":
+        {"role": "system", "content": get_system_prompt()},
         {
-            "role": "user",
+            "role": "user",
             "content": (
                 f"These are all of the captions used to train the LoRA:\n\n"
                 f"{captions_text}\n\n"

@@ -47,36 +41,34 @@ def optimize_prompt(user_prompt, captions_dir=None, captions_list=None):
             )
         }
     ]
-
+
     response = client.chat.completions.create(
         model="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
         messages=messages
     )
-
+
     optimized_prompt = response.choices[0].message.content.strip()
     return optimized_prompt

+
 def main():
     parser = argparse.ArgumentParser(description='Optimize prompts based on existing captions.')
     parser.add_argument('--prompt', type=str, required=True, help='User prompt to optimize')
-    parser.add_argument('--captions', type=str, help='Directory containing caption .txt files')
-
+    parser.add_argument('--captions', type=str, required=True,help='Directory containing caption .txt files')
+
     args = parser.parse_args()
-
-    if not args.captions:
-        print("Error: --captions is required.")
-        return
     if not os.path.isdir(args.captions):
         print(f"Error: Captions directory '{args.captions}' does not exist.")
         return
-
+
     try:
         optimized_prompt = optimize_prompt(args.prompt, args.captions)
         print("\nOptimized Prompt:")
         print(optimized_prompt)
-
+
     except Exception as e:
         print(f"Error optimizing prompt: {e}")

+
 if __name__ == "__main__":
     main()
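For reference, optimize_prompt can also be called directly with an in-memory list of captions via captions_list, which the CLI's required --captions directory flag does not expose; a minimal sketch (not part of the commit; the prompt and caption text are made-up examples, and TOGETHER_API_KEY must be set):

# Hypothetical programmatic use of prompt.py's optimize_prompt.
from prompt import optimize_prompt

training_captions = [
    "tr1gger photorealistic, long trench coat and combat boots, walking, determined",
]

# The captions are joined with newlines and sent alongside the shared system prompt.
print(optimize_prompt("a woman reading in a cafe", captions_list=training_captions))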