Rishi Desai committed on
Commit
ebbd273
·
1 Parent(s): d8dc4ed

removed hardcoded trigger word

Browse files
Files changed (1) hide show
  1. caption.py +20 -20
caption.py CHANGED
@@ -3,6 +3,7 @@ import io
3
  import os
4
  from together import Together
5
 
 
6
 
7
  def get_system_prompt():
8
  return """Automated Image Captioning (for LoRA Training)
@@ -10,9 +11,8 @@ def get_system_prompt():
10
  Role: You are an expert AI captioning system generating precise, structured descriptions for character images optimized for LoRA model training in Stable Diffusion and Flux.1-dev.
11
 
12
  IMPORTANT: You MUST follow these rules EXACTLY:
13
- 1. EVERY caption MUST start with the word "tr1gger" (exactly like this, no variations)
14
- 2. You MUST use the exact format: tr1gger [Style], [Notable Visual Features], [Clothing], [Pose], [Expression], [Lighting], [Camera Angle]
15
- 3. DO NOT include any additional text, explanations, or formatting
16
  4. DO NOT use bullet points, lists, or any other formatting
17
  5. DO NOT include any text before or after the caption
18
  6. If you don't follow this format exactly, the caption will be rejected
@@ -34,7 +34,7 @@ Avoid Describing These Unless Variable Across Dataset or Uncertain from Concept:
34
  - Known accessories that always appear (unless outfit-specific)
35
 
36
  Caption Format (MUST FOLLOW EXACTLY):
37
- tr1gger [Style], [Notable Visual Features], [Clothing], [Pose], [Expression/Mood], [Background/Setting], [Lighting], [Camera Angle]
38
 
39
  Captioning Principles:
40
  - Emphasize visual variation and context-specific details (outfit, pose, lighting, expression, camera angle).
@@ -44,15 +44,15 @@ Captioning Principles:
44
  - Specify lighting conditions (soft lighting, harsh shadows, glowing backlight).
45
  - Explicitly state camera angle (e.g., front view, right side profile, low-angle, high-angle, overhead).
46
  - Avoid mentioning real or fictional identities.
47
- - Always prefix with the trigger word "tr1gger."
48
 
49
  Examples (MUST FOLLOW THIS EXACT FORMAT):
50
- tr1gger photorealistic, combat gear, tactical vest and gloves, standing in profile, neutral, empty room, overcast lighting, side profile
51
- tr1gger 3D-rendered, digital patterns, hooded cloak, seated cross-legged, calm, meditation chamber, low ambient lighting, front view
52
- tr1gger anime-style, school uniform with blue necktie, standing with arms behind back, gentle smile, classroom, soft daylight, three-quarter view
53
- tr1gger photorealistic, long trench coat and combat boots, walking, determined, rain-soaked street, dramatic shadows, low-angle view
54
 
55
- REMEMBER: Your response must be a single line starting with "tr1gger" and following the exact format above. No additional text, formatting, or explanations are allowed.
56
  """
57
 
58
 
@@ -82,10 +82,10 @@ def get_together_client():
82
 
83
  def extract_caption(line):
84
  """Extract caption from a line of text."""
85
- if "tr1gger" in line:
86
- # If caption doesn't start with tr1gger but contains it, extract just that part
87
- if not line.startswith("tr1gger"):
88
- return line[line.index("tr1gger"):]
89
  return line
90
  return ""
91
 
@@ -117,7 +117,7 @@ def caption_single_image(client, img_str):
117
  break
118
 
119
  if not caption:
120
- error_msg = "Failed to extract a valid caption (containing 'tr1gger') from the response"
121
  error_msg += f"\n\nActual response:\n{full_response}"
122
  raise CaptioningError(error_msg)
123
 
@@ -155,8 +155,8 @@ def process_batch_response(response, image_strings):
155
  image_count = len(image_strings)
156
  captions = [""] * image_count
157
 
158
- # Extract lines that start with or contain "tr1gger"
159
- caption_lines = [line for line in lines if "tr1gger" in line]
160
 
161
  # Assign captions to images
162
  for i in range(image_count):
@@ -171,7 +171,7 @@ def process_batch_response(response, image_strings):
171
  def validate_batch_captions(captions, image_count, full_response):
172
  """Validate captions extracted from a batch response."""
173
  # Check if all captions are empty or don't contain the trigger word
174
- valid_captions = [c for c in captions if c and "tr1gger" in c]
175
  if not valid_captions:
176
  error_msg = "Failed to parse any valid captions from batch response."
177
  error_msg += f"\n\nActual response:\n{full_response}"
@@ -180,7 +180,7 @@ def validate_batch_captions(captions, image_count, full_response):
180
  # Check if some captions are missing
181
  if len(valid_captions) < image_count:
182
  missing_count = image_count - len(valid_captions)
183
- invalid_captions = [(i, c) for i, c in enumerate(captions) if not c or "tr1gger" not in c]
184
  error_msg = f"Failed to parse captions for {missing_count} of {image_count} images in batch mode"
185
  error_msg += "\n\nMalformed captions:"
186
  for idx, caption in invalid_captions:
@@ -204,6 +204,6 @@ def extract_captions(file_path):
204
  captions = []
205
  with open(file_path, 'r') as file:
206
  for line in file:
207
- if line.startswith("tr1gger"):
208
  captions.append(line.strip())
209
  return captions
 
3
  import os
4
  from together import Together
5
 
6
+ TRIGGER_WORD = "tr1gger"
7
 
8
  def get_system_prompt():
9
  return """Automated Image Captioning (for LoRA Training)
 
11
  Role: You are an expert AI captioning system generating precise, structured descriptions for character images optimized for LoRA model training in Stable Diffusion and Flux.1-dev.
12
 
13
  IMPORTANT: You MUST follow these rules EXACTLY:
14
+ 1. EVERY caption MUST start with the word "{TRIGGER_WORD}" (exactly like this, no variations)
15
+ 2. You MUST use the exact format: {TRIGGER_WORD} [Style], [Notable Visual Features], [Clothing], [Pose], [Expression/Mood], [Background/Setting], [Lighting], [Camera Angle]
 
16
  4. DO NOT use bullet points, lists, or any other formatting
17
  5. DO NOT include any text before or after the caption
18
  6. If you don't follow this format exactly, the caption will be rejected
 
34
  - Known accessories that always appear (unless outfit-specific)
35
 
36
  Caption Format (MUST FOLLOW EXACTLY):
37
+ {TRIGGER_WORD} [Style], [Notable Visual Features], [Clothing], [Pose], [Expression/Mood], [Background/Setting], [Lighting], [Camera Angle]
38
 
39
  Captioning Principles:
40
  - Emphasize visual variation and context-specific details (outfit, pose, lighting, expression, camera angle).
 
44
  - Specify lighting conditions (soft lighting, harsh shadows, glowing backlight).
45
  - Explicitly state camera angle (e.g., front view, right side profile, low-angle, high-angle, overhead).
46
  - Avoid mentioning real or fictional identities.
47
+ - Always prefix with the trigger word "{TRIGGER_WORD}."
48
 
49
  Examples (MUST FOLLOW THIS EXACT FORMAT):
50
+ {TRIGGER_WORD} photorealistic, combat gear, tactical vest and gloves, standing in profile, neutral, empty room, overcast lighting, side profile
51
+ {TRIGGER_WORD} 3D-rendered, digital patterns, hooded cloak, seated cross-legged, calm, meditation chamber, low ambient lighting, front view
52
+ {TRIGGER_WORD} anime-style, school uniform with blue necktie, standing with arms behind back, gentle smile, classroom, soft daylight, three-quarter view
53
+ {TRIGGER_WORD} photorealistic, long trench coat and combat boots, walking, determined, rain-soaked street, dramatic shadows, low-angle view
54
 
55
+ REMEMBER: Your response must be a single line starting with "{TRIGGER_WORD}" and following the exact format above. No additional text, formatting, or explanations are allowed.
56
  """
57
 
58
 
 
82
 
83
  def extract_caption(line):
84
  """Extract caption from a line of text."""
85
+ if TRIGGER_WORD in line:
86
+ # If caption doesn't start with TRIGGER_WORD but contains it, extract just that part
87
+ if not line.startswith(TRIGGER_WORD):
88
+ return line[line.index(TRIGGER_WORD):]
89
  return line
90
  return ""
91
 
 
117
  break
118
 
119
  if not caption:
120
+ error_msg = f"Failed to extract a valid caption (containing '{TRIGGER_WORD}') from the response"
121
  error_msg += f"\n\nActual response:\n{full_response}"
122
  raise CaptioningError(error_msg)
123
 
 
155
  image_count = len(image_strings)
156
  captions = [""] * image_count
157
 
158
+ # Extract lines that contain TRIGGER_WORD
159
+ caption_lines = [line for line in lines if TRIGGER_WORD in line]
160
 
161
  # Assign captions to images
162
  for i in range(image_count):
 
171
  def validate_batch_captions(captions, image_count, full_response):
172
  """Validate captions extracted from a batch response."""
173
  # Check if all captions are empty or don't contain the trigger word
174
+ valid_captions = [c for c in captions if c and TRIGGER_WORD in c]
175
  if not valid_captions:
176
  error_msg = "Failed to parse any valid captions from batch response."
177
  error_msg += f"\n\nActual response:\n{full_response}"
 
180
  # Check if some captions are missing
181
  if len(valid_captions) < image_count:
182
  missing_count = image_count - len(valid_captions)
183
+ invalid_captions = [(i, c) for i, c in enumerate(captions) if not c or TRIGGER_WORD not in c]
184
  error_msg = f"Failed to parse captions for {missing_count} of {image_count} images in batch mode"
185
  error_msg += "\n\nMalformed captions:"
186
  for idx, caption in invalid_captions:
 
204
  captions = []
205
  with open(file_path, 'r') as file:
206
  for line in file:
207
+ if line.startswith(TRIGGER_WORD):
208
  captions.append(line.strip())
209
  return captions