Rishi Desai committed on
Commit
e3a461e
·
1 Parent(s): a28e4db

using main.py in demo

Browse files
Files changed (2) hide show
  1. caption.py +1 -1
  2. demo.py +38 -200
caption.py CHANGED
@@ -3,7 +3,7 @@ import io
3
  import os
4
  from together import Together
5
 
6
- MODEL = "deepseek-ai/DeepSeek-V3"
7
  TRIGGER_WORD = "tr1gger"
8
 
9
  def get_system_prompt():
 
3
  import os
4
  from together import Together
5
 
6
+ MODEL = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
7
  TRIGGER_WORD = "tr1gger"
8
 
9
  def get_system_prompt():
demo.py CHANGED
@@ -4,10 +4,10 @@ import zipfile
4
  from io import BytesIO
5
  import time
6
  import tempfile
7
- from main import collect_images_by_category, is_image_file
8
  from pathlib import Path
9
- from caption import caption_images
10
- from PIL import Image
 
11
 
12
  # Maximum number of images
13
  MAX_IMAGES = 30
@@ -33,208 +33,46 @@ def create_download_file(image_paths, captions):
33
 
34
  return zip_io.getvalue()
35
 
36
- def save_images_to_temp(image_paths, temp_dir):
37
- """Copy images to temporary directory and return mapping"""
38
- temp_image_paths = []
39
- original_to_temp = {} # Map original paths to temp paths
40
-
41
- for path in image_paths:
42
- # Keep original filename to preserve categorization
43
- filename = os.path.basename(path)
44
- temp_path = os.path.join(temp_dir, filename)
45
-
46
- # Ensure we're using consistent path formats
47
- orig_path_str = str(path)
48
- temp_path_str = str(temp_path)
49
-
50
- with open(path, 'rb') as src, open(temp_path, 'wb') as dst:
51
- dst.write(src.read())
52
-
53
- temp_image_paths.append(temp_path_str)
54
- original_to_temp[orig_path_str] = temp_path_str
55
- print(f"Copied {orig_path_str} to {temp_path_str}")
56
-
57
- print(f"Created {len(temp_image_paths)} temporary files")
58
- return temp_image_paths, original_to_temp
59
-
60
- def process_by_category(images_by_category, image_paths_by_category, image_paths, original_to_temp):
61
- """Process images by category and map captions back to original images"""
62
- captions = [""] * len(image_paths) # Initialize with empty strings
63
-
64
- # Create a mapping from temp path to index in the original image_paths
65
- temp_to_original_idx = {}
66
- for i, orig_path in enumerate(image_paths):
67
- if orig_path in original_to_temp:
68
- temp_to_original_idx[original_to_temp[orig_path]] = i
69
-
70
- print(f"Created mapping for {len(temp_to_original_idx)} images")
71
-
72
- for category, images in images_by_category.items():
73
- category_paths = image_paths_by_category[category]
74
- print(f"Processing category '{category}' with {len(images)} images")
75
-
76
- # Create mapping of image to its position in the category
77
- category_image_map = {}
78
- for i, (img, path) in enumerate(zip(images, category_paths)):
79
- category_image_map[str(path)] = i
80
-
81
- try:
82
- # Use the same code path as CLI
83
- category_captions = caption_images(images, category=category, batch_mode=True)
84
- print(f"Generated {len(category_captions)} captions for category '{category}'")
85
-
86
- # Map captions back to original paths using our direct mapping
87
- for i, temp_path in enumerate(category_paths):
88
- temp_path_str = str(temp_path)
89
- if i < len(category_captions) and temp_path_str in temp_to_original_idx:
90
- original_idx = temp_to_original_idx[temp_path_str]
91
- captions[original_idx] = category_captions[i]
92
- except Exception as e:
93
- print(f"Error processing category '{category}': {e}")
94
- # Fall back to individual processing for this category
95
- try:
96
- print(f"Falling back to individual processing for category '{category}'")
97
- for i, img in enumerate(images):
98
- if i >= len(category_paths):
99
- continue
100
- temp_path = category_paths[i]
101
- temp_path_str = str(temp_path)
102
-
103
- try:
104
- single_captions = caption_images([img], batch_mode=False)
105
- if single_captions and len(single_captions) > 0:
106
- if temp_path_str in temp_to_original_idx:
107
- original_idx = temp_to_original_idx[temp_path_str]
108
- captions[original_idx] = single_captions[0]
109
- except Exception as inner_e:
110
- print(f"Error processing individual image {i} in '{category}': {inner_e}")
111
- except Exception as fallback_e:
112
- print(f"Error in fallback processing for '{category}': {fallback_e}")
113
-
114
- return captions
115
-
116
- def process_all_images(all_images, all_image_paths, image_paths):
117
- """Process all images at once without categorization"""
118
- print(f"Processing all {len(all_images)} images at once")
119
-
120
- # Initialize empty captions list
121
- captions = [""] * len(image_paths) # Initialize with empty strings for all original paths
122
-
123
- # If there are no images, return empty captions
124
- if not all_images:
125
- print("No images to process, returning empty captions")
126
- return captions
127
-
128
- # Create a mapping from temp paths to original indexes for efficient lookup
129
- path_to_idx = {str(path): i for i, path in enumerate(image_paths)}
130
-
131
- try:
132
- all_captions = caption_images(all_images, batch_mode=False)
133
- print(f"Generated {len(all_captions)} captions")
134
-
135
- # Map captions to the right images using the prepared image_paths
136
- for i, (path, caption) in enumerate(zip(all_image_paths, all_captions)):
137
- if i < len(all_captions) and path in path_to_idx:
138
- idx = path_to_idx[path]
139
- captions[idx] = caption
140
- except Exception as e:
141
- print(f"Error generating captions: {e}")
142
-
143
- return captions
144
-
145
  def process_uploaded_images(image_paths, batch_by_category=False):
146
- """Process uploaded images using the same code path as CLI"""
147
- try:
148
- # Convert all image paths to strings for consistency
149
- image_paths = [str(path) for path in image_paths]
150
- print(f"Processing {len(image_paths)} images, batch_by_category={batch_by_category}")
 
151
 
152
- # Create temporary directory with images
153
- with tempfile.TemporaryDirectory() as temp_dir:
154
- # Save images to temp directory
155
- temp_image_paths, original_to_temp = save_images_to_temp(image_paths, temp_dir)
156
-
157
- # Use Path object for consistency with main.py
158
- temp_dir_path = Path(temp_dir)
159
-
160
- # List files in temp directory for debugging
161
- print(f"Files in temp directory {temp_dir}:")
162
- for f in temp_dir_path.iterdir():
163
- print(f" - {f} (is_file: {f.is_file()}, is_image: {is_image_file(f.name)})")
164
-
165
- # Collect images by category using the function from main.py
166
- images_by_category, image_paths_by_category = collect_images_by_category(temp_dir_path)
167
-
168
- # Print categories and counts for debugging
169
- print(f"Collected images into {len(images_by_category)} categories")
170
- for category, images in images_by_category.items():
171
- print(f" - Category '{category}': {len(images)} images")
172
-
173
- # Check if we actually have images to process
174
- total_images = sum(len(images) for images in images_by_category.values())
175
- if total_images == 0:
176
- print("No images were properly categorized. Adding all images directly.")
177
- # Add all images directly without categorization
178
- default_category = "default"
179
- images_by_category[default_category] = []
180
- image_paths_by_category[default_category] = []
181
-
182
- for path in image_paths:
183
- path_str = str(path)
184
- try:
185
- if path_str in original_to_temp:
186
- temp_path = original_to_temp[path_str]
187
- temp_path_obj = Path(temp_path)
188
- img = Image.open(temp_path).convert("RGB")
189
- images_by_category[default_category].append(img)
190
- image_paths_by_category[default_category].append(temp_path_obj)
191
- except Exception as e:
192
- print(f"Error loading image {path}: {e}")
193
-
194
- # Map back to original paths for consistent ordering
195
- all_images = []
196
- all_image_paths = []
197
-
198
- # Create reverse mapping for lookup
199
- temp_to_orig = {v: k for k, v in original_to_temp.items()}
200
-
201
- # Go through each category and map back to original
202
- for category in images_by_category:
203
- for i, temp_path in enumerate(image_paths_by_category[category]):
204
- temp_path_str = str(temp_path)
205
- if temp_path_str in temp_to_orig:
206
- orig_path = temp_to_orig[temp_path_str]
207
- if i < len(images_by_category[category]):
208
- all_images.append(images_by_category[category][i])
209
- all_image_paths.append(orig_path)
210
-
211
- # Ensure we maintain original order
212
- ordered_images = []
213
- ordered_paths = []
214
 
215
- for orig_path in image_paths:
216
- path_str = str(orig_path)
217
- for i, path in enumerate(all_image_paths):
218
- if path == path_str and i < len(all_images):
219
- ordered_images.append(all_images[i])
220
- ordered_paths.append(path)
221
- break
222
 
223
- print(f"Collected {len(ordered_images)} images in correct order")
 
 
 
 
 
 
 
 
 
224
 
225
- # Process based on batch setting
226
- if batch_by_category and len(images_by_category) > 0:
227
- captions = process_by_category(images_by_category, image_paths_by_category, image_paths, original_to_temp)
 
 
228
  else:
229
- # Use our own function for non-batch mode since it needs to map back to UI
230
- captions = process_all_images(ordered_images, ordered_paths, image_paths)
231
-
232
- print(f"Returning {len(captions)} captions")
233
- return captions
234
-
235
- except Exception as e:
236
- print(f"Error in processing: {e}")
237
- raise
238
 
239
  # ------- UI Interaction Functions -------
240
 
@@ -295,7 +133,7 @@ def update_caption_labels(image_paths):
295
  return updates
296
 
297
  def run_captioning(image_paths, batch_category):
298
- """Generate captions for the images using the CLI code path"""
299
  if not image_paths:
300
  return [gr.update(value="") for _ in range(MAX_IMAGES)] + [gr.update(value="No images to process")]
301
 
 
4
  from io import BytesIO
5
  import time
6
  import tempfile
 
7
  from pathlib import Path
8
+ import shutil
9
+
10
+ from main import process_images
11
 
12
  # Maximum number of images
13
  MAX_IMAGES = 30
 
33
 
34
  return zip_io.getvalue()
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  def process_uploaded_images(image_paths, batch_by_category=False):
37
+ """Process uploaded images using main.py's functions"""
38
+ # Create temporary directories for input and output
39
+ with tempfile.TemporaryDirectory() as temp_input_dir, tempfile.TemporaryDirectory() as temp_output_dir:
40
+ # Copy all images to the temporary input directory
41
+ temp_input_path = Path(temp_input_dir)
42
+ temp_output_path = Path(temp_output_dir)
43
 
44
+ # Map of original paths to filenames in temp dir
45
+ path_mapping = {}
46
+
47
+ for i, path in enumerate(image_paths):
48
+ # Keep original filename to preserve categorization
49
+ filename = os.path.basename(path)
50
+ temp_path = temp_input_path / filename
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
+ # Copy file to temp directory
53
+ shutil.copy2(path, temp_path)
54
+ path_mapping[str(temp_path)] = str(path)
 
 
 
 
55
 
56
+ # Process the images using main.py's function
57
+ process_images(temp_input_dir, temp_output_dir, fix_outfit=False, batch_images=batch_by_category)
58
+
59
+ # Collect the captions from the output directory
60
+ captions = []
61
+ for orig_path in image_paths:
62
+ # Get the base filename without extension
63
+ base_name = os.path.splitext(os.path.basename(orig_path))[0]
64
+ caption_filename = f"{base_name}.txt"
65
+ caption_path = temp_output_path / caption_filename
66
 
67
+ # If caption file exists, read it; otherwise use empty string
68
+ if os.path.exists(caption_path):
69
+ with open(caption_path, 'r', encoding='utf-8') as f:
70
+ caption = f.read()
71
+ captions.append(caption)
72
  else:
73
+ captions.append("")
74
+
75
+ return captions
 
 
 
 
 
 
76
 
77
  # ------- UI Interaction Functions -------
78
 
 
133
  return updates
134
 
135
  def run_captioning(image_paths, batch_category):
136
+ """Generate captions for the images using the main.py functions"""
137
  if not image_paths:
138
  return [gr.update(value="") for _ in range(MAX_IMAGES)] + [gr.update(value="No images to process")]
139