Deadmon committed
Commit def6f3f · verified · 1 Parent(s): b700ceb

Update app.py

Files changed (1)
  1. app.py +40 -40
app.py CHANGED
@@ -38,21 +38,21 @@ API_CONFIGS = {
         "headers": {"Authorization": "klHADHclpse2e2xSP9h747AgfE1Rx0wioemGhXYtedjZzvJ1WBUKwz7g"},
         "image_key": "src.medium",
         "result_key": "photos",
-        "delay": 2  # Pexels rate limit: 200/hour free tier (~1 request every 18s, using 2s for safety)
+        "delay": 2
     },
     "unsplash": {
         "base_url": "https://api.unsplash.com/search/photos",
         "headers": {"Authorization": "Client-ID UKkhpD_Rs5-s1gIlVX28iNs_8E4ysPhQniyIpDpKUnU"},
         "image_key": "urls.small",
         "result_key": "results",
-        "delay": 2  # Unsplash rate limit: 50/hour free tier (~1 request every 72s, using 2s for safety)
+        "delay": 2
     },
     "pixabay": {
         "base_url": "https://pixabay.com/api/",
         "headers": {},
         "image_key": "webformatURL",
         "result_key": "hits",
-        "delay": 1  # Pixabay has higher limits (5000/hour), so 1s is fine
+        "delay": 1
     }
 }
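Note: the delay values above are what fetch_image_urls later passes to time.sleep(config.get("delay", 0)) (see the hunk further down). A minimal standalone sketch of that throttling pattern; the endpoint, helper name and parameters here are illustrative, not part of app.py:

import time
import requests

# Illustrative config entry in the same shape as the API_CONFIGS values above.
config = {"base_url": "https://example.com/api/search", "headers": {}, "delay": 2}

def fetch_page(query, page):
    """Fetch one result page, sleeping first so free-tier rate limits are respected."""
    time.sleep(config.get("delay", 0))  # same throttle as in fetch_image_urls
    response = requests.get(
        f"{config['base_url']}?query={query}&page={page}",
        headers=config["headers"],
        timeout=10,
    )
    response.raise_for_status()
    return response.json()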
 
@@ -69,7 +69,7 @@ def load_used_pages():
     return data
 
 def save_used_pages(data):
-    """Save the used pages tracking file."""
+    """Save the used_pages tracking file."""
    with open(TRACKING_FILE, "w") as f:
        json.dump(data, f)
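For orientation: save_used_pages is the write half of a small JSON persistence layer, and later hunks show the data shaped as {"used_pages": {api_name: [page, ...]}}. A minimal sketch of the round trip under that assumption; the file name and the fallback behaviour are illustrative, and app.py's real load_used_pages (whose tail is shown above) may do more:

import json
import os

TRACKING_FILE = "used_pages.json"  # illustrative; app.py defines its own path

def load_used_pages():
    """Load the tracking data, falling back to an empty structure."""
    if not os.path.exists(TRACKING_FILE):
        return {"used_pages": {}}
    with open(TRACKING_FILE) as f:
        return json.load(f)

def save_used_pages(data):
    """Persist the tracking data as JSON."""
    with open(TRACKING_FILE, "w") as f:
        json.dump(data, f)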
 
@@ -96,6 +96,7 @@ def fetch_image_urls(api_name, category, num_images):
     pages = get_available_pages(num_pages_needed, api_name)
 
     if not pages:
+        logger.error(f"No available pages for {api_name}")
         return []
 
     image_urls = []
@@ -108,11 +109,15 @@ def fetch_image_urls(api_name, category, num_images):
         url = f"{config['base_url']}?query={category.lower()}&per_page={ITEMS_PER_PAGE}&page={page}"
 
         try:
+            logger.info(f"Fetching from {api_name} URL: {url}")
             time.sleep(config.get("delay", 0))  # Respect API rate limits
             response = requests.get(url, headers=config["headers"])
             response.raise_for_status()
             data_response = response.json()
 
+            # Log the raw response for debugging
+            logger.debug(f"{api_name} response: {json.dumps(data_response, indent=2)}")
+
             # Validate response contains expected key
             if config["result_key"] not in data_response or not data_response[config["result_key"]]:
                 logger.warning(f"No {config['result_key']} found for page {page} from {api_name}")
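One note on the added logger.debug call: it only produces output when the logger's effective level is DEBUG or lower, so with the default WARNING level (or a typical INFO setup) the raw-response dump stays silent. A minimal sketch of a configuration that would surface it; app.py's actual logging setup is outside this diff, so this is an assumption:

import logging

logging.basicConfig(
    level=logging.DEBUG,  # raise to INFO to silence the raw-response dumps
    format="%(asctime)s %(levelname)s %(name)s: %(message)s",
)
logger = logging.getLogger(__name__)

logger.debug("visible only when the effective level is DEBUG or lower")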
@@ -125,29 +130,39 @@ def fetch_image_urls(api_name, category, num_images):
                 image_url = item.get(config["image_key"])
                 if image_url:
                     page_urls.append(image_url)
+                else:
+                    logger.warning(f"No {config['image_key']} found in item: {item}")
             if page_urls:
                 image_urls.extend(page_urls)
                 data["used_pages"].setdefault(api_name, []).append(page)
                 save_used_pages(data)
                 logger.info(f"Successfully fetched {len(page_urls)} images from page {page} for {api_name}")
+            else:
+                logger.warning(f"No valid image URLs found on page {page} for {api_name}")
         except requests.exceptions.RequestException as e:
             logger.error(f"Error fetching page {page} from {api_name}: {e}")
-            if response.status_code == 401:
-                logger.error(f"401 Unauthorized for {api_name}. Check your API key.")
-            elif response.status_code == 400:
-                logger.error(f"400 Bad Request for {api_name}. Check parameters or API key.")
-            elif response.status_code == 429:
-                logger.error(f"429 Rate Limit Exceeded for {api_name}. Wait before retrying.")
-            if page in data["used_pages"].get(api_name, []):
-                data["used_pages"][api_name].remove(page)
-                save_used_pages(data)
-                logger.info(f"Removed failed page {page} from {api_name}")
+            if "response" in locals():
+                if response.status_code == 401:
+                    logger.error(f"401 Unauthorized for {api_name}. Check your API key.")
+                elif response.status_code == 400:
+                    logger.error(f"400 Bad Request for {api_name}. Check parameters or API key.")
+                elif response.status_code == 429:
+                    logger.error(f"429 Rate Limit Exceeded for {api_name}. Wait before retrying.")
+            if page in data["used_pages"].get(api_name, []):
+                data["used_pages"][api_name].remove(page)
+                save_used_pages(data)
+                logger.info(f"Removed failed page {page} from {api_name}")
             break
 
+    logger.info(f"Total image URLs fetched from {api_name}: {len(image_urls)}")
     return image_urls[:num_images]
 
 def download_images(image_urls):
     """Download images from the provided URLs and save to IMAGES_DIR."""
+    if not image_urls:
+        logger.error("No image URLs provided to download")
+        return 0, []
+
     if os.path.exists(IMAGES_DIR):
         shutil.rmtree(IMAGES_DIR)
     os.makedirs(IMAGES_DIR, exist_ok=True)
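The new if "response" in locals(): guard exists because requests.get can fail (for example with a ConnectionError or Timeout) before response is ever bound, in which case the old handler's response.status_code would itself raise inside the except block. A small sketch of the same idea using the exception's own response attribute instead of locals(); this is an alternative pattern for illustration, not what app.py does:

import requests

def status_of_failure(url):
    """Return the HTTP status of a failed request, or None if no response arrived."""
    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()
        return response.status_code
    except requests.exceptions.RequestException as exc:
        # HTTPError carries the server response; ConnectionError/Timeout do not.
        if exc.response is not None:
            return exc.response.status_code
        return None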
@@ -156,20 +171,22 @@ def download_images(image_urls):
     image_paths = []
     for idx, url in enumerate(image_urls, 1):
         try:
-            response = requests.get(url, stream=True)
+            logger.info(f"Downloading image {idx}/{len(image_urls)}: {url}")
+            response = requests.get(url, stream=True, timeout=10)
             response.raise_for_status()
             image_path = os.path.join(IMAGES_DIR, f"img{idx}.jpg")
             with open(image_path, "wb") as f:
                 for chunk in response.iter_content(chunk_size=8192):
                     if chunk:
                         f.write(chunk)
-            Image.open(image_path).verify()
+            Image.open(image_path).verify()  # Verify it's a valid image
             downloaded_count += 1
             image_paths.append(image_path)
-            print(f"Downloaded {idx}/{len(image_urls)}: {url}")
+            logger.info(f"Downloaded {idx}/{len(image_urls)}: {url}")
         except Exception as e:
-            print(f"Error downloading {url}: {e}")
+            logger.error(f"Error downloading {url}: {e}")
 
+    logger.info(f"Total images downloaded: {downloaded_count}")
     return downloaded_count, image_paths
 
 def create_zip_file(selected_image_paths):
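A side note on Image.open(image_path).verify(): Pillow's verify() parses the file headers without decoding pixel data, and the image object cannot be used afterwards, so the file must be reopened if it is needed again; using it purely as a post-download sanity check, as above, is fine. A small illustrative helper built on that behaviour (not part of app.py):

from PIL import Image

def is_valid_image(path):
    """Cheap integrity check: verify() raises if the file looks broken."""
    try:
        with Image.open(path) as img:
            img.verify()
        return True
    except Exception:
        return False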
@@ -190,11 +207,13 @@ def process_and_display(api_name, category, num_images):
 
     image_urls = fetch_image_urls(api_name, category, num_images)
     if not image_urls:
-        return "No unique images available today or API limit reached. Check logs for details.", None, [], [None] * TOTAL_IMAGES, [False] * TOTAL_IMAGES
+        logger.error(f"No images fetched from {api_name} for category {category}")
+        return f"No unique images available from {api_name} today or API limit reached. Check logs for details.", None, [], [None] * TOTAL_IMAGES, [False] * TOTAL_IMAGES
 
     downloaded_count, image_paths = download_images(image_urls)
     if downloaded_count == 0:
-        return "No images were successfully downloaded.", None, [], [None] * TOTAL_IMAGES, [False] * TOTAL_IMAGES
+        logger.error(f"No images downloaded from {api_name}")
+        return f"No images were successfully downloaded from {api_name}.", None, [], [None] * TOTAL_IMAGES, [False] * TOTAL_IMAGES
 
     status = f"Successfully downloaded {downloaded_count}/{num_images} images from {api_name}. Select images to include in ZIP below."
     image_outputs = [image_paths[i] if i < len(image_paths) else None for i in range(TOTAL_IMAGES)]
@@ -372,23 +391,4 @@ with gr.Blocks(title="Stock Photo Downloader", css=css) as demo:
             zip_path,
             image_paths,
             *[gr.Image(value=img, visible=img is not None, label=f"Image {i+1}", height=150, width=150) if img else gr.Image(value=None, visible=False) for i, img in enumerate(image_outs)],
-            *[gr.Checkbox(value=chk, visible=i < len(image_paths), label=f"Image {i+1}", scale=0) for i, chk in enumerate(checkbox_outs)]
-        )
-
-    def on_submit(image_paths, *checkbox_states):
-        status, zip_path = process_zip_submission(image_paths, *checkbox_states)
-        return status, gr.File(value=zip_path, visible=True) if zip_path else gr.File(visible=False)
-
-    download_button.click(
-        fn=on_download,
-        inputs=[api_input, category_input, num_images_input],
-        outputs=[status_output, zip_output, image_paths_state] + image_outputs + checkbox_outputs
-    )
-
-    submit_button.click(
-        fn=on_submit,
-        inputs=[image_paths_state] + checkbox_outputs,
-        outputs=[status_output, zip_output]
-    )
-
-demo.launch()
+            *[gr.Chec