Update app.py
app.py CHANGED
@@ -38,21 +38,21 @@ API_CONFIGS = {
         "headers": {"Authorization": "klHADHclpse2e2xSP9h747AgfE1Rx0wioemGhXYtedjZzvJ1WBUKwz7g"},
         "image_key": "src.medium",
         "result_key": "photos",
-        "delay": 2
+        "delay": 2
     },
     "unsplash": {
         "base_url": "https://api.unsplash.com/search/photos",
         "headers": {"Authorization": "Client-ID UKkhpD_Rs5-s1gIlVX28iNs_8E4ysPhQniyIpDpKUnU"},
         "image_key": "urls.small",
         "result_key": "results",
-        "delay": 2
+        "delay": 2
     },
     "pixabay": {
         "base_url": "https://pixabay.com/api/",
         "headers": {},
         "image_key": "webformatURL",
         "result_key": "hits",
-        "delay": 1
+        "delay": 1
     }
 }

@@ -69,7 +69,7 @@ def load_used_pages():
     return data

 def save_used_pages(data):
-    """Save the
+    """Save the used_pages tracking file."""
     with open(TRACKING_FILE, "w") as f:
         json.dump(data, f)

@@ -96,6 +96,7 @@ def fetch_image_urls(api_name, category, num_images):
     pages = get_available_pages(num_pages_needed, api_name)

     if not pages:
+        logger.error(f"No available pages for {api_name}")
         return []

     image_urls = []

@@ -108,11 +109,15 @@ def fetch_image_urls(api_name, category, num_images):
         url = f"{config['base_url']}?query={category.lower()}&per_page={ITEMS_PER_PAGE}&page={page}"

         try:
+            logger.info(f"Fetching from {api_name} URL: {url}")
             time.sleep(config.get("delay", 0))  # Respect API rate limits
             response = requests.get(url, headers=config["headers"])
             response.raise_for_status()
             data_response = response.json()

+            # Log the raw response for debugging
+            logger.debug(f"{api_name} response: {json.dumps(data_response, indent=2)}")
+
             # Validate response contains expected key
             if config["result_key"] not in data_response or not data_response[config["result_key"]]:
                 logger.warning(f"No {config['result_key']} found for page {page} from {api_name}")

@@ -125,29 +130,39 @@ def fetch_image_urls(api_name, category, num_images):
                 image_url = item.get(config["image_key"])
                 if image_url:
                     page_urls.append(image_url)
+                else:
+                    logger.warning(f"No {config['image_key']} found in item: {item}")
             if page_urls:
                 image_urls.extend(page_urls)
                 data["used_pages"].setdefault(api_name, []).append(page)
                 save_used_pages(data)
                 logger.info(f"Successfully fetched {len(page_urls)} images from page {page} for {api_name}")
+            else:
+                logger.warning(f"No valid image URLs found on page {page} for {api_name}")
         except requests.exceptions.RequestException as e:
             logger.error(f"Error fetching page {page} from {api_name}: {e}")
-            if response
-
-
-
-
-
-
-            data["used_pages"][
-
-
+            if "response" in locals():
+                if response.status_code == 401:
+                    logger.error(f"401 Unauthorized for {api_name}. Check your API key.")
+                elif response.status_code == 400:
+                    logger.error(f"400 Bad Request for {api_name}. Check parameters or API key.")
+                elif response.status_code == 429:
+                    logger.error(f"429 Rate Limit Exceeded for {api_name}. Wait before retrying.")
+                if page in data["used_pages"].get(api_name, []):
+                    data["used_pages"][api_name].remove(page)
+                    save_used_pages(data)
+                    logger.info(f"Removed failed page {page} from {api_name}")
             break

+    logger.info(f"Total image URLs fetched from {api_name}: {len(image_urls)}")
     return image_urls[:num_images]

 def download_images(image_urls):
     """Download images from the provided URLs and save to IMAGES_DIR."""
+    if not image_urls:
+        logger.error("No image URLs provided to download")
+        return 0, []
+
     if os.path.exists(IMAGES_DIR):
         shutil.rmtree(IMAGES_DIR)
     os.makedirs(IMAGES_DIR, exist_ok=True)

@@ -156,20 +171,22 @@ def download_images(image_urls):
     image_paths = []
     for idx, url in enumerate(image_urls, 1):
         try:
-
+            logger.info(f"Downloading image {idx}/{len(image_urls)}: {url}")
+            response = requests.get(url, stream=True, timeout=10)
             response.raise_for_status()
             image_path = os.path.join(IMAGES_DIR, f"img{idx}.jpg")
             with open(image_path, "wb") as f:
                 for chunk in response.iter_content(chunk_size=8192):
                     if chunk:
                         f.write(chunk)
-            Image.open(image_path).verify()
+            Image.open(image_path).verify()  # Verify it's a valid image
             downloaded_count += 1
             image_paths.append(image_path)
-
+            logger.info(f"Downloaded {idx}/{len(image_urls)}: {url}")
         except Exception as e:
-
+            logger.error(f"Error downloading {url}: {e}")

+    logger.info(f"Total images downloaded: {downloaded_count}")
     return downloaded_count, image_paths

 def create_zip_file(selected_image_paths):

@@ -190,11 +207,13 @@ def process_and_display(api_name, category, num_images):

     image_urls = fetch_image_urls(api_name, category, num_images)
     if not image_urls:
-
+        logger.error(f"No images fetched from {api_name} for category {category}")
+        return f"No unique images available from {api_name} today or API limit reached. Check logs for details.", None, [], [None] * TOTAL_IMAGES, [False] * TOTAL_IMAGES

     downloaded_count, image_paths = download_images(image_urls)
     if downloaded_count == 0:
-
+        logger.error(f"No images downloaded from {api_name}")
+        return f"No images were successfully downloaded from {api_name}.", None, [], [None] * TOTAL_IMAGES, [False] * TOTAL_IMAGES

     status = f"Successfully downloaded {downloaded_count}/{num_images} images from {api_name}. Select images to include in ZIP below."
     image_outputs = [image_paths[i] if i < len(image_paths) else None for i in range(TOTAL_IMAGES)]

@@ -372,23 +391,4 @@ with gr.Blocks(title="Stock Photo Downloader", css=css) as demo:
             zip_path,
             image_paths,
             *[gr.Image(value=img, visible=img is not None, label=f"Image {i+1}", height=150, width=150) if img else gr.Image(value=None, visible=False) for i, img in enumerate(image_outs)],
-            *[gr.
-        )
-
-    def on_submit(image_paths, *checkbox_states):
-        status, zip_path = process_zip_submission(image_paths, *checkbox_states)
-        return status, gr.File(value=zip_path, visible=True) if zip_path else gr.File(visible=False)
-
-    download_button.click(
-        fn=on_download,
-        inputs=[api_input, category_input, num_images_input],
-        outputs=[status_output, zip_output, image_paths_state] + image_outputs + checkbox_outputs
-    )
-
-    submit_button.click(
-        fn=on_submit,
-        inputs=[image_paths_state] + checkbox_outputs,
-        outputs=[status_output, zip_output]
-    )
-
-    demo.launch()
+            *[gr.Chec
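
For reference on the image_key values configured above: "src.medium" (Pexels) and "urls.small" (Unsplash) are dot-separated paths into nested response items, while Pixabay's "webformatURL" is a flat key. Below is a minimal sketch, not part of this commit, of how such a dotted key could be resolved against an item returned by response.json(); the helper name resolve_image_key and the sample items are illustrative assumptions only.

# Illustrative sketch only; not part of app.py. Shows how a dot-separated
# image_key such as "src.medium" or "urls.small" could be looked up in a
# nested response item, assuming each item is a plain dict from response.json().
def resolve_image_key(item, dotted_key):
    """Follow a dot-separated key path through nested dicts; return None if any step is missing."""
    value = item
    for part in dotted_key.split("."):
        if not isinstance(value, dict):
            return None
        value = value.get(part)
        if value is None:
            return None
    return value

if __name__ == "__main__":
    # Hypothetical Pexels-style item with a nested "src" object.
    pexels_item = {"src": {"medium": "https://example.com/photo-medium.jpg"}}
    print(resolve_image_key(pexels_item, "src.medium"))     # nested path -> URL
    # Hypothetical Pixabay-style item with a flat key.
    pixabay_item = {"webformatURL": "https://example.com/photo-640.jpg"}
    print(resolve_image_key(pixabay_item, "webformatURL"))  # flat key -> URL

A flat key like "webformatURL" resolves in a single step, so the same helper would cover both the nested and the flat configurations.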