Deadmon committed on
Commit 3eb9780 · verified · 1 Parent(s): 9e2e26a

Update app.py

Files changed (1)
  1. app.py +27 -287
app.py CHANGED
@@ -7,276 +7,7 @@ from PIL import Image
  import json
  from datetime import date
  import random
- 
- # Configuration
- OUTPUT_DIR = "downloaded_images"
- IMAGES_DIR = os.path.join(OUTPUT_DIR, "images")
- ZIP_FILE = os.path.join(OUTPUT_DIR, "images.zip")
- TRACKING_FILE = os.path.join(OUTPUT_DIR, "used_pages.json")
- 
- # Ensure output directory exists
- os.makedirs(OUTPUT_DIR, exist_ok=True)
- 
- # Constants
- ITEMS_PER_PAGE = 40
- DAILY_IMAGE_LIMIT = 2000  # Adjusted for free tier limits
- MAX_PAGES = DAILY_IMAGE_LIMIT // ITEMS_PER_PAGE
- 
- # API Configurations
- API_CONFIGS = {
-     "pexels": {
-         "base_url": "https://api.pexels.com/v1/search",
-         "headers": {"Authorization": "klHADHclpse2e2xSP9h747AgfE1Rx0wioemGhXYtedjZzvJ1WBUKwz7g"},
-         "image_key": "src.medium",
-         "result_key": "photos"
-     },
-     "unsplash": {
-         "base_url": "https://api.unsplash.com/search/photos",
-         "headers": {"Authorization": "722961 na6HV6Ym7dCeK1cZM5GRkWpNmhWsV1ZwusOpkTaCL9U"},
-         "image_key": "urls.small",
-         "result_key": "results"
-     },
-     "pixabay": {
-         "base_url": "https://pixabay.com/api/?key=45122300-cd3621e1539e8e95430ee3efc&q={category}&per_page={ITEMS_PER_PAGE}&page={page}",
-         "headers": {},
-         "image_key": "webformatURL",
-         "result_key": "hits"
-     }
- }
- 
- def load_used_pages():
-     """Load or initialize the used pages tracking file."""
-     today = str(date.today())
-     if os.path.exists(TRACKING_FILE):
-         with open(TRACKING_FILE, "r") as f:
-             data = json.load(f)
-         if data.get("date") != today:
-             data = {"date": today, "used_pages": {}}
-     else:
-         data = {"date": today, "used_pages": {}}
-     return data
- 
- def save_used_pages(data):
-     """Save the used pages tracking file."""
-     with open(TRACKING_FILE, "w") as f:
-         json.dump(data, f)
- 
- def get_available_pages(num_pages_needed, api_name):
-     """Get a list of unused page numbers for the specified API."""
-     data = load_used_pages()
-     used_pages = set(data["used_pages"].get(api_name, []))
-     all_pages = set(range(1, MAX_PAGES + 1))
-     available_pages = list(all_pages - used_pages)
- 
-     if len(available_pages) < num_pages_needed:
-         return None
- 
-     selected_pages = random.sample(available_pages, num_pages_needed)
-     if api_name not in data["used_pages"]:
-         data["used_pages"][api_name] = []
-     data["used_pages"][api_name].extend(selected_pages)
-     save_used_pages(data)
-     return selected_pages
- 
- def fetch_image_urls(api_name, category, num_images):
-     """Fetch image URLs from the specified API based on category and desired number of images."""
-     config = API_CONFIGS[api_name]
-     num_pages_needed = (num_images + ITEMS_PER_PAGE - 1) // ITEMS_PER_PAGE
-     pages = get_available_pages(num_pages_needed, api_name)
- 
-     if not pages:
-         return []
- 
-     image_urls = []
-     for page in pages:
-         if api_name == "pixabay":
-             url = config["base_url"].format(category=category.lower(), page=page, ITEMS_PER_PAGE=ITEMS_PER_PAGE)
-         else:
-             url = f"{config['base_url']}?query={category}&per_page={ITEMS_PER_PAGE}&page={page}"
-         try:
-             response = requests.get(url, headers=config["headers"])
-             response.raise_for_status()
-             data = response.json()
- 
-             if config["result_key"] not in data or not data[config["result_key"]]:
-                 break
- 
-             for item in data[config["result_key"]]:
-                 if len(image_urls) >= num_images:
-                     break
-                 image_url = item.get(config["image_key"])
-                 if image_url:
-                     image_urls.append(image_url)
-         except requests.exceptions.RequestException as e:
-             print(f"Error fetching page {page} from {api_name}: {e}")
-             break
- 
-     return image_urls[:num_images]
- 
- def download_images(image_urls):
-     """Download images from the provided URLs and save to IMAGES_DIR."""
-     if os.path.exists(IMAGES_DIR):
-         shutil.rmtree(IMAGES_DIR)
-     os.makedirs(IMAGES_DIR, exist_ok=True)
- 
-     downloaded_count = 0
-     image_paths = []
-     for idx, url in enumerate(image_urls, 1):
-         try:
-             response = requests.get(url, stream=True)
-             response.raise_for_status()
-             image_path = os.path.join(IMAGES_DIR, f"img{idx}.jpg")
-             with open(image_path, "wb") as f:
-                 for chunk in response.iter_content(chunk_size=8192):
-                     if chunk:
-                         f.write(chunk)
-             Image.open(image_path).verify()
-             downloaded_count += 1
-             image_paths.append(image_path)
-             print(f"Downloaded {idx}/{len(image_urls)}: {url}")
-         except Exception as e:
-             print(f"Error downloading {url}: {e}")
- 
-     return downloaded_count, image_paths
- 
- def create_zip_file(selected_image_paths):
-     """Create a ZIP file of the selected images."""
-     if os.path.exists(ZIP_FILE):
-         os.remove(ZIP_FILE)
-     with zipfile.ZipFile(ZIP_FILE, 'w', zipfile.ZIP_DEFLATED) as zipf:
-         for image_path in selected_image_paths:
-             arcname = os.path.relpath(image_path, OUTPUT_DIR)
-             zipf.write(image_path, arcname)
-     return ZIP_FILE
- 
- def process_and_display(api_name, category, num_images):
-     """Fetch and download images, then prepare data for display."""
-     num_images = int(num_images)
-     if num_images > 24:
-         num_images = 24
- 
-     image_urls = fetch_image_urls(api_name, category, num_images)
-     if not image_urls:
-         return "No unique images available today or API limit reached.", None, None, [None] * 24, [False] * 24
- 
-     downloaded_count, image_paths = download_images(image_urls)
-     if downloaded_count == 0:
-         return "No images were successfully downloaded.", None, None, [None] * 24, [False] * 24
- 
-     status = f"Successfully downloaded {downloaded_count}/{num_images} images from {api_name}. Select images to include in ZIP below."
-     image_outputs = [image_paths[i] if i < len(image_paths) else None for i in range(24)]
-     checkbox_outputs = [True if i < len(image_paths) else False for i in range(24)]
- 
-     return status, None, image_paths, image_outputs, checkbox_outputs
- 
- def process_zip_submission(image_paths, *checkbox_states):
-     """Create a ZIP file based on the selected images."""
-     if not image_paths:
-         return "No images available to process.", None
- 
-     selected_image_paths = [image_paths[i] for i, state in enumerate(checkbox_states) if state]
-     if not selected_image_paths:
-         return "No images selected for ZIP.", None
- 
-     zip_path = create_zip_file(selected_image_paths)
-     return f"ZIP file created with {len(selected_image_paths)} images at {zip_path}", zip_path
- 
- # Gradio Interface
- with gr.Blocks(title="Stock Photo Downloader") as demo:
-     gr.Markdown("### Select Parameters to Download Stock Photos")
-     api_input = gr.Dropdown(
-         label="API Source",
-         choices=["pexels", "unsplash", "pixabay"],
-         value="pexels"
-     )
-     category_input = gr.Dropdown(
-         label="Category",
-         choices=["nature", "business", "people", "technology", "food", "travel", "animals", "fashion"],
-         value="nature",
-         allow_custom_value=True
-     )
-     num_images_input = gr.Dropdown(
-         label="Number of Images (Max 24)",
-         choices=["4", "8", "12", "16", "20", "24"],
-         value="4"
-     )
-     download_button = gr.Button("Fetch and Display Images")
- 
-     gr.Markdown("### Download Status")
-     status_output = gr.Textbox(label="Status", interactive=False)
- 
-     gr.Markdown("### Download Your Images")
-     zip_output = gr.File(label="Download ZIP", visible=False)
- 
-     gr.Markdown("### Image Gallery (Click Thumbnails to View Full Size)")
-     image_paths_state = gr.State()
- 
-     IMAGES_PER_ROW = 4
-     MAX_ROWS = 6
-     TOTAL_IMAGES = IMAGES_PER_ROW * MAX_ROWS
- 
-     image_outputs = []
-     checkbox_outputs = []
- 
-     for row in range(MAX_ROWS):
-         with gr.Row():
-             for col in range(IMAGES_PER_ROW):
-                 idx = row * IMAGES_PER_ROW + col
-                 with gr.Column(min_width=150):
-                     image_output = gr.Image(
-                         label=f"Image {idx+1}",
-                         visible=False,
-                         height=150,
-                         width=150
-                     )
-                     checkbox_output = gr.Checkbox(
-                         label=f"Include in ZIP",
-                         value=True,
-                         visible=False
-                     )
-                     image_outputs.append(image_output)
-                     checkbox_outputs.append(checkbox_output)
- 
-     gr.Markdown("### Submit Selections")
-     submit_button = gr.Button("Create ZIP of Selected Images")
- 
-     def on_download(api_name, category, num_images):
-         status, zip_path, image_paths, image_outs, checkbox_outs = process_and_display(api_name, category, num_images)
-         return (
-             status,
-             zip_path,
-             image_paths,
-             *[gr.Image(value=img, visible=img is not None, label=f"Image {i+1}", height=150, width=150) if img else gr.Image(value=None, visible=False) for i, img in enumerate(image_outs)],
-             *[gr.Checkbox(value=chk, visible=chk, label=f"Include in ZIP") if chk else gr.Checkbox(value=False, visible=False) for chk in checkbox_outs]
-         )
- 
-     def on_submit(image_paths, *checkbox_states):
-         status, zip_path = process_zip_submission(image_paths, *checkbox_states)
-         return status, gr.File(value=zip_path, visible=True) if zip_path else gr.File(visible=False)
- 
-     download_button.click(
-         fn=on_download,
-         inputs=[api_input, category_input, num_images_input],
-         outputs=[status_output, zip_output, image_paths_state] + image_outputs + checkbox_outputs
-     )
- 
-     submit_button.click(
-         fn=on_submit,
-         inputs=[image_paths_state] + checkbox_outputs,
-         outputs=[status_output, zip_output]
-     )
- 
- demo.launch()
- 
- import os
- import requests
- import zipfile
- import gradio as gr
- import shutil
- from PIL import Image
- import json
- from datetime import date
- import random
+ import time
  import logging
  
  # Configure logging
@@ -299,23 +30,26 @@ MAX_PAGES = DAILY_IMAGE_LIMIT // ITEMS_PER_PAGE
  
  # API Configurations (Replace with valid keys)
  API_CONFIGS = {
-     "pexels": {
+     "pexels": {
          "base_url": "https://api.pexels.com/v1/search",
          "headers": {"Authorization": "klHADHclpse2e2xSP9h747AgfE1Rx0wioemGhXYtedjZzvJ1WBUKwz7g"},
          "image_key": "src.medium",
-         "result_key": "photos"
+         "result_key": "photos",
+         "delay": 1  # Seconds between requests to avoid rate limits
      },
      "unsplash": {
          "base_url": "https://api.unsplash.com/search/photos",
-         "headers": {"Authorization": "722961 na6HV6Ym7dCeK1cZM5GRkWpNmhWsV1ZwusOpkTaCL9U"},
+         "headers": {"Authorization": "Client-ID YOUR_NEW_UNSPLASH_API_KEY"},  # Replace with new key
          "image_key": "urls.small",
-         "result_key": "results"
+         "result_key": "results",
+         "delay": 2  # Stricter limit (50 requests/hour)
      },
      "pixabay": {
-         "base_url": "https://pixabay.com/api/?key=45122300-cd3621e1539e8e95430ee3efc&q={category}&per_page={ITEMS_PER_PAGE}&page={page}",
+         "base_url": "https://pixabay.com/api/?key=YOUR_NEW_PIXABAY_API_KEY&q={category}&per_page={ITEMS_PER_PAGE}&page={page}",
          "headers": {},
          "image_key": "webformatURL",
-         "result_key": "hits"
+         "result_key": "hits",
+         "delay": 1
      }
  }
  
@@ -366,19 +100,21 @@ def fetch_image_urls(api_name, category, num_images):
          return []
  
      image_urls = []
+     data = load_used_pages()  # Load current data to modify used_pages
      for page in pages:
          if api_name == "pixabay":
              url = config["base_url"].format(category=category.lower(), page=page, ITEMS_PER_PAGE=ITEMS_PER_PAGE)
          else:
              url = f"{config['base_url']}?query={category}&per_page={ITEMS_PER_PAGE}&page={page}"
          try:
+             time.sleep(config.get("delay", 0))  # Add delay to respect rate limits
              response = requests.get(url, headers=config["headers"])
              response.raise_for_status()
              data = response.json()
  
              if config["result_key"] not in data or not data[config["result_key"]]:
                  logger.warning(f"No results for page {page} from {api_name}")
-                 break
+                 continue  # Skip to next page instead of breaking
  
              for item in data[config["result_key"]]:
                  if len(image_urls) >= num_images:
@@ -386,19 +122,23 @@ def fetch_image_urls(api_name, category, num_images):
                  image_url = item.get(config["image_key"])
                  if image_url:
                      image_urls.append(image_url)
-             # Only mark page as used if images are successfully fetched
-             if not image_urls:
-                 data["used_pages"][api_name].remove(page)
-                 save_used_pages(data)
-                 logger.info(f"Removed unused page {page} for {api_name}")
+             # Mark page as used only if images are fetched
+             if image_urls:
+                 if page not in data["used_pages"].get(api_name, []):
+                     data["used_pages"].setdefault(api_name, []).append(page)
+                     save_used_pages(data)
+                     logger.info(f"Marked page {page} as used for {api_name}")
          except requests.exceptions.RequestException as e:
              logger.error(f"Error fetching page {page} from {api_name}: {e}")
              if response.status_code == 401:
-                 logger.error(f"401 Unauthorized for {api_name}. Check API key.")
+                 logger.error(f"401 Unauthorized for {api_name}. Please regenerate your API key at https://unsplash.com/developers (Unsplash) or similar.")
              elif response.status_code == 400:
-                 logger.error(f"400 Bad Request for {api_name}. Check URL or API key.")
-             data["used_pages"][api_name].remove(page)
-             save_used_pages(data)
+                 logger.error(f"400 Bad Request for {api_name}. Check API key or ensure page {page} is valid.")
+             # Remove failed page from used_pages
+             if page in data["used_pages"].get(api_name, []):
+                 data["used_pages"][api_name].remove(page)
+                 save_used_pages(data)
+                 logger.info(f"Removed failed page {page} from {api_name}")
              break
  
      return image_urls[:num_images]
@@ -447,7 +187,7 @@ def process_and_display(api_name, category, num_images):
  
      image_urls = fetch_image_urls(api_name, category, num_images)
      if not image_urls:
-         return "No unique images available today or API limit reached.", None, None, [None] * 24, [False] * 24
+         return "No unique images available today or API limit reached. Check logs for details.", None, None, [None] * 24, [False] * 24
  
      downloaded_count, image_paths = download_images(image_urls)
      if downloaded_count == 0:
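
For reference, the page-tracking changes above read and write used_pages.json through load_used_pages() and save_used_pages(). A minimal sketch of that file's shape, with made-up values (the real file is created under downloaded_images/ at runtime):

    # Illustrative contents of downloaded_images/used_pages.json (example values only)
    example_tracking = {
        "date": "2025-05-01",      # load_used_pages() resets the file when the date changes
        "used_pages": {
            "pexels": [3, 17],     # page numbers already fetched today, tracked per API
            "pixabay": [8],
        },
    }

The keys flagged "Replace with valid keys" could also be read from the environment rather than hardcoded; a sketch under assumed variable names, not part of this commit:

    import os

    # Hypothetical environment-variable names
    UNSPLASH_KEY = os.environ.get("UNSPLASH_API_KEY", "YOUR_NEW_UNSPLASH_API_KEY")
    PIXABAY_KEY = os.environ.get("PIXABAY_API_KEY", "YOUR_NEW_PIXABAY_API_KEY")
    # e.g. API_CONFIGS["unsplash"]["headers"] = {"Authorization": f"Client-ID {UNSPLASH_KEY}"}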