make-stockimage-dataset

Sleeping

App Files Files Community

Deadmon committed on Mar 15

Commit

cda0cc0

verified ·

1 Parent(s): cf80828

Create app.py

Browse files

Files changed (1) hide show

app.py +269 -0

app.py ADDED Viewed

	@@ -0,0 +1,269 @@

+import os
+import requests
+import zipfile
+import gradio as gr
+import shutil
+from PIL import Image
+import json
+from datetime import date
+import random
# Configuration: everything the app writes lives under OUTPUT_DIR.
OUTPUT_DIR = "downloaded_images"
IMAGES_DIR = os.path.join(OUTPUT_DIR, "images")
ZIP_FILE = os.path.join(OUTPUT_DIR, "images.zip")
TRACKING_FILE = os.path.join(OUTPUT_DIR, "used_pages.json")
# Ensure output directory exists
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Constants
ITEMS_PER_PAGE = 40
DAILY_IMAGE_LIMIT = 2000  # Adjusted for free tier limits
MAX_PAGES = DAILY_IMAGE_LIMIT // ITEMS_PER_PAGE
# API credentials are read from the environment so real keys never have to be
# committed; the original placeholder strings remain as the fallback values.
PEXELS_API_KEY = os.environ.get("PEXELS_API_KEY", "Your_Pexels_API_Key")
UNSPLASH_API_KEY = os.environ.get("UNSPLASH_API_KEY", "Your_Unsplash_API_Key")
PIXABAY_API_KEY = os.environ.get("PIXABAY_API_KEY", "Your_Pixabay_API_Key")
# API Configurations
# Per-provider settings:
#   base_url   - search endpoint (Pixabay's is a str.format template)
#   headers    - HTTP headers sent with every search request
#   image_key  - key of the image URL inside one result item; a dot separates
#                nested levels (e.g. "src.medium")
#   result_key - key of the result list inside the JSON response
API_CONFIGS = {
    "pexels": {
        "base_url": "https://api.pexels.com/v1/search",
        "headers": {"Authorization": PEXELS_API_KEY},
        "image_key": "src.medium",
        "result_key": "photos",
    },
    "unsplash": {
        "base_url": "https://api.unsplash.com/search/photos",
        "headers": {"Authorization": "Client-ID " + UNSPLASH_API_KEY},
        "image_key": "urls.small",
        "result_key": "results",
    },
    "pixabay": {
        # The key is spliced in by concatenation so the remaining
        # {category}/{ITEMS_PER_PAGE}/{page} fields stay available to .format().
        "base_url": (
            "https://pixabay.com/api/?key="
            + PIXABAY_API_KEY
            + "&q={category}&per_page={ITEMS_PER_PAGE}&page={page}"
        ),
        "headers": {},
        "image_key": "webformatURL",
        "result_key": "hits",
    },
}
def load_used_pages():
    """Load today's page-tracking record, or start a fresh one.

    Returns:
        A dict ``{"date": <today as str>, "used_pages": {api_name: [page, ...]}}``.
        A fresh record (empty ``used_pages``) is returned when the tracking
        file is missing, unreadable, not valid JSON, or dated another day.
    """
    today = str(date.today())
    # used_pages must be a mapping: get_available_pages looks pages up per API
    # with .get(api_name, ...), which a list does not support.
    fresh = {"date": today, "used_pages": {}}
    if not os.path.exists(TRACKING_FILE):
        return fresh
    try:
        with open(TRACKING_FILE, "r") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # Corrupt or unreadable tracking data should not take the app down.
        return fresh
    if not isinstance(data, dict) or data.get("date") != today:
        return fresh
    if not isinstance(data.get("used_pages"), dict):
        # Tracking files written with the old list layout are reset.
        data["used_pages"] = {}
    return data
def save_used_pages(data):
    """Serialize *data* as JSON into TRACKING_FILE, overwriting what was there."""
    with open(TRACKING_FILE, mode="w") as tracking_handle:
        json.dump(data, fp=tracking_handle)
def get_available_pages(num_pages_needed, api_name):
    """Reserve random, not-yet-used result pages for one API.

    Args:
        num_pages_needed: How many distinct page numbers to reserve.
        api_name: Key under which the pages are tracked.

    Returns:
        A list of ``num_pages_needed`` page numbers drawn from
        ``1..MAX_PAGES``, or ``None`` when fewer unused pages remain. The
        chosen pages are recorded in the tracking file before returning.
    """
    data = load_used_pages()
    used_by_api = data.get("used_pages")
    if not isinstance(used_by_api, dict):
        # The tracking record may carry a flat list here; a per-API mapping is
        # required for the lookup below, so start over with an empty one.
        used_by_api = {}
        data["used_pages"] = used_by_api
    already_used = set(used_by_api.get(api_name, []))
    available_pages = [
        page for page in range(1, MAX_PAGES + 1) if page not in already_used
    ]
    if len(available_pages) < num_pages_needed:
        return None
    selected_pages = random.sample(available_pages, num_pages_needed)
    used_by_api.setdefault(api_name, []).extend(selected_pages)
    save_used_pages(data)
    return selected_pages
def fetch_image_urls(api_name, category, num_images):
    """Collect up to *num_images* image URLs for *category* from one API.

    Args:
        api_name: Key into API_CONFIGS selecting the provider.
        category: Search term sent to the provider.
        num_images: Maximum number of URLs to return.

    Returns:
        A list of at most ``num_images`` URLs; empty when no unused pages are
        left or nothing could be fetched.
    """
    from urllib.parse import quote_plus  # local: only needed for the Pixabay template

    config = API_CONFIGS[api_name]
    num_pages_needed = (num_images + ITEMS_PER_PAGE - 1) // ITEMS_PER_PAGE
    pages = get_available_pages(num_pages_needed, api_name)
    if not pages:
        return []
    # image_key may name a nested field ("src.medium"); walk it level by level
    # instead of looking the dotted string up as a single key.
    key_path = config["image_key"].split(".")
    image_urls = []
    for page in pages:
        if len(image_urls) >= num_images:
            break
        if api_name == "pixabay":
            # The Pixabay URL is a template, so the term is encoded by hand.
            url = config["base_url"].format(
                category=quote_plus(category.lower()),
                page=page,
                ITEMS_PER_PAGE=ITEMS_PER_PAGE,
            )
            params = None
        else:
            # Let requests build and percent-encode the query string.
            url = config["base_url"]
            params = {"query": category, "per_page": ITEMS_PER_PAGE, "page": page}
        try:
            response = requests.get(
                url, headers=config["headers"], params=params, timeout=30
            )
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"Error fetching page {page} from {api_name}: {e}")
            break
        results = data.get(config["result_key"]) if isinstance(data, dict) else None
        if not results:
            # Pages are sampled at random, so one empty page says nothing
            # about the remaining ones; keep going.
            continue
        for item in results:
            if len(image_urls) >= num_images:
                break
            image_url = item
            for part in key_path:
                image_url = image_url.get(part) if isinstance(image_url, dict) else None
            if image_url:
                image_urls.append(image_url)
    return image_urls[:num_images]
def download_images(image_urls):
    """Download every URL into a freshly emptied IMAGES_DIR.

    Files are named ``img<N>.jpg`` where N is the 1-based position of the URL.
    Each file is checked with Pillow's ``verify()``; a download that fails or
    does not verify is reported, its file is removed, and processing continues
    with the next URL.

    Args:
        image_urls: Iterable of image URLs.

    Returns:
        ``(downloaded_count, image_paths)`` covering the successful downloads.
    """
    if os.path.exists(IMAGES_DIR):
        shutil.rmtree(IMAGES_DIR)
    os.makedirs(IMAGES_DIR, exist_ok=True)
    downloaded_count = 0
    image_paths = []
    for idx, url in enumerate(image_urls, 1):
        image_path = os.path.join(IMAGES_DIR, f"img{idx}.jpg")
        try:
            # Context managers release the streamed connection and the image
            # file handle even when a step fails.
            with requests.get(url, stream=True, timeout=30) as response:
                response.raise_for_status()
                with open(image_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
            with Image.open(image_path) as img:
                img.verify()
        except Exception as e:  # best effort: one bad image must not stop the batch
            print(f"Error downloading {url}: {e}")
            # Do not leave a partial or invalid file behind in the gallery dir.
            try:
                if os.path.exists(image_path):
                    os.remove(image_path)
            except OSError as cleanup_error:
                print(f"Could not remove {image_path}: {cleanup_error}")
            continue
        downloaded_count += 1
        image_paths.append(image_path)
        print(f"Downloaded {idx}/{len(image_urls)}: {url}")
    return downloaded_count, image_paths
def create_zip_file(selected_image_paths):
    """Bundle the given image files into ZIP_FILE and return its path.

    Any existing archive is deleted first. Entries are stored under their
    path relative to OUTPUT_DIR, using DEFLATE compression.
    """
    if os.path.exists(ZIP_FILE):
        os.remove(ZIP_FILE)
    with zipfile.ZipFile(ZIP_FILE, 'w', zipfile.ZIP_DEFLATED) as archive:
        for source_path in selected_image_paths:
            archive.write(source_path, os.path.relpath(source_path, OUTPUT_DIR))
    return ZIP_FILE
def process_and_display(api_name, category, num_images):
    """Fetch and download images, then prepare data for display.

    Args:
        api_name: Provider key passed on to fetch_image_urls.
        category: Search term; ``None`` or blank input is rejected.
        num_images: Requested image count (anything ``int()`` accepts);
            values above 24 are capped at 24.

    Returns:
        A 5-tuple ``(status, zip_path, image_paths, image_outputs,
        checkbox_outputs)``. ``zip_path`` is always ``None``. The last two
        items are 24-element lists: image paths padded with ``None`` and
        matching booleans. On any failure ``image_paths`` is ``None`` and the
        lists are all ``None`` / ``False``.
    """
    max_slots = 24  # number of image/checkbox slots the result lists must fill

    def failure(message):
        return message, None, None, [None] * max_slots, [False] * max_slots

    try:
        num_images = int(num_images)
    except (TypeError, ValueError):
        return failure("Number of images must be a whole number.")
    if num_images < 1:
        return failure("Number of images must be at least 1.")
    num_images = min(num_images, max_slots)

    # The category field accepts free text, so it can arrive empty or as None.
    category = "" if category is None else str(category).strip()
    if not category:
        return failure("Please choose or enter a category.")

    image_urls = fetch_image_urls(api_name, category, num_images)
    if not image_urls:
        return failure("No unique images available today or API limit reached.")
    downloaded_count, image_paths = download_images(image_urls)
    if downloaded_count == 0:
        return failure("No images were successfully downloaded.")
    status = f"Successfully downloaded {downloaded_count}/{num_images} images from {api_name}. Select images to include in ZIP below."
    padding = max_slots - len(image_paths)
    image_outputs = (image_paths + [None] * padding)[:max_slots]
    checkbox_outputs = [path is not None for path in image_outputs]
    return status, None, image_paths, image_outputs, checkbox_outputs
def process_zip_submission(image_paths, *checkbox_states):
    """Create a ZIP file from the images whose checkbox is ticked.

    Args:
        image_paths: Paths of the downloaded images (may be ``None`` or empty).
        *checkbox_states: One truthy/falsy flag per gallery slot, in the same
            order as ``image_paths``. Flags beyond the last image are ignored.

    Returns:
        ``(status_message, zip_path)``; ``zip_path`` is ``None`` when there
        was nothing to archive.
    """
    if not image_paths:
        return "No images available to process.", None
    # zip() stops at the shorter sequence, so a ticked box in a slot that has
    # no image cannot index past the end of image_paths.
    selected_image_paths = [
        path for path, state in zip(image_paths, checkbox_states) if state
    ]
    if not selected_image_paths:
        return "No images selected for ZIP.", None
    zip_path = create_zip_file(selected_image_paths)
    return f"ZIP file created with {len(selected_image_paths)} images at {zip_path}", zip_path
# Gradio Interface
# Layout: three dropdowns and a fetch button, a status box, a (hidden) ZIP
# download slot, a fixed 6x4 grid of image/checkbox pairs, and a submit button.
with gr.Blocks(title="Stock Photo Downloader") as demo:
    gr.Markdown("### Select Parameters to Download Stock Photos")
    api_input = gr.Dropdown(
        label="API Source",
        choices=["pexels", "unsplash", "pixabay"],
        value="pexels"
    )
    category_input = gr.Dropdown(
        label="Category",
        choices=["nature", "business", "people", "technology", "food", "travel", "animals", "fashion"],
        value="nature",
        allow_custom_value=True
    )
    num_images_input = gr.Dropdown(
        label="Number of Images (Max 24)",
        choices=["4", "8", "12", "16", "20", "24"],
        value="4"
    )
    download_button = gr.Button("Fetch and Display Images")
    gr.Markdown("### Download Status")
    status_output = gr.Textbox(label="Status", interactive=False)
    gr.Markdown("### Download Your Images")
    zip_output = gr.File(label="Download ZIP", visible=False)
    gr.Markdown("### Image Gallery (Click Thumbnails to View Full Size)")
    # Holds the list of downloaded image paths between the two button clicks.
    image_paths_state = gr.State()
    IMAGES_PER_ROW = 4
    MAX_ROWS = 6
    TOTAL_IMAGES = IMAGES_PER_ROW * MAX_ROWS
    image_outputs = []
    checkbox_outputs = []
    # Every slot is created up front and hidden; the fetch handler reveals the
    # ones that receive an image.
    for row in range(MAX_ROWS):
        with gr.Row():
            for col in range(IMAGES_PER_ROW):
                idx = row * IMAGES_PER_ROW + col
                with gr.Column(min_width=150):
                    image_output = gr.Image(
                        label=f"Image {idx+1}",
                        visible=False,
                        height=150,
                        width=150
                    )
                    checkbox_output = gr.Checkbox(
                        label="Include in ZIP",
                        value=True,
                        visible=False
                    )
                    image_outputs.append(image_output)
                    checkbox_outputs.append(checkbox_output)
    gr.Markdown("### Submit Selections")
    submit_button = gr.Button("Create ZIP of Selected Images")

    def on_download(api_name, category, num_images):
        """Run a fetch and return updates for status, ZIP slot, state and grid."""
        status, zip_path, image_paths, image_outs, checkbox_outs = process_and_display(api_name, category, num_images)
        image_updates = [
            gr.Image(value=img, visible=True, label=f"Image {i+1}", height=150, width=150)
            if img
            else gr.Image(value=None, visible=False)
            for i, img in enumerate(image_outs)
        ]
        checkbox_updates = [
            gr.Checkbox(value=True, visible=True, label="Include in ZIP")
            if chk
            else gr.Checkbox(value=False, visible=False)
            for chk in checkbox_outs
        ]
        return (
            status,
            # A new fetch invalidates any earlier archive, so the download
            # slot is hidden again until a ZIP path is available.
            gr.File(value=zip_path, visible=zip_path is not None),
            image_paths,
            *image_updates,
            *checkbox_updates,
        )

    def on_submit(image_paths, *checkbox_states):
        """Build the ZIP from the ticked images and reveal the download slot."""
        status, zip_path = process_zip_submission(image_paths, *checkbox_states)
        if zip_path:
            return status, gr.File(value=zip_path, visible=True)
        return status, gr.File(visible=False)

    download_button.click(
        fn=on_download,
        inputs=[api_input, category_input, num_images_input],
        outputs=[status_output, zip_output, image_paths_state] + image_outputs + checkbox_outputs
    )
    submit_button.click(
        fn=on_submit,
        inputs=[image_paths_state] + checkbox_outputs,
        outputs=[status_output, zip_output]
    )
demo.launch()