"""Precompute per-tile feature vectors for a photo-mosaic tile library.

Every image in a folder is reduced to one fixed-length vector (average
colour, HSV histogram, LBP histogram, GLCM texture properties and HOG), and
the resulting matrix is pickled together with the matching file paths.
"""

import os
import pickle

import cv2
import numpy as np
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops, hog


# ---------------------------------------------------------------------
# Feature Extraction Functions
# ---------------------------------------------------------------------
def get_average_color(image) -> np.ndarray:
    """Return the per-channel mean of ``image`` over its first two axes.

    For an image loaded with ``cv2.imread`` this is the mean colour in BGR
    order.
    """
    return np.mean(image, axis=(0, 1))


def get_color_histogram(image, bins=(8, 8, 8)) -> np.ndarray:
    """Return a flattened, normalized 3-D colour histogram in HSV space.

    Args:
        image: BGR image (as returned by ``cv2.imread``).
        bins: Number of bins for the H, S and V channels respectively.

    Returns:
        1-D array with ``bins[0] * bins[1] * bins[2]`` entries.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # OpenCV stores 8-bit hue in [0, 180); saturation and value use [0, 256).
    hist = cv2.calcHist([hsv], [0, 1, 2], None, bins,
                        [0, 180, 0, 256, 0, 256])
    # In-place normalization using cv2.normalize's default settings.
    cv2.normalize(hist, hist)
    return hist.flatten()


def get_lbp_histogram(image, numPoints=24, radius=8, bins=59) -> np.ndarray:
    """Return a sum-normalized histogram of uniform Local Binary Patterns.

    Args:
        image: BGR image.
        numPoints: Number of circularly symmetric neighbour points.
        radius: Radius of the sampling circle, in pixels.
        bins: Number of histogram bins; the histogram range is ``(0, bins)``.

    Returns:
        1-D float array of length ``bins`` that sums to (approximately) 1.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    lbp = local_binary_pattern(gray, numPoints, radius, method="uniform")
    # NOTE(review): the "uniform" method appears to yield only
    # numPoints + 2 distinct codes, so with the defaults the upper bins are
    # presumably always empty. The default is kept so the feature length
    # stays compatible with existing pickles / the mosaic script - confirm
    # before changing.
    hist, _ = np.histogram(lbp.ravel(), bins=bins, range=(0, bins))
    hist = hist.astype("float")
    # Epsilon guards against division by zero.
    hist /= (hist.sum() + 1e-7)
    return hist


def get_glcm_features(image,
                      distances=(1, 2, 4),
                      angles=(0, np.pi / 4, np.pi / 2, 3 * np.pi / 4),
                      properties=('contrast', 'dissimilarity', 'homogeneity',
                                  'energy', 'correlation', 'ASM')) -> np.ndarray:
    """Return GLCM (Gray Level Co-occurrence Matrix) texture features.

    Args:
        image: BGR image.
        distances: Pixel-pair distance offsets.
        angles: Pixel-pair angles in radians.
        properties: GLCM properties computed for every (distance, angle)
            combination.

    Returns:
        1-D array of ``len(properties) * len(distances) * len(angles)``
        values, grouped property by property.
    """
    # Defaults are tuples rather than lists so that no mutable object is
    # shared between calls; lists passed by callers are still accepted.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    glcm = graycomatrix(gray, distances=distances, angles=angles,
                        levels=256, symmetric=True, normed=True)
    # graycoprops returns one (distance x angle) grid per property; flatten
    # each grid and lay them end to end.
    return np.hstack([graycoprops(glcm, prop).ravel() for prop in properties])


def get_hog_features(image,
                     orientations=9,
                     pixels_per_cell=(8, 8),
                     cells_per_block=(2, 2),
                     block_norm='L2-Hys') -> np.ndarray:
    """Return the Histogram of Oriented Gradients of the grayscale image.

    With the default cell and block sizes the image needs to be at least
    16x16 pixels for a block to fit.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return hog(gray,
               orientations=orientations,
               pixels_per_cell=pixels_per_cell,
               cells_per_block=cells_per_block,
               block_norm=block_norm)


def get_combined_features(image) -> np.ndarray:
    """Concatenate all per-image features into a single vector.

    Average colour, colour histogram, LBP histogram and GLCM features are
    computed at the image's original size. HOG is computed on a copy
    force-resized to 16x16, which keeps the HOG length independent of the
    tile size and mirrors the resize the mosaic script is said to perform
    before HOG.

    Order of the result: average colour, colour histogram, LBP histogram,
    GLCM features, HOG features.
    """
    # -- Features from the original image size --
    avg_color = get_average_color(image)
    color_hist = get_color_histogram(image)
    lbp_hist = get_lbp_histogram(image)
    glcm_feats = get_glcm_features(image)

    # -- Force-resize to 16x16 for HOG to match the mosaic script --
    hog_input = cv2.resize(image, (16, 16), interpolation=cv2.INTER_LINEAR)
    hog_feats = get_hog_features(hog_input)

    return np.concatenate([
        avg_color,
        color_hist,
        lbp_hist,
        glcm_feats,
        hog_feats,
    ])


# ---------------------------------------------------------------------
# Main Data Preparation Function
# ---------------------------------------------------------------------
def prepare_tile_data(tiles_folder, output_file):
    """Extract features for every image in ``tiles_folder`` and pickle them.

    The pickle holds a dict with two index-aligned entries:
    ``'features'`` (float32 array, one row per tile) and ``'paths'`` (list
    of the corresponding file paths).

    Args:
        tiles_folder: Directory containing the tile images (not searched
            recursively).
        output_file: Destination path of the pickle file.

    Returns:
        None. Nothing is written when the folder contains no file with a
        supported extension or when none of the files can be decoded.
    """
    tile_features = []
    tile_paths = []

    valid_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')

    # os.listdir order is arbitrary; sorting makes the saved row order
    # reproducible across runs and machines.
    all_files = sorted(
        name for name in os.listdir(tiles_folder)
        if name.lower().endswith(valid_extensions)
    )
    total_files = len(all_files)

    if not all_files:
        print("No valid image files found in", tiles_folder)
        return

    print(f"Found {total_files} image(s) in '{tiles_folder}'. Starting feature extraction...")

    for idx, filename in enumerate(all_files, start=1):
        filepath = os.path.join(tiles_folder, filename)
        image = cv2.imread(filepath)
        # cv2.imread signals an unreadable file by returning None.
        if image is None:
            print(f"[{idx}/{total_files}] Warning: Failed to read {filepath}")
            continue

        # Extract combined features (with forced 16x16 for HOG)
        features = get_combined_features(image)
        tile_features.append(features)
        tile_paths.append(filepath)

        # Log progress
        print(f"[{idx}/{total_files}] Processed: {filename}")

    if not tile_features:
        # Every candidate failed to decode; an empty feature matrix would be
        # useless to a nearest-neighbour index, so do not write one.
        print("No tiles could be read from", tiles_folder, "- nothing saved.")
        return

    # Convert to NumPy array (float32 for KDTree)
    tile_features = np.array(tile_features, dtype=np.float32)

    # Save features and paths
    data = {'features': tile_features, 'paths': tile_paths}
    with open(output_file, 'wb') as f:
        pickle.dump(data, f)

    print(f"Saved features for {len(tile_paths)} tiles to {output_file}")


# ---------------------------------------------------------------------
# Script Entry Point
# ---------------------------------------------------------------------
if __name__ == "__main__":
    # Adjust as needed:
    tiles_folder = "images_dataset"     # Folder with tile images
    output_file = "tile_features.pkl"   # Pickle file for precomputed features

    prepare_tile_data(tiles_folder, output_file)