File size: 6,010 Bytes
4e4d03e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import os
import cv2
import numpy as np
import pickle
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops, hog

# ---------------------------------------------------------------------
# Feature Extraction Functions
# ---------------------------------------------------------------------
def get_average_color(image):
    """Return the per-channel mean color of *image* (BGR order, length-3 array)."""
    # Averaging over the height and width axes leaves one value per channel.
    return image.mean(axis=(0, 1))

def get_color_histogram(image, bins=(8, 8, 8)):
    """
    Return a flattened, normalized 3-D color histogram of *image* in HSV space.

    bins: number of histogram bins per HSV channel.
    """
    hsv_img = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # OpenCV hue spans [0, 180); saturation and value span [0, 256).
    ranges = [0, 180, 0, 256, 0, 256]
    hist = cv2.calcHist([hsv_img], [0, 1, 2], None, bins, ranges)
    cv2.normalize(hist, hist)
    return hist.flatten()

def get_lbp_histogram(image, numPoints=24, radius=8, bins=59):
    """
    Return a normalized histogram of uniform Local Binary Pattern codes
    computed from the grayscale version of *image*.

    numPoints / radius: LBP sampling parameters.
    bins: number of histogram bins covering the code range [0, bins).
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    codes = local_binary_pattern(gray, numPoints, radius, method="uniform")
    counts, _ = np.histogram(codes.ravel(), bins=bins, range=(0, bins))
    counts = counts.astype("float")
    # The epsilon keeps the division safe if the histogram sums to zero.
    counts /= (counts.sum() + 1e-7)
    return counts

def get_glcm_features(image,
                      distances=(1, 2, 4),
                      angles=(0, np.pi/4, np.pi/2, 3*np.pi/4),
                      properties=('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM')):
    """
    Compute GLCM (Gray Level Co-occurrence Matrix) based features
    (a.k.a. Haralick features) from the grayscale version of *image*.

    Parameters
    ----------
    image : BGR image array (as returned by cv2.imread).
    distances : sequence of pixel-pair distances.
    angles : sequence of angles in radians.
    properties : GLCM properties to compute for each distance/angle pair.

    Returns
    -------
    1-D numpy array with every property's values concatenated.
    """
    # NOTE: defaults are now tuples instead of lists — mutable default
    # arguments are a Python pitfall (shared across calls). Behavior and
    # values are unchanged.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    glcm = graycomatrix(gray,
                        distances=list(distances),
                        angles=list(angles),
                        levels=256,
                        symmetric=True,
                        normed=True)
    # graycoprops returns a (len(distances), len(angles)) grid per property;
    # ravel each grid and stack them into one flat feature vector.
    feats = [graycoprops(glcm, prop).ravel() for prop in properties]
    return np.hstack(feats)

def get_hog_features(image,
                     orientations=9,
                     pixels_per_cell=(8, 8),
                     cells_per_block=(2, 2),
                     block_norm='L2-Hys'):
    """
    Extract a Histogram of Oriented Gradients descriptor from the
    grayscale version of *image*.

    With the default 8x8 cells and 2x2 blocks the input must be at
    least 16x16 pixels.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    descriptor = hog(
        grayscale,
        orientations=orientations,
        pixels_per_cell=pixels_per_cell,
        cells_per_block=cells_per_block,
        block_norm=block_norm,
    )
    return descriptor

def get_combined_features(image):
    """
    Build one feature vector for a tile: average color, HSV color
    histogram, LBP histogram, GLCM-based features, and HOG features,
    concatenated in that order.

    IMPORTANT: only the HOG portion runs on a forced 16x16 resize of
    the tile, so the vector matches the mosaic script, which also
    forces 16x16 before HOG.
    """
    # Features computed at the tile's original resolution.
    parts = [
        get_average_color(image),
        get_color_histogram(image),
        get_lbp_histogram(image),
        get_glcm_features(image),
    ]

    # HOG runs on a fixed 16x16 resample to stay compatible with the
    # mosaic script's feature layout.
    hog_tile = cv2.resize(image, (16, 16), interpolation=cv2.INTER_LINEAR)
    parts.append(get_hog_features(hog_tile))

    return np.concatenate(parts)

# ---------------------------------------------------------------------
# Main Data Preparation Function
# ---------------------------------------------------------------------
def prepare_tile_data(tiles_folder, output_file):
    """
    Compute a feature vector for every readable image in *tiles_folder*
    and pickle the results to *output_file*.

    The pickle holds a dict:
        {'features': float32 ndarray of shape (n_tiles, n_features),
         'paths':    list of the corresponding image file paths}

    Tiles that fail to load are skipped with a warning. Each tile is
    force-resized to 16x16 for the HOG portion (same as the mosaic
    script) inside get_combined_features().
    """
    tile_features = []
    tile_paths = []
    valid_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')

    # Gather all files whose extension looks like an image.
    all_files = [f for f in os.listdir(tiles_folder) if f.lower().endswith(valid_extensions)]
    total_files = len(all_files)

    if total_files == 0:
        print("No valid image files found in", tiles_folder)
        return

    print(f"Found {total_files} image(s) in '{tiles_folder}'. Starting feature extraction...")

    for idx, filename in enumerate(all_files, start=1):
        filepath = os.path.join(tiles_folder, filename)
        image = cv2.imread(filepath)
        if image is None:
            # cv2.imread returns None for unreadable/corrupt files.
            print(f"[{idx}/{total_files}] Warning: Failed to read {filepath}")
            continue

        # Extract combined features (with forced 16x16 for HOG)
        features = get_combined_features(image)

        tile_features.append(features)
        tile_paths.append(filepath)

        # BUG FIX: this line previously printed the literal placeholder
        # "(unknown)" instead of the file that was just processed.
        print(f"[{idx}/{total_files}] Processed: {filename}")

    # float32 keeps the matrix compact and suits KDTree construction later.
    tile_features = np.array(tile_features, dtype=np.float32)

    # Save features and paths together so indices stay aligned.
    data = {'features': tile_features, 'paths': tile_paths}
    with open(output_file, 'wb') as f:
        pickle.dump(data, f)

    print(f"Saved features for {len(tile_paths)} tiles to {output_file}")

# ---------------------------------------------------------------------
# Script Entry Point
# ---------------------------------------------------------------------
if __name__ == "__main__":
    # Edit these two paths as needed for your setup.
    source_folder = "images_dataset"     # Folder with tile images
    features_path = "tile_features.pkl"  # Pickle file for precomputed features

    prepare_tile_data(source_folder, features_path)