# Source: Hugging Face Space jbilcke-hf/text-to-map (Space status when captured: Paused).
# File size: 15,066 bytes; revision c8a43f4.

import numpy as np
import cv2
import trimesh
import argparse
from PIL import Image
from sklearn.cluster import KMeans

class SatelliteModelGenerator:
    """Build a textured 3D height-field mesh from a satellite image.

    The pipeline is: ``segment_image`` labels every pixel with a flat class
    color, ``estimate_heights`` turns the building regions into a height
    map, and ``generate_mesh`` converts that height map into a ``trimesh``
    mesh textured with the source image.
    """

    def __init__(self, building_height=0.05):
        """Set up reference colors, tolerances and tuning constants.

        Args:
            building_height: Multiplier applied to height-map values when
                mesh vertices are placed.
        """
        self.building_height = building_height

        # Reference colors for segmentation (RGB)
        self.shadow_colors = np.array([
            [31, 42, 76],
            [58, 64, 92],
            [15, 27, 56],
            [21, 22, 50],
            [76, 81, 99]
        ])

        self.road_colors = np.array([
            [187, 182, 175],
            [138, 138, 138],
            [142, 142, 129],
            [202, 199, 189]
        ])

        self.water_colors = np.array([
            [167, 225, 217],
            [67, 101, 97],
            [53, 83, 84],
            [47, 94, 100],
            [73, 131, 135]
        ])

        # Reference colors in normalized HSV (hue doubled, S/V divided by 255)
        self.shadow_colors_hsv = self._reference_colors_to_hsv(self.shadow_colors)
        self.road_colors_hsv = self._reference_colors_to_hsv(self.road_colors)
        self.water_colors_hsv = self._reference_colors_to_hsv(self.water_colors)

        # Color tolerances from original segmenter
        self.shadow_tolerance = {'hue': 15, 'sat': 0.15, 'val': 0.12}
        self.road_tolerance = {'hue': 10, 'sat': 0.12, 'val': 0.15}
        self.water_tolerance = {'hue': 20, 'sat': 0.15, 'val': 0.20}

        # Output colors (BGR for OpenCV)
        self.colors = {
            'black': np.array([0, 0, 0]),      # Shadows
            'blue': np.array([255, 0, 0]),     # Water
            'green': np.array([0, 255, 0]),    # Vegetation
            'gray': np.array([128, 128, 128]), # Roads
            'brown': np.array([0, 140, 255]),  # Terrain
            'white': np.array([255, 255, 255]) # Buildings
        }

        # Constants for height estimation
        self.shadow_search_distance = 5
        self.min_area_for_clustering = 1000
        self.residential_height_factor = 0.6
        self.isolation_threshold = 0.6

    @staticmethod
    def _reference_colors_to_hsv(rgb_colors):
        """Convert an (N, 3) RGB array to normalized HSV rows.

        The hue returned by OpenCV is doubled and saturation/value are
        divided by 255, which is the same normalization that
        ``color_distance_hsv`` applies to the pixel it is given.
        """
        hsv = cv2.cvtColor(rgb_colors.reshape(-1, 1, 3).astype(np.uint8),
                           cv2.COLOR_RGB2HSV).reshape(-1, 3).astype(float)
        hsv[:, 0] = hsv[:, 0] * 2
        hsv[:, 1:] = hsv[:, 1:] / 255
        return hsv

    def color_distance_hsv(self, pixel_hsv, reference_hsv, tolerance):
        """Check whether one pixel lies within tolerance of a reference color.

        Args:
            pixel_hsv: Raw (h, s, v) triple; the hue is doubled and s/v are
                divided by 255 before comparison.
            reference_hsv: Already-normalized (h, s, v) reference triple.
            tolerance: Dict with ``'hue'``, ``'sat'`` and ``'val'`` limits.

        Returns:
            Truthy when hue, saturation and value all differ from the
            reference by no more than their tolerance.
        """
        pixel_h = float(pixel_hsv[0]) * 2
        pixel_s = float(pixel_hsv[1]) / 255
        pixel_v = float(pixel_hsv[2]) / 255

        # Hue is circular: take the shorter way around the 360 degree wheel.
        hue_diff = min(abs(pixel_h - reference_hsv[0]),
                       360 - abs(pixel_h - reference_hsv[0]))
        sat_diff = abs(pixel_s - reference_hsv[1])
        val_diff = abs(pixel_v - reference_hsv[2])

        return (hue_diff <= tolerance['hue'] and
                sat_diff <= tolerance['sat'] and
                val_diff <= tolerance['val'])

    @staticmethod
    def _matches_any_reference(hue, sat, val, references_hsv, tolerance):
        """Array version of ``color_distance_hsv`` over several references.

        ``hue``, ``sat`` and ``val`` are equally-shaped, already-normalized
        arrays. Returns a boolean array that is True wherever the pixel is
        within tolerance of at least one row of ``references_hsv``.
        """
        matched = np.zeros(hue.shape, dtype=bool)
        for ref_h, ref_s, ref_v in references_hsv:
            raw_diff = np.abs(hue - ref_h)
            hue_diff = np.minimum(raw_diff, 360 - raw_diff)
            matched |= ((hue_diff <= tolerance['hue']) &
                        (np.abs(sat - ref_s) <= tolerance['sat']) &
                        (np.abs(val - ref_v) <= tolerance['val']))
        return matched

    def _classify_hsv(self, hsv, dtype=np.uint8):
        """Label every pixel of a raw HSV image with its class color.

        Priority is shadow, water, road, pinkish building, vegetation,
        soil, and finally building as the default. The labels are painted
        from lowest to highest priority so later writes win.

        Args:
            hsv: (rows, cols, 3) HSV image as produced by ``cv2.cvtColor``.
            dtype: dtype of the returned label image.

        Returns:
            (rows, cols, 3) array holding one of ``self.colors`` per pixel.
        """
        hue = hsv[..., 0].astype(np.float64) * 2  # Convert to 0-360 range
        sat = hsv[..., 1].astype(np.float64) / 255
        val = hsv[..., 2].astype(np.float64) / 255

        is_shadow = self._matches_any_reference(
            hue, sat, val, self.shadow_colors_hsv, self.shadow_tolerance)
        is_water = self._matches_any_reference(
            hue, sat, val, self.water_colors_hsv, self.water_tolerance)
        is_road = self._matches_any_reference(
            hue, sat, val, self.road_colors_hsv, self.road_tolerance)

        # Pinkish building tones: red-pink hue, moderate saturation/brightness
        is_pinkish = (((hue >= 340) | (hue <= 15)) &
                      (sat >= 0.2) & (sat <= 0.6) &
                      (val >= 0.3) & (val <= 0.7))
        # Vegetation (green)
        is_vegetation = (hue >= 40) & (hue <= 150) & (sat >= 0.15)
        # Soil/dirt (yellow-brown, lower saturation, excluding pinkish tones)
        is_soil = ((hue >= 15) & (hue <= 45) &
                   (sat >= 0.15) & (sat <= 0.45) & ~is_pinkish)

        output = np.empty(hsv.shape, dtype=dtype)
        output[...] = self.colors['white']       # default: building
        output[is_soil] = self.colors['brown']
        output[is_vegetation] = self.colors['green']
        output[is_pinkish] = self.colors['white']
        output[is_road] = self.colors['gray']
        output[is_water] = self.colors['blue']
        output[is_shadow] = self.colors['black']
        return output

    def get_dominant_surrounding_color(self, output, y, x):
        """Return the most common non-building color around a pixel.

        Only the in-bounds 8-neighbors of ``(y, x)`` that are not the
        building color are counted. The count is always divided by 8, so
        pixels on the image border can reach the isolation threshold less
        easily than interior pixels.

        Returns:
            The winning color as an array (ties go to the color seen first
            in scan order), or None when there are no non-building
            neighbors or their share of 8 is below
            ``self.isolation_threshold``.
        """
        height, width = output.shape[:2]
        white = self.colors['white']

        # Tuples are hashable, so colors can be counted directly without
        # converting to strings and eval()-ing them back.
        color_counts = {}
        non_building = 0

        for dy in (-1, 0, 1):
            for dx in (-1, 0, 1):
                if dx == 0 and dy == 0:
                    continue

                ny, nx = y + dy, x + dx
                if not (0 <= ny < height and 0 <= nx < width):
                    continue

                neighbor = output[ny, nx]
                if np.array_equal(neighbor, white):
                    continue

                key = tuple(neighbor.tolist())
                color_counts[key] = color_counts.get(key, 0) + 1
                non_building += 1

        if non_building == 0:
            return None

        if non_building / 8.0 < self.isolation_threshold:
            return None

        dominant = max(color_counts.items(), key=lambda item: item[1])[0]
        return np.array(dominant)

    def segment_image(self, img, window_size=5):
        """Segment a BGR image into flat class colors.

        Args:
            img: BGR image as loaded by ``cv2.imread``.
            window_size: Kept for backward compatibility. Classification
                has only ever used the pixel itself, so this value does
                not influence the result.

        Returns:
            Label image shaped like ``img`` whose pixels are entries of
            ``self.colors``.
        """
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

        # First pass: per-pixel classification, done on whole arrays.
        output = self._classify_hsv(hsv, dtype=img.dtype)

        # Second pass: relabel building pixels that are mostly surrounded
        # by another class. Neighbors are read from the first-pass result
        # so one relabeling cannot influence the next.
        final_output = output.copy()
        building_mask = np.all(output == self.colors['white'], axis=2)
        for y, x in np.argwhere(building_mask):
            dominant_color = self.get_dominant_surrounding_color(output, y, x)
            if dominant_color is not None:
                final_output[y, x] = dominant_color

        return final_output

    def _cluster_height_factor(self, intensities):
        """Pick the height factor for one large building region.

        The grayscale intensities are split into two KMeans clusters and
        ``self.residential_height_factor`` is returned when cluster 0 has
        the higher mean, otherwise 1.0.
        """
        # A uniform region cannot be split into two clusters; clustering it
        # would leave one cluster empty (NaN mean), which compares False
        # and therefore also results in 1.0.
        if intensities.min() == intensities.max():
            return 1.0

        kmeans = KMeans(n_clusters=2, random_state=42)
        clusters = kmeans.fit_predict(intensities.reshape(-1, 1))
        cluster_means = [intensities[clusters == i].mean() for i in range(2)]
        # NOTE(review): the outcome depends on which cluster KMeans happens
        # to number 0 - confirm this is the intended criterion.
        if cluster_means[0] > cluster_means[1]:
            return self.residential_height_factor
        return 1.0

    def estimate_heights(self, img, segmented):
        """Estimate a per-pixel building height map.

        Each connected building region gets one height, the product of a
        size factor (area relative to the largest region), a shadow factor
        (share of shadow pixels inside the region dilated by a 5x5 kernel)
        and, for regions of at least ``self.min_area_for_clustering``
        pixels, a clustering-based factor.

        Args:
            img: Original BGR image.
            segmented: Label image from ``segment_image``.

        Returns:
            float32 array with one value per pixel; zero outside buildings,
            and all values scaled by 0.15.
        """
        buildings_mask = np.all(segmented == self.colors['white'], axis=2)
        shadows_mask = np.all(segmented == self.colors['black'], axis=2)

        num_buildings, labels = cv2.connectedComponents(buildings_mask.astype(np.uint8))

        areas = np.bincount(labels.flatten())[1:]  # Skip background
        max_area = np.max(areas) if len(areas) > 0 else 1

        height_map = np.zeros_like(labels, dtype=np.float32)

        dilation_kernel = np.ones((5, 5), np.uint8)
        gray = None  # computed once, and only if some region needs it

        for label in range(1, num_buildings):
            building_mask = (labels == label)

            area = areas[label - 1]
            size_factor = 0.3 + 0.7 * (area / max_area)

            dilated = cv2.dilate(building_mask.astype(np.uint8), dilation_kernel)
            shadow_ratio = np.sum(dilated & shadows_mask) / np.sum(dilated)
            shadow_factor = 0.2 + 0.8 * shadow_ratio

            if area >= self.min_area_for_clustering:
                if gray is None:
                    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                height_factor = self._cluster_height_factor(gray[building_mask])
            else:
                height_factor = 1.0

            height_map[building_mask] = size_factor * shadow_factor * height_factor

        return height_map * 0.15

    @staticmethod
    def _to_model_space(vertices, width, height):
        """Map (col, height, row) vertices into normalized model space.

        Columns and rows are divided by the larger image dimension and
        centered on the origin; the vertical coordinate is mapped with
        ``y * 2 - 1``. Returns a new float64 array.
        """
        scale = max(width, height)
        out = np.array(vertices, dtype=np.float64)
        out[:, 0] = out[:, 0] / scale * 2 - (width / scale)
        out[:, 2] = out[:, 2] / scale * 2 - (height / scale)
        out[:, 1] = out[:, 1] * 2 - 1
        return out

    def generate_mesh(self, height_map, texture_img, add_walls=True):
        """Generate the textured 3D mesh for a height map.

        Args:
            height_map: 2D array with one height per pixel.
            texture_img: Image used as texture; 3- and 4-channel images
                are converted from BGR/BGRA to RGB.
            add_walls: When True, ``_add_walls`` is applied to the mesh.

        Returns:
            A ``trimesh.Trimesh`` with one vertex per height-map pixel and
            two triangles per pixel quad (plus walls if requested).
        """
        height, width = height_map.shape

        x, z = np.meshgrid(np.arange(width), np.arange(height))
        vertices = np.stack([x, height_map * self.building_height, z], axis=-1)
        vertices = self._to_model_space(vertices.reshape(-1, 3), width, height)

        i, j = np.meshgrid(np.arange(height - 1), np.arange(width - 1), indexing='ij')
        v0 = (i * width + j).flatten()
        v1 = v0 + 1
        v2 = ((i + 1) * width + j).flatten()
        v3 = v2 + 1

        faces = np.vstack((
            np.column_stack((v0, v2, v1)),
            np.column_stack((v1, v2, v3))
        ))

        # max(..., 1) avoids dividing by zero for 1-pixel wide/tall maps.
        uvs = np.zeros((vertices.shape[0], 2))
        uvs[:, 0] = x.flatten() / max(width - 1, 1)
        uvs[:, 1] = 1 - (z.flatten() / max(height - 1, 1))

        if len(texture_img.shape) == 3:
            if texture_img.shape[2] == 4:
                texture_img = cv2.cvtColor(texture_img, cv2.COLOR_BGRA2RGB)
            else:
                texture_img = cv2.cvtColor(texture_img, cv2.COLOR_BGR2RGB)

        mesh = trimesh.Trimesh(
            vertices=vertices,
            faces=faces,
            visual=trimesh.visual.TextureVisuals(
                uv=uvs,
                image=Image.fromarray(texture_img)
            )
        )

        if add_walls:
            mesh = self._add_walls(mesh, height_map)

        return mesh

    def _build_wall_geometry(self, edge_coords, height_map):
        """Build one vertical quad per edge pixel.

        Each quad spans one pixel along the column axis, from height 0 up
        to the scaled height-map value at the edge pixel. An edge pixel
        whose height is 0 therefore yields a zero-height quad.

        Args:
            edge_coords: (N, 2) integer array of (row, col) positions.
            height_map: 2D height array the positions index into.

        Returns:
            Tuple ``(vertices, faces, uvs)``: (4N, 3) model-space vertices
            stored as four consecutive groups (top-front, top-back,
            bottom-front, bottom-back), (2N, 3) faces indexing into those
            vertices, and (4N, 2) texture coordinates.
        """
        height, width = height_map.shape
        rows, cols = edge_coords.T
        tops = height_map[rows, cols] * self.building_height
        bottoms = np.zeros_like(tops)

        vertices = self._to_model_space(np.vstack([
            np.column_stack([cols, tops, rows]),         # top front
            np.column_stack([cols + 1, tops, rows]),     # top back
            np.column_stack([cols, bottoms, rows]),      # bottom front
            np.column_stack([cols + 1, bottoms, rows]),  # bottom back
        ]), width, height)

        # Vertices are stored group by group, so corner k of quad q sits at
        # index k * count + q (not at 4 * q + k).
        count = len(edge_coords)
        corner = np.arange(4 * count).reshape(4, count)
        faces = np.vstack([
            np.column_stack([corner[0], corner[2], corner[1]]),
            np.column_stack([corner[1], corner[2], corner[3]])
        ])

        # Same UV convention as the surface so walls sample the texture at
        # their footprint.
        u = np.concatenate([cols, cols + 1, cols, cols + 1]) / max(width - 1, 1)
        v = 1 - np.tile(rows, 4) / max(height - 1, 1)
        uvs = np.column_stack([u, v])

        return vertices, faces, uvs

    def _add_walls(self, mesh, height_map):
        """Add vertical walls at building edges.

        Edges are found with Canny on the building footprint
        (``height_map > 0``). Casting the raw heights to uint8 would
        truncate fractional heights to 0 and find no edges at all.

        Returns:
            The same mesh object, extended in place with wall vertices,
            faces and (when the mesh carries per-vertex UVs) texture
            coordinates. It is returned unchanged when no usable edge
            pixel is found.
        """
        footprint = (height_map > 0).astype(np.uint8) * 255
        edges = cv2.Canny(footprint, 100, 200)
        height, width = height_map.shape

        edge_coords = np.column_stack(np.where(edges > 0))
        if len(edge_coords) == 0:
            return mesh

        # Drop the last row/column so every quad's col + 1 stays in range.
        valid_mask = (edge_coords[:, 0] < height - 1) & (edge_coords[:, 1] < width - 1)
        edge_coords = edge_coords[valid_mask]

        if len(edge_coords) == 0:
            return mesh

        wall_vertices, wall_faces, wall_uvs = self._build_wall_geometry(edge_coords, height_map)

        base_vertex_count = len(mesh.vertices)
        surface_uv = getattr(mesh.visual, 'uv', None)
        if surface_uv is not None:
            surface_uv = np.array(surface_uv)

        mesh.vertices = np.vstack((mesh.vertices, wall_vertices))
        mesh.faces = np.vstack((mesh.faces, wall_faces + base_vertex_count))

        # Keep one UV per vertex; otherwise the texture mapping no longer
        # matches the vertex array.
        if surface_uv is not None and len(surface_uv) == base_vertex_count:
            mesh.visual.uv = np.vstack((surface_uv, wall_uvs))

        return mesh

def main():
    """Command-line entry point: image in, GLB mesh out.

    Parses the arguments, loads the input image, runs segmentation and
    height estimation, optionally writes the segmented image, then builds
    and exports the mesh.

    Raises:
        ValueError: If ``cv2.imread`` cannot read the input image.
    """
    cli = argparse.ArgumentParser(description='Generate 3D mesh from satellite image')
    for name, text in (
        ('input_image', 'Path to satellite image'),
        ('output_mesh', 'Path for output GLB file'),
    ):
        cli.add_argument(name, help=text)
    cli.add_argument('--segmented_output', help='Optional path to save segmented image')
    cli.add_argument('--height', type=float, default=0.09,
                     help='Height of buildings (default: 0.09)')
    cli.add_argument('--no_walls', action='store_true',
                     help='Skip generating vertical walls')
    cli.add_argument('--window_size', type=int, default=5,
                     help='Window size for segmentation analysis')
    opts = cli.parse_args()

    # cv2.imread signals failure by returning None rather than raising.
    source = cv2.imread(opts.input_image)
    if source is None:
        raise ValueError(f"Could not read image at {opts.input_image}")

    builder = SatelliteModelGenerator(building_height=opts.height)

    print("Segmenting image...")
    label_image = builder.segment_image(source, opts.window_size)

    print("Estimating heights...")
    elevations = builder.estimate_heights(source, label_image)

    # The segmented image is only written when a path was supplied.
    if opts.segmented_output:
        cv2.imwrite(opts.segmented_output, label_image)
        print(f"Segmented image saved to {opts.segmented_output}")

    print("Generating mesh...")
    want_walls = not opts.no_walls
    model = builder.generate_mesh(elevations, source, add_walls=want_walls)
    model.export(opts.output_mesh)
    print(f"Mesh exported to {opts.output_mesh}")

# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()