Jing997 committed
Commit 300be5a · 1 parent: dc171c8

add utils src

src/utils/__init__.py ADDED
@@ -0,0 +1 @@
+ # This file is intentionally left blank.
src/utils/generate_all_data.py ADDED
@@ -0,0 +1,89 @@
+ import os
+ import sys
+ from pathlib import Path
+
+ # Add the project root directory to the Python path
+ sys.path.append(str(Path(__file__).resolve().parent.parent.parent))
+
+ def create_data_directory():
+     """
+     Ensure data directories exist for all generated files.
+
+     This function creates the necessary directory structure to store
+     delivery data, vehicle data, and travel time matrices.
+
+     Returns:
+     --------
+     tuple of (str, str, str)
+         Paths to the time matrix directory, vehicle data directory, and delivery data directory
+     """
+     vehicle_data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'data', 'vehicle-data')
+     os.makedirs(vehicle_data_dir, exist_ok=True)
+
+     delivery_data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'data', 'delivery-data')
+     os.makedirs(delivery_data_dir, exist_ok=True)
+
+     time_matrix_data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'data', 'time-matrix')
+     os.makedirs(time_matrix_data_dir, exist_ok=True)
+     return time_matrix_data_dir, vehicle_data_dir, delivery_data_dir
+
+ def main():
+     """
+     Run all data generation scripts for the delivery route optimization project.
+
+     This function orchestrates the creation of all synthetic datasets needed for
+     the route optimization problem, including delivery data, vehicle data, and
+     travel time/distance matrices.
+
+     Generated Files:
+     ----------------
+     1. Delivery Data:
+        - Contains information about delivery locations, time windows, packages, etc.
+        - Used to define the delivery stops in the routing problem.
+
+     2. Vehicle Data:
+        - Contains information about the delivery fleet, capacity, depots, etc.
+        - Used to define the available resources for delivery routes.
+
+     3. Travel Matrices:
+        - Contain distance and time information between all locations.
+        - Used by the optimization algorithm to calculate route costs.
+
+     Usage:
+     ------
+     These generated datasets form the foundation of the delivery route optimization
+     application. Together they define:
+     - Where deliveries need to be made (delivery data)
+     - What resources are available for deliveries (vehicle data)
+     - How long it takes to travel between locations (travel matrices)
+
+     The route optimization algorithm uses these inputs to determine the most
+     efficient assignment of deliveries to vehicles and the optimal sequence of
+     stops for each vehicle.
+     """
+     print("Starting data generation process...")
+
+     time_matrix_data_dir, vehicle_data_dir, delivery_data_dir = create_data_directory()
+     print(f"Time Matrix Data will be saved to: {time_matrix_data_dir}")
+     print(f"Delivery Data will be saved to: {delivery_data_dir}")
+     print(f"Vehicle Data will be saved to: {vehicle_data_dir}")
+
+     # Import and run delivery data generation
+     print("\n1. Generating delivery data...")
+     from src.utils.generate_delivery_data import generate_delivery_data
+     delivery_data = generate_delivery_data(50, use_geocoding=True)
+
+     # Import and run vehicle data generation
+     print("\n2. Generating vehicle data...")
+     from src.utils.generate_vehicle_data import generate_vehicle_data
+     vehicle_data = generate_vehicle_data(10)
+
+     # Import and run travel matrix generation
+     print("\n3. Generating travel matrices...")
+     from src.utils.generate_travel_matrix import generate_travel_matrix
+     generate_travel_matrix()
+
+     print("\nAll data generation complete! Files saved to data directory.")
+
+ if __name__ == "__main__":
+     main()
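For reference, a minimal sketch of how this orchestrator might be invoked from the project root; the module path follows the imports in main() above and is otherwise an assumption about the package layout:

# Hypothetical invocation from the project root,
# equivalent to: python -m src.utils.generate_all_data
from src.utils.generate_all_data import main

main()  # writes delivery data, vehicle data, and travel matrices under ./data/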
src/utils/generate_delivery_data.py ADDED
@@ -0,0 +1,241 @@
+ import pandas as pd
+ import numpy as np
+ import random
+ from datetime import datetime, timedelta
+ import os
+ import time
+ import requests
+ from geopy.geocoders import Nominatim
+
+ # Set random seed for reproducibility
+ np.random.seed(42)
+
+ def generate_delivery_data(n_deliveries=50, use_geocoding=False):
+     """
+     Generate synthetic delivery data with realistic Singapore addresses.
+     """
+     # Define real Singapore neighborhoods and their actual streets
+     # Format: [neighborhood_name, [list_of_real_streets], postal_code_prefix]
+     sg_neighborhoods = [
+         ['Ang Mo Kio', ['Ang Mo Kio Avenue 1', 'Ang Mo Kio Avenue 3', 'Ang Mo Kio Avenue 4', 'Ang Mo Kio Avenue 10'], '56'],
+         ['Bedok', ['Bedok North Avenue 1', 'Bedok North Road', 'Bedok Reservoir Road', 'New Upper Changi Road'], '46'],
+         ['Bishan', ['Bishan Street 11', 'Bishan Street 12', 'Bishan Street 13', 'Bishan Street 22'], '57'],
+         ['Bukit Merah', ['Jalan Bukit Merah', 'Henderson Road', 'Tiong Bahru Road', 'Redhill Close'], '15'],
+         ['Bukit Batok', ['Bukit Batok East Avenue 6', 'Bukit Batok West Avenue 8', 'Bukit Batok Street 21'], '65'],
+         ['Clementi', ['Clementi Avenue 1', 'Clementi Avenue 4', 'Clementi Road', 'Commonwealth Avenue West'], '12'],
+         ['Geylang', ['Geylang East Avenue 1', 'Geylang Road', 'Guillemard Road', 'Sims Avenue'], '38'],
+         ['Hougang', ['Hougang Avenue 1', 'Hougang Avenue 7', 'Hougang Street 91', 'Upper Serangoon Road'], '53'],
+         ['Jurong East', ['Jurong East Street 13', 'Jurong East Avenue 1', 'Jurong Gateway Road'], '60'],
+         ['Jurong West', ['Jurong West Street 41', 'Jurong West Street 52', 'Jurong West Street 93'], '64'],
+         ['Kallang', ['Kallang Avenue', 'Geylang Bahru', 'Boon Keng Road', 'Upper Boon Keng Road'], '33'],
+         ['Punggol', ['Punggol Central', 'Punggol Field', 'Punggol Road', 'Punggol Way'], '82'],
+         ['Queenstown', ['Commonwealth Avenue', 'Commonwealth Drive', 'Mei Chin Road', 'Stirling Road'], '14'],
+         ['Sengkang', ['Sengkang East Way', 'Sengkang West Way', 'Compassvale Road', 'Fernvale Road'], '54'],
+         ['Serangoon', ['Serangoon Avenue 2', 'Serangoon Avenue 3', 'Serangoon North Avenue 1'], '55'],
+         ['Tampines', ['Tampines Street 11', 'Tampines Street 21', 'Tampines Avenue 1', 'Tampines Avenue 4'], '52'],
+         ['Toa Payoh', ['Toa Payoh Lorong 1', 'Toa Payoh Lorong 2', 'Toa Payoh Lorong 4', 'Toa Payoh Central'], '31'],
+         ['Woodlands', ['Woodlands Avenue 1', 'Woodlands Drive 16', 'Woodlands Drive 72', 'Woodlands Circle'], '73'],
+         ['Yishun', ['Yishun Avenue 1', 'Yishun Avenue 4', 'Yishun Ring Road', 'Yishun Street 22'], '76']
+     ]
+
+     # Bounding boxes for neighborhoods (for fallback coordinates)
+     # Format: {name: [min_lat, max_lat, min_lon, max_lon]}
+     neighborhood_bounds = {
+         'Ang Mo Kio': [1.360000, 1.380000, 103.830000, 103.860000],
+         'Bedok': [1.320000, 1.335000, 103.920000, 103.950000],
+         'Bishan': [1.345000, 1.360000, 103.830000, 103.855000],
+         'Bukit Merah': [1.270000, 1.290000, 103.800000, 103.830000],
+         'Bukit Batok': [1.340000, 1.360000, 103.740000, 103.770000],
+         'Clementi': [1.310000, 1.325000, 103.750000, 103.780000],
+         'Geylang': [1.310000, 1.325000, 103.880000, 103.900000],
+         'Hougang': [1.370000, 1.385000, 103.880000, 103.900000],
+         'Jurong East': [1.330000, 1.345000, 103.730000, 103.750000],
+         'Jurong West': [1.340000, 1.360000, 103.690000, 103.720000],
+         'Kallang': [1.300000, 1.320000, 103.850000, 103.880000],
+         'Punggol': [1.390000, 1.410000, 103.900000, 103.920000],
+         'Queenstown': [1.290000, 1.310000, 103.780000, 103.805000],
+         'Sengkang': [1.380000, 1.395000, 103.870000, 103.900000],
+         'Serangoon': [1.345000, 1.360000, 103.865000, 103.885000],
+         'Tampines': [1.345000, 1.365000, 103.930000, 103.960000],
+         'Toa Payoh': [1.326000, 1.341000, 103.840000, 103.865000],
+         'Woodlands': [1.430000, 1.450000, 103.770000, 103.800000],
+         'Yishun': [1.410000, 1.430000, 103.820000, 103.850000]
+     }
+
+     # Generate delivery IDs
+     delivery_ids = [f'DEL{str(i).zfill(4)}' for i in range(1, n_deliveries + 1)]
+
+     # Generate customer names (fictional)
+     first_names = ['Tan', 'Lim', 'Lee', 'Ng', 'Wong', 'Chan', 'Goh', 'Ong', 'Teo', 'Koh',
+                    'Chua', 'Loh', 'Yeo', 'Sim', 'Ho', 'Ang', 'Tay', 'Yap', 'Leong', 'Foo']
+     last_names = ['Wei', 'Ming', 'Hui', 'Ling', 'Yong', 'Jun', 'Hong', 'Xin', 'Yi', 'Jie',
+                   'Cheng', 'Kai', 'Zhi', 'Tian', 'Yu', 'En', 'Yang', 'Hao', 'Chong', 'Zheng']
+     customer_names = [f"{random.choice(first_names)} {random.choice(last_names)}" for _ in range(n_deliveries)]
+
+     addresses = []
+     postal_codes = []
+     latitudes = []
+     longitudes = []
+     neighborhood_names = []
+
+     # Initialize geocoder if using geocoding
+     if use_geocoding:
+         geolocator = Nominatim(user_agent="delivery_app")
+
+     # Generate realistic addresses
+     for i in range(n_deliveries):
+         # Randomly select a neighborhood
+         neighborhood_data = random.choice(sg_neighborhoods)
+         neighborhood = neighborhood_data[0]
+         streets = neighborhood_data[1]
+         postal_prefix = neighborhood_data[2]
+
+         # Randomly select a street in that neighborhood
+         street = random.choice(streets)
+
+         # Generate block number (realistic for HDB)
+         block = random.randint(100, 600)
+
+         # Generate unit number
+         unit_floor = random.randint(2, 20)
+         unit_number = random.randint(1, 150)
+
+         # Generate postal code (with realistic prefix)
+         postal_suffix = str(random.randint(0, 999)).zfill(3)
+         postal_code = postal_prefix + postal_suffix
+
+         # Create two formats of address - one for display, one for geocoding
+         display_address = f"Block {block}, #{unit_floor:02d}-{unit_number:03d}, {street}, Singapore {postal_code}"
+         geocode_address = f"{block} {street}, Singapore {postal_code}"  # Simpler format for geocoding
+
+         # Default coordinates from neighborhood bounding box (fallback)
+         bounds = neighborhood_bounds[neighborhood]
+         default_lat = round(random.uniform(bounds[0], bounds[1]), 6)
+         default_lon = round(random.uniform(bounds[2], bounds[3]), 6)
+
+         # Use geocoding API if requested
+         if use_geocoding:
+             try:
+                 location = geolocator.geocode(geocode_address)
+
+                 if location:
+                     lat = location.latitude
+                     lon = location.longitude
+                     print(f"✓ Successfully geocoded: {geocode_address} → ({lat}, {lon})")
+                 else:
+                     # First fallback: try with just street and postal code
+                     simpler_address = f"{street}, Singapore {postal_code}"
+                     location = geolocator.geocode(simpler_address)
+
+                     if location:
+                         lat = location.latitude
+                         lon = location.longitude
+                         print(f"✓ Fallback geocoded: {simpler_address} → ({lat}, {lon})")
+                     else:
+                         # Second fallback: just use the neighborhood center
+                         lat = default_lat
+                         lon = default_lon
+                         print(f"✗ Could not geocode: {geocode_address}, using neighborhood coordinates")
+
+                 # Add delay to avoid being rate limited
+                 time.sleep(1)
+
+             except Exception as e:
+                 print(f"✗ Geocoding error for {geocode_address}: {str(e)}")
+                 lat = default_lat
+                 lon = default_lon
+         else:
+             # Without geocoding, use the default coordinates
+             lat = default_lat
+             lon = default_lon
+
+         addresses.append(display_address)
+         postal_codes.append(postal_code)
+         latitudes.append(lat)
+         longitudes.append(lon)
+         neighborhood_names.append(neighborhood)
+
+     # Generate delivery dates (within the next 7 days)
+     base_date = datetime.now().date()
+     delivery_dates = [base_date + timedelta(days=random.randint(1, 7)) for _ in range(n_deliveries)]
+
+     # Generate time windows (starting between 9 AM and 4 PM, ending by 6 PM)
+     time_windows = []
+     for _ in range(n_deliveries):
+         start_hour = random.randint(9, 16)
+         window_length = random.choice([1, 2, 3])  # 1, 2, or 3 hour windows
+         end_hour = min(start_hour + window_length, 18)
+
+         start_time = f"{start_hour:02d}:00"
+         end_time = f"{end_hour:02d}:00"
+         time_windows.append(f"{start_time}-{end_time}")
+
+     # Generate package details
+     weights = np.random.uniform(0.5, 20.0, n_deliveries)  # in kg
+     volumes = np.random.uniform(0.01, 0.5, n_deliveries)  # in m³
+
+     # Priority levels
+     priorities = np.random.choice(['High', 'Medium', 'Low'], n_deliveries,
+                                   p=[0.2, 0.5, 0.3])  # 20% High, 50% Medium, 30% Low
+
+     # Required vehicle type
+     vehicle_types = np.random.choice(['Standard', 'Large', 'Refrigerated'], n_deliveries,
+                                      p=[0.7, 0.2, 0.1])
+
+     # Status
+     statuses = np.random.choice(['Pending', 'Assigned', 'In Transit', 'Delivered'], n_deliveries,
+                                 p=[0.6, 0.2, 0.15, 0.05])
+
+     # Additional notes
+     notes = []
+     special_instructions = [
+         'Call customer before delivery',
+         'Fragile items',
+         'Leave at door',
+         'Signature required',
+         'No delivery on weekends',
+         None
+     ]
+
+     for _ in range(n_deliveries):
+         if random.random() < 0.7:  # 70% chance of having a note
+             notes.append(random.choice(special_instructions))
+         else:
+             notes.append(None)
+
+     # Create DataFrame
+     df = pd.DataFrame({
+         'delivery_id': delivery_ids,
+         'customer_name': customer_names,
+         'address': addresses,
+         'postal_code': postal_codes,
+         'neighborhood': neighborhood_names,
+         'latitude': latitudes,
+         'longitude': longitudes,
+         'delivery_date': delivery_dates,
+         'time_window': time_windows,
+         'weight_kg': weights.round(2),
+         'volume_m3': volumes.round(3),
+         'priority': priorities,
+         'vehicle_type': vehicle_types,
+         'status': statuses,
+         'special_instructions': notes
+     })
+
+     # Ensure the directory exists
+     data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'data', 'delivery-data')
+     os.makedirs(data_dir, exist_ok=True)
+
+     # Save to CSV
+     output_path = os.path.join(data_dir, 'delivery_data.csv')
+     df.to_csv(output_path, index=False)
+     print(f"Delivery data generated and saved to {output_path}")
+     return df
+
+ if __name__ == "__main__":
+     # Set to True if you want to use real geocoding (slower but more accurate)
+     USE_GEOCODING = True
+     delivery_data = generate_delivery_data(50, use_geocoding=USE_GEOCODING)
+     print("Sample of delivery data:")
+     print(delivery_data.head())
+
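A hedged sketch of consuming the generated CSV downstream: the column names and output path follow the DataFrame above, while parse_time_window is a hypothetical helper for turning the "HH:MM-HH:MM" strings into minutes, not part of this commit:

import pandas as pd

def parse_time_window(window):
    # "09:00-12:00" -> (540, 720) minutes since midnight
    start, end = window.split('-')
    to_minutes = lambda t: int(t[:2]) * 60 + int(t[3:])
    return to_minutes(start), to_minutes(end)

deliveries = pd.read_csv('data/delivery-data/delivery_data.csv')
deliveries[['tw_start_min', 'tw_end_min']] = deliveries['time_window'].apply(
    lambda w: pd.Series(parse_time_window(w))
)
print(deliveries[['delivery_id', 'time_window', 'tw_start_min', 'tw_end_min']].head())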
src/utils/generate_travel_matrix.py ADDED
@@ -0,0 +1,327 @@
+ import pandas as pd
+ import numpy as np
+ import os
+ import time
+ import requests
+ from math import radians, sin, cos, sqrt, atan2
+ import random
+
+ def haversine_distance(lat1, lon1, lat2, lon2):
+     """
+     Calculate the Haversine distance between two points in kilometers.
+     The Haversine distance is the great-circle distance between two points on a sphere.
+
+     Parameters:
+     -----------
+     lat1, lon1 : float
+         Coordinates of the first point in decimal degrees
+     lat2, lon2 : float
+         Coordinates of the second point in decimal degrees
+
+     Returns:
+     --------
+     float
+         Distance between the two points in kilometers
+     """
+     # Convert decimal degrees to radians
+     lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
+
+     # Haversine formula
+     dlon = lon2 - lon1
+     dlat = lat2 - lat1
+     a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
+     c = 2 * atan2(sqrt(a), sqrt(1-a))
+     distance = 6371 * c  # Radius of Earth in kilometers
+
+     return distance
+
+ def get_road_distance_with_retry(origin, destination, max_retries=3, initial_backoff=1):
+     """
+     Get road distance between two points with retry logic.
+
+     Parameters:
+     -----------
+     origin : dict
+         Origin location with 'latitude' and 'longitude' keys
+     destination : dict
+         Destination location with 'latitude' and 'longitude' keys
+     max_retries : int
+         Maximum number of retry attempts
+     initial_backoff : int
+         Initial backoff time in seconds
+
+     Returns:
+     --------
+     tuple of (float, float)
+         Distance in km and duration in minutes
+     """
+     # URLs for different public OSRM instances to distribute load
+     osrm_urls = [
+         "http://router.project-osrm.org",
+         "https://routing.openstreetmap.de",
+         # Add more public OSRM servers if available
+     ]
+
+     retry_count = 0
+     backoff = initial_backoff
+
+     while retry_count < max_retries:
+         try:
+             # Use a random OSRM server from the list to distribute load
+             base_url = random.choice(osrm_urls)
+             url = f"{base_url}/route/v1/driving/{origin['longitude']},{origin['latitude']};{destination['longitude']},{destination['latitude']}?overview=false"
+
+             # Add a timeout to prevent hanging connections
+             response = requests.get(url, timeout=5)
+             data = response.json()
+
+             if data.get('code') == 'Ok':
+                 # Extract distance and duration
+                 distance = data['routes'][0]['distance'] / 1000  # meters to km
+                 duration = data['routes'][0]['duration'] / 60  # seconds to minutes
+                 return round(distance, 2), round(duration, 2)
+             else:
+                 print(f"API returned error: {data.get('message', 'Unknown error')}")
+
+         except requests.exceptions.RequestException as e:
+             print(f"Request failed: {e}. Retry {retry_count+1}/{max_retries}")
+
+         # Exponential backoff with jitter to prevent thundering herd
+         jitter = random.uniform(0, 0.5 * backoff)
+         sleep_time = backoff + jitter
+         time.sleep(sleep_time)
+         backoff *= 2  # Exponential backoff
+         retry_count += 1
+
+     # Fallback to haversine after all retries failed
+     print(f"All retries failed for route from ({origin['latitude']},{origin['longitude']}) to ({destination['latitude']},{destination['longitude']}). Using haversine distance.")
+     distance = haversine_distance(
+         origin['latitude'], origin['longitude'],
+         destination['latitude'], destination['longitude']
+     )
+     distance = distance * 1.3  # Road factor
+     time_mins = (distance / 40) * 60  # 40 km/h
+
+     return round(distance, 2), round(time_mins, 2)
+
+ def get_road_distance(origins, destinations, use_osrm=True):
+     """
+     Calculate actual road distances and travel times between multiple origins and destinations
+     using the OSRM (Open Source Routing Machine) API.
+
+     Parameters:
+     -----------
+     origins : list of dict
+         List of origin locations with 'latitude' and 'longitude' keys
+     destinations : list of dict
+         List of destination locations with 'latitude' and 'longitude' keys
+     use_osrm : bool, default=True
+         Whether to use OSRM API or fall back to haversine distance
+
+     Returns:
+     --------
+     tuple of (numpy.ndarray, numpy.ndarray)
+         Arrays containing distances (in km) and durations (in minutes) between each origin-destination pair
+     """
+     n_origins = len(origins)
+     n_destinations = len(destinations)
+     distance_matrix = np.zeros((n_origins, n_destinations))
+     duration_matrix = np.zeros((n_origins, n_destinations))
+
+     # If OSRM is not requested, fall back to haversine distance
+     if not use_osrm:
+         print("Using haversine distance as fallback.")
+         for i, origin in enumerate(origins):
+             for j, dest in enumerate(destinations):
+                 distance = haversine_distance(
+                     origin['latitude'], origin['longitude'],
+                     dest['latitude'], dest['longitude']
+                 )
+                 # Adjust for road networks (roads are typically not straight lines)
+                 distance = distance * 1.3  # Apply a factor to approximate road distance
+                 time_mins = (distance / 40) * 60  # Assuming average speed of 40 km/h
+
+                 distance_matrix[i, j] = round(distance, 2)
+                 duration_matrix[i, j] = round(time_mins, 2)
+         return distance_matrix, duration_matrix
+
+     # Process in batches to prevent overwhelming the API
+     print(f"Processing {n_origins} origins and {n_destinations} destinations in batches...")
+     total_requests = n_origins * n_destinations
+     completed = 0
+
+     try:
+         # Try OSRM's table service for small datasets first (more efficient)
+         if n_origins + n_destinations <= 50:
+             print("Trying OSRM table API for efficient matrix calculation...")
+             try:
+                 # Code for table API would go here, but we'll skip for now as it's more complex
+                 # and the batch approach is more reliable for handling errors
+                 raise NotImplementedError("Table API not implemented, falling back to individual routes")
+             except Exception as e:
+                 print(f"Table API failed: {e}. Using individual routes instead.")
+                 # Continue with individual route requests below
+
+         # Process with individual route requests
+         for i, origin in enumerate(origins):
+             for j, dest in enumerate(destinations):
+                 # Skip if origin and destination are the same point
+                 if i == j:
+                     distance_matrix[i, j] = 0
+                     duration_matrix[i, j] = 0
+                     completed += 1
+                     continue
+
+                 # Get distance with retry logic
+                 distance, duration = get_road_distance_with_retry(origin, dest)
+                 distance_matrix[i, j] = distance
+                 duration_matrix[i, j] = duration
+
+                 # Show progress
+                 completed += 1
+                 if completed % 10 == 0:
+                     print(f"Progress: {completed}/{total_requests} routes calculated ({(completed/total_requests)*100:.1f}%)")
+
+                 # Add randomized delay to prevent overwhelming the API
+                 time.sleep(random.uniform(0.1, 0.5))
+
+     except KeyboardInterrupt:
+         print("\nOperation interrupted by user. Saving partial results...")
+
+     return distance_matrix, duration_matrix
+
+ def generate_travel_matrix(use_osrm=True):
+     """
+     Generate travel time and distance matrices between all locations in the delivery problem.
+
+     Parameters:
+     -----------
+     use_osrm : bool, default=True
+         Whether to use OSRM API for real road distances instead of haversine
+
+     Returns:
+     --------
+     tuple of (pd.DataFrame, pd.DataFrame, dict)
+         Distance matrix, base time matrix, and hourly time matrices
+     """
+     # Create data directories if they don't exist
+     data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'data')
+     time_matrix_dir = os.path.join(data_dir, 'time-matrix')
+     delivery_data_dir = os.path.join(data_dir, 'delivery-data')
+     vehicle_data_dir = os.path.join(data_dir, 'vehicle-data')
+
+     # Ensure all directories exist
+     for directory in [time_matrix_dir, delivery_data_dir, vehicle_data_dir]:
+         os.makedirs(directory, exist_ok=True)
+
+     # Read delivery and vehicle data
+     try:
+         delivery_data = pd.read_csv(os.path.join(delivery_data_dir, 'delivery_data.csv'))
+         vehicle_data = pd.read_csv(os.path.join(vehicle_data_dir, 'vehicle_data.csv'))
+     except FileNotFoundError:
+         print("Error: Please generate delivery and vehicle data first!")
+         return
+
+     # Extract locations
+     delivery_locations = delivery_data[['delivery_id', 'latitude', 'longitude']].values
+     depot_locations = vehicle_data[['vehicle_id', 'depot_latitude', 'depot_longitude']].values
+
+     # Average speed for time calculation (km/h)
+     avg_speed = vehicle_data['avg_speed_kmh'].mean()
+
+     # Traffic factor matrix (to simulate traffic conditions at different times)
+     hours_in_day = 24
+     traffic_factors = np.ones((hours_in_day, 1))
+
+     # Simulate morning rush hour (8-10 AM)
+     traffic_factors[8:10] = 1.5
+
+     # Simulate evening rush hour (5-7 PM)
+     traffic_factors[17:19] = 1.8
+
+     # Late night (less traffic)
+     traffic_factors[22:] = 0.8
+     traffic_factors[:5] = 0.7
+
+     # Create a combined list of all locations (depots + delivery points)
+     all_locations = []
+
+     # Add depot locations
+     for row in depot_locations:
+         all_locations.append({
+             'id': row[0],  # vehicle_id as location id
+             'type': 'depot',
+             'latitude': row[1],
+             'longitude': row[2]
+         })
+
+     # Add delivery locations
+     for row in delivery_locations:
+         all_locations.append({
+             'id': row[0],  # delivery_id as location id
+             'type': 'delivery',
+             'latitude': row[1],
+             'longitude': row[2]
+         })
+
+     print(f"Calculating distances between {len(all_locations)} locations...")
+
+     # Save the locations file early so we have this data even if the process is interrupted
+     location_df = pd.DataFrame(all_locations)
+     location_df.to_csv(os.path.join(time_matrix_dir, 'locations.csv'), index=False)
+
+     # Calculate distances and times using OSRM with improved error handling
+     if use_osrm:
+         print("Using OSRM API for road distances...")
+         distance_matrix, base_time_matrix = get_road_distance(all_locations, all_locations, use_osrm=True)
+     else:
+         print("Using haversine distance with road factor adjustment...")
+         distance_matrix, base_time_matrix = get_road_distance(all_locations, all_locations, use_osrm=False)
+
+     # Create DataFrames for the matrices
+     location_ids = [loc['id'] for loc in all_locations]
+
+     distance_df = pd.DataFrame(distance_matrix, index=location_ids, columns=location_ids)
+     time_df = pd.DataFrame(base_time_matrix, index=location_ids, columns=location_ids)
+
+     # Save distance and base time matrices early in case later steps fail
+     distance_df.to_csv(os.path.join(time_matrix_dir, 'distance_matrix.csv'))
+     time_df.to_csv(os.path.join(time_matrix_dir, 'base_time_matrix.csv'))
+     print("Basic distance and time matrices saved successfully.")
+
+     # Create time matrices for different hours of the day
+     hourly_time_matrices = {}
+     for hour in range(24):
+         traffic_factor = traffic_factors[hour][0]
+         hourly_time = base_time_matrix * traffic_factor
+         hourly_time_matrices[f"{hour:02d}:00"] = pd.DataFrame(hourly_time, index=location_ids, columns=location_ids)
+
+     # Save a sample of time matrices (e.g., rush hour and normal time)
+     try:
+         hourly_time_matrices['08:00'].to_csv(os.path.join(time_matrix_dir, 'morning_rush_time_matrix.csv'))
+         hourly_time_matrices['18:00'].to_csv(os.path.join(time_matrix_dir, 'evening_rush_time_matrix.csv'))
+         hourly_time_matrices['12:00'].to_csv(os.path.join(time_matrix_dir, 'midday_time_matrix.csv'))
+         hourly_time_matrices['00:00'].to_csv(os.path.join(time_matrix_dir, 'night_time_matrix.csv'))
+         print("Time matrices for different hours saved successfully.")
+     except Exception as e:
+         print(f"Error saving hourly time matrices: {e}")
+         print("Continuing with basic matrices only.")
+
+     print("Travel matrices generation complete.")
+     return distance_df, time_df, hourly_time_matrices
+
+ if __name__ == "__main__":
+     # For development, allow falling back to haversine if needed
+     import argparse
+
+     parser = argparse.ArgumentParser(description="Generate travel matrices for delivery route optimization")
+     parser.add_argument("--use-osrm", action="store_true", help="Use OSRM API for real road distances")
+     parser.add_argument("--use-haversine", action="store_true", help="Use haversine distance only (faster)")
+
+     args = parser.parse_args()
+
+     if args.use_haversine:
+         generate_travel_matrix(use_osrm=False)
+     else:
+         # Default to OSRM unless explicitly disabled
+         generate_travel_matrix(use_osrm=True)
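As a quick sanity check of the haversine fallback, a small example using two depot coordinates from generate_vehicle_data.py (Tuas Logistics Hub and Changi Logistics Centre); the 1.3 road factor and 40 km/h speed mirror the fallback path above, and the printed values are approximate:

from src.utils.generate_travel_matrix import haversine_distance

straight_line = haversine_distance(1.3187, 103.6390, 1.3497, 103.9742)
road_estimate = straight_line * 1.3          # same road factor as the fallback above
travel_minutes = (road_estimate / 40) * 60   # assumed 40 km/h average speed
print(round(straight_line, 1), round(road_estimate, 1), round(travel_minutes, 1))
# roughly 37.4 km straight-line, ~48.7 km by road, ~73 minutes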
src/utils/generate_vehicle_data.py ADDED
@@ -0,0 +1,168 @@
+ import pandas as pd
+ import numpy as np
+ import random
+ from datetime import datetime, timedelta
+ import os
+
+ # Set random seed for reproducibility
+ np.random.seed(43)
+
+ def generate_vehicle_data(n_vehicles=10):
+     """
+     Generate synthetic vehicle data for a delivery fleet optimization problem.
+
+     This function creates a realistic delivery fleet with various vehicle types,
+     capacities, and operational parameters to be used in route optimization.
+
+     Parameters:
+     -----------
+     n_vehicles : int, default=10
+         Number of vehicles to generate in the fleet
+
+     Returns:
+     --------
+     pd.DataFrame
+         DataFrame containing the generated vehicle data
+     """
+
+     # Vehicle IDs
+     vehicle_ids = [f'VEH{str(i).zfill(3)}' for i in range(1, n_vehicles + 1)]
+
+     # Vehicle types
+     vehicle_types = []
+     for _ in range(n_vehicles):
+         vehicle_type = random.choices(['Standard', 'Large', 'Refrigerated'],
+                                       weights=[0.7, 0.2, 0.1])[0]
+         vehicle_types.append(vehicle_type)
+
+     # Vehicle capacities based on type
+     max_weights = []
+     max_volumes = []
+     for v_type in vehicle_types:
+         if v_type == 'Standard':
+             max_weights.append(random.uniform(800, 1200))
+             max_volumes.append(random.uniform(8, 12))
+         elif v_type == 'Large':
+             max_weights.append(random.uniform(1500, 2500))
+             max_volumes.append(random.uniform(15, 25))
+         else:  # Refrigerated
+             max_weights.append(random.uniform(600, 1000))
+             max_volumes.append(random.uniform(6, 10))
+
+     # Realistic depot/warehouse locations in Singapore industrial areas
+     # [name, latitude, longitude]
+     warehouse_locations = [
+         ["Tuas Logistics Hub", 1.3187, 103.6390],
+         ["Jurong Industrial Estate", 1.3233, 103.6994],
+         ["Loyang Industrial Park", 1.3602, 103.9761],
+         ["Changi Logistics Centre", 1.3497, 103.9742],
+         ["Keppel Distripark", 1.2706, 103.8219],
+         ["Pandan Logistics Hub", 1.3187, 103.7509],
+         ["Alexandra Distripark", 1.2744, 103.8012],
+         ["Kallang Way Industrial", 1.3315, 103.8731],
+         ["Defu Industrial Park", 1.3610, 103.8891],
+         ["Woodlands Industrial", 1.4428, 103.7875]
+     ]
+
+     # Assign warehouses to vehicles (multiple vehicles can be from the same warehouse).
+     # Either assign sequentially so each vehicle gets its own warehouse (if n_vehicles <= len(warehouse_locations)),
+     # or use every warehouse at least once and fill the rest randomly
+     depot_names = []
+     depot_lats = []
+     depot_lons = []
+
+     if n_vehicles <= len(warehouse_locations):
+         # Use first n_vehicles warehouses (one vehicle per warehouse)
+         selected_warehouses = warehouse_locations[:n_vehicles]
+     else:
+         # Ensure every warehouse is used at least once
+         selected_warehouses = warehouse_locations.copy()
+         # Then add random ones for remaining vehicles
+         remaining = n_vehicles - len(warehouse_locations)
+         selected_warehouses.extend([random.choice(warehouse_locations) for _ in range(remaining)])
+
+     # Shuffle to avoid sequential assignment
+     random.shuffle(selected_warehouses)
+
+     # Extract depot information
+     for warehouse in selected_warehouses:
+         depot_names.append(warehouse[0])
+         depot_lats.append(warehouse[1])
+         depot_lons.append(warehouse[2])
+
+     # Add small variation for vehicles from the same warehouse (within warehouse compound)
+     # This makes each vehicle's position slightly different, simulating different loading bays
+     for i in range(len(depot_lats)):
+         # Much smaller variation - within warehouse compound (approximately 50-100m variation)
+         depot_lats[i] += random.uniform(-0.0005, 0.0005)
+         depot_lons[i] += random.uniform(-0.0005, 0.0005)
+
+     # Driver names
+     first_names = ['Ahmad', 'Raj', 'Michael', 'Wei', 'Siti', 'Kumar', 'Chong', 'David', 'Suresh', 'Ali']
+     last_names = ['Tan', 'Singh', 'Lee', 'Wong', 'Kumar', 'Abdullah', 'Zhang', 'Lim', 'Raj', 'Teo']
+     driver_names = []
+
+     for i in range(n_vehicles):
+         if i < len(first_names):
+             driver_names.append(f"{first_names[i]} {random.choice(last_names)}")
+         else:
+             driver_names.append(f"{random.choice(first_names)} {random.choice(last_names)}")
+
+     # Vehicle availability
+     start_times = [f"{random.randint(7, 10):02d}:00" for _ in range(n_vehicles)]
+     end_times = [f"{random.randint(17, 21):02d}:00" for _ in range(n_vehicles)]
+
+     # Max working hours
+     max_working_hours = [random.randint(8, 10) for _ in range(n_vehicles)]
+
+     # Average speed (km/h)
+     avg_speeds = [random.uniform(30, 50) for _ in range(n_vehicles)]
+
+     # Cost per km
+     cost_per_km = [random.uniform(0.5, 1.5) for _ in range(n_vehicles)]
+
+     # Vehicle status
+     statuses = np.random.choice(['Available', 'In Service', 'Maintenance'], n_vehicles, p=[0.7, 0.2, 0.1])
+
+     # License plates (Singapore format)
+     letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+     license_plates = []
+     for _ in range(n_vehicles):
+         letter_part = ''.join(random.choices(letters, k=3))
+         number_part = random.randint(1000, 9999)
+         license_plates.append(f"S{letter_part}{number_part}")
+
+     # Create DataFrame
+     df = pd.DataFrame({
+         'vehicle_id': vehicle_ids,
+         'vehicle_type': vehicle_types,
+         'license_plate': license_plates,
+         'driver_name': driver_names,
+         'max_weight_kg': np.array(max_weights).round(2),
+         'max_volume_m3': np.array(max_volumes).round(2),
+         'depot_name': depot_names,
+         'depot_latitude': np.array(depot_lats).round(6),
+         'depot_longitude': np.array(depot_lons).round(6),
+         'start_time': start_times,
+         'end_time': end_times,
+         'max_working_hours': max_working_hours,
+         'avg_speed_kmh': np.array(avg_speeds).round(2),
+         'cost_per_km': np.array(cost_per_km).round(2),
+         'status': statuses
+     })
+
+     # Ensure the directory exists
+     data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'data', 'vehicle-data')
+     os.makedirs(data_dir, exist_ok=True)
+
+     # Save to CSV
+     output_path = os.path.join(data_dir, 'vehicle_data.csv')
+     df.to_csv(output_path, index=False)
+     print(f"Vehicle data generated and saved to {output_path}")
+     return df
+
+ if __name__ == "__main__":
+     # Generate vehicle data
+     vehicle_data = generate_vehicle_data(10)
+     print("Sample of vehicle data:")
+     print(vehicle_data.head())
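A hedged sketch of how the vehicle CSV might be consumed downstream; the column names and output path follow the DataFrame above, and the aggregation itself is only illustrative:

import pandas as pd

vehicles = pd.read_csv('data/vehicle-data/vehicle_data.csv')
available = vehicles[vehicles['status'] == 'Available']

# Total carrying capacity of the currently available fleet, broken down by vehicle type
capacity_by_type = available.groupby('vehicle_type')[['max_weight_kg', 'max_volume_m3']].sum()
print(capacity_by_type)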