File size: 13,529 Bytes
300be5a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 |
import pandas as pd
import numpy as np
import os
import time
import requests
from math import radians, sin, cos, sqrt, atan2
import random
def haversine_distance(lat1, lon1, lat2, lon2):
"""
Calculate the Haversine distance between two points in kilometers.
The Haversine distance is the great-circle distance between two points on a sphere.
Parameters:
-----------
lat1, lon1 : float
Coordinates of the first point in decimal degrees
lat2, lon2 : float
Coordinates of the second point in decimal degrees
Returns:
--------
float
Distance between the two points in kilometers
"""
# Convert decimal degrees to radians
lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
# Haversine formula
dlon = lon2 - lon1
dlat = lat2 - lat1
a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
c = 2 * atan2(sqrt(a), sqrt(1-a))
distance = 6371 * c # Radius of Earth in kilometers
return distance
def get_road_distance_with_retry(origin, destination, max_retries=3, initial_backoff=1):
"""
Get road distance between two points with retry logic
Parameters:
-----------
origin : dict
Origin location with 'latitude' and 'longitude' keys
destination : dict
Destination location with 'latitude' and 'longitude' keys
max_retries : int
Maximum number of retry attempts
initial_backoff : int
Initial backoff time in seconds
Returns:
--------
tuple of (float, float)
Distance in km and duration in minutes
"""
# URLs for different public OSRM instances to distribute load
osrm_urls = [
"http://router.project-osrm.org",
"https://routing.openstreetmap.de",
# Add more public OSRM servers if available
]
retry_count = 0
backoff = initial_backoff
while retry_count < max_retries:
try:
# Use a random OSRM server from the list to distribute load
base_url = random.choice(osrm_urls)
url = f"{base_url}/route/v1/driving/{origin['longitude']},{origin['latitude']};{destination['longitude']},{destination['latitude']}?overview=false"
# Add a timeout to prevent hanging connections
response = requests.get(url, timeout=5)
data = response.json()
if data.get('code') == 'Ok':
# Extract distance and duration
distance = data['routes'][0]['distance'] / 1000 # meters to km
duration = data['routes'][0]['duration'] / 60 # seconds to minutes
return round(distance, 2), round(duration, 2)
else:
print(f"API returned error: {data.get('message', 'Unknown error')}")
except requests.exceptions.RequestException as e:
print(f"Request failed: {e}. Retry {retry_count+1}/{max_retries}")
# Exponential backoff with jitter to prevent thundering herd
jitter = random.uniform(0, 0.5 * backoff)
sleep_time = backoff + jitter
time.sleep(sleep_time)
backoff *= 2 # Exponential backoff
retry_count += 1
# Fallback to haversine after all retries failed
print(f"All retries failed for route from ({origin['latitude']},{origin['longitude']}) to ({destination['latitude']},{destination['longitude']}). Using haversine distance.")
distance = haversine_distance(
origin['latitude'], origin['longitude'],
destination['latitude'], destination['longitude']
)
distance = distance * 1.3 # Road factor
time_mins = (distance / 40) * 60 # 40 km/h
return round(distance, 2), round(time_mins, 2)
def get_road_distance(origins, destinations, use_osrm=True):
"""
Calculate actual road distances and travel times between multiple origins and destinations
using the OSRM (Open Source Routing Machine) API.
Parameters:
-----------
origins : list of dict
List of origin locations with 'latitude' and 'longitude' keys
destinations : list of dict
List of destination locations with 'latitude' and 'longitude' keys
use_osrm : bool, default=True
Whether to use OSRM API or fall back to haversine distance
Returns:
--------
tuple of (numpy.ndarray, numpy.ndarray)
Arrays containing distances (in km) and durations (in minutes) between each origin-destination pair
"""
n_origins = len(origins)
n_destinations = len(destinations)
distance_matrix = np.zeros((n_origins, n_destinations))
duration_matrix = np.zeros((n_origins, n_destinations))
# If OSRM is not requested, fall back to haversine distance
if not use_osrm:
print("Using haversine distance as fallback.")
for i, origin in enumerate(origins):
for j, dest in enumerate(destinations):
distance = haversine_distance(
origin['latitude'], origin['longitude'],
dest['latitude'], dest['longitude']
)
# Adjust for road networks (roads are typically not straight lines)
distance = distance * 1.3 # Apply a factor to approximate road distance
time_mins = (distance / 40) * 60 # Assuming average speed of 40 km/h
distance_matrix[i, j] = round(distance, 2)
duration_matrix[i, j] = round(time_mins, 2)
return distance_matrix, duration_matrix
# Process in batches to prevent overwhelming the API
print(f"Processing {n_origins} origins and {n_destinations} destinations in batches...")
total_requests = n_origins * n_destinations
completed = 0
try:
# Try OSRM's table service for small datasets first (more efficient)
if n_origins + n_destinations <= 50:
print("Trying OSRM table API for efficient matrix calculation...")
try:
# Code for table API would go here, but we'll skip for now as it's more complex
# and the batch approach is more reliable for handling errors
raise NotImplementedError("Table API not implemented, falling back to individual routes")
except Exception as e:
print(f"Table API failed: {e}. Using individual routes instead.")
# Continue with individual route requests below
# Process with individual route requests
for i, origin in enumerate(origins):
for j, dest in enumerate(destinations):
# Skip if origin and destination are the same point
if i == j:
distance_matrix[i, j] = 0
duration_matrix[i, j] = 0
completed += 1
continue
# Get distance with retry logic
distance, duration = get_road_distance_with_retry(origin, dest)
distance_matrix[i, j] = distance
duration_matrix[i, j] = duration
# Show progress
completed += 1
if completed % 10 == 0:
print(f"Progress: {completed}/{total_requests} routes calculated ({(completed/total_requests)*100:.1f}%)")
# Add randomized delay to prevent overwhelming the API
time.sleep(random.uniform(0.1, 0.5))
except KeyboardInterrupt:
print("\nOperation interrupted by user. Saving partial results...")
return distance_matrix, duration_matrix
def generate_travel_matrix(use_osrm=True):
"""
Generate travel time and distance matrices between all locations in the delivery problem.
Parameters:
-----------
use_osrm : bool, default=True
Whether to use OSRM API for real road distances instead of haversine
Returns:
--------
tuple of (pd.DataFrame, pd.DataFrame, dict)
Distance matrix, base time matrix, and hourly time matrices
"""
# Create data directories if they don't exist
data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'data')
time_matrix_dir = os.path.join(data_dir, 'time-matrix')
delivery_data_dir = os.path.join(data_dir, 'delivery-data')
vehicle_data_dir = os.path.join(data_dir, 'vehicle-data')
# Ensure all directories exist
for directory in [time_matrix_dir, delivery_data_dir, vehicle_data_dir]:
os.makedirs(directory, exist_ok=True)
# Read delivery and vehicle data
try:
delivery_data = pd.read_csv(os.path.join(delivery_data_dir, 'delivery_data.csv'))
vehicle_data = pd.read_csv(os.path.join(vehicle_data_dir, 'vehicle_data.csv'))
except FileNotFoundError:
print("Error: Please generate delivery and vehicle data first!")
return
# Extract locations
delivery_locations = delivery_data[['delivery_id', 'latitude', 'longitude']].values
depot_locations = vehicle_data[['vehicle_id', 'depot_latitude', 'depot_longitude']].values
# Average speed for time calculation (km/h)
avg_speed = vehicle_data['avg_speed_kmh'].mean()
# Traffic factor matrix (to simulate traffic conditions at different times)
hours_in_day = 24
traffic_factors = np.ones((hours_in_day, 1))
# Simulate morning rush hour (8-10 AM)
traffic_factors[8:10] = 1.5
# Simulate evening rush hour (5-7 PM)
traffic_factors[17:19] = 1.8
# Late night (less traffic)
traffic_factors[22:] = 0.8
traffic_factors[:5] = 0.7
# Create a combined list of all locations (depots + delivery points)
all_locations = []
# Add depot locations
for row in depot_locations:
all_locations.append({
'id': row[0], # vehicle_id as location id
'type': 'depot',
'latitude': row[1],
'longitude': row[2]
})
# Add delivery locations
for row in delivery_locations:
all_locations.append({
'id': row[0], # delivery_id as location id
'type': 'delivery',
'latitude': row[1],
'longitude': row[2]
})
print(f"Calculating distances between {len(all_locations)} locations...")
# Save the locations file early so we have this data even if the process is interrupted
location_df = pd.DataFrame(all_locations)
location_df.to_csv(os.path.join(time_matrix_dir, 'locations.csv'), index=False)
# Calculate distances and times using OSRM with improved error handling
if use_osrm:
print("Using OSRM API for road distances...")
distance_matrix, base_time_matrix = get_road_distance(all_locations, all_locations, use_osrm=True)
else:
print("Using haversine distance with road factor adjustment...")
distance_matrix, base_time_matrix = get_road_distance(all_locations, all_locations, use_osrm=False)
# Create DataFrames for the matrices
location_ids = [loc['id'] for loc in all_locations]
distance_df = pd.DataFrame(distance_matrix, index=location_ids, columns=location_ids)
time_df = pd.DataFrame(base_time_matrix, index=location_ids, columns=location_ids)
# Save distance and base time matrices early in case later steps fail
distance_df.to_csv(os.path.join(time_matrix_dir, 'distance_matrix.csv'))
time_df.to_csv(os.path.join(time_matrix_dir, 'base_time_matrix.csv'))
print("Basic distance and time matrices saved successfully.")
# Create time matrices for different hours of the day
hourly_time_matrices = {}
for hour in range(24):
traffic_factor = traffic_factors[hour][0]
hourly_time = base_time_matrix * traffic_factor
hourly_time_matrices[f"{hour:02d}:00"] = pd.DataFrame(hourly_time, index=location_ids, columns=location_ids)
# Save a sample of time matrices (e.g., rush hour and normal time)
try:
hourly_time_matrices['08:00'].to_csv(os.path.join(time_matrix_dir, 'morning_rush_time_matrix.csv'))
hourly_time_matrices['18:00'].to_csv(os.path.join(time_matrix_dir, 'evening_rush_time_matrix.csv'))
hourly_time_matrices['12:00'].to_csv(os.path.join(time_matrix_dir, 'midday_time_matrix.csv'))
hourly_time_matrices['00:00'].to_csv(os.path.join(time_matrix_dir, 'night_time_matrix.csv'))
print("Time matrices for different hours saved successfully.")
except Exception as e:
print(f"Error saving hourly time matrices: {e}")
print("Continuing with basic matrices only.")
print("Travel matrices generation complete.")
return distance_df, time_df, hourly_time_matrices
if __name__ == "__main__":
# For development, allow falling back to haversine if needed
import argparse
parser = argparse.ArgumentParser(description="Generate travel matrices for delivery route optimization")
parser.add_argument("--use-osrm", action="store_true", help="Use OSRM API for real road distances")
parser.add_argument("--use-haversine", action="store_true", help="Use haversine distance only (faster)")
args = parser.parse_args()
if args.use_haversine:
generate_travel_matrix(use_osrm=False)
else:
# Default to OSRM unless explicitly disabled
generate_travel_matrix(use_osrm=True) |