File size: 13,529 Bytes
300be5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
import pandas as pd
import numpy as np
import os
import time
import requests
from math import radians, sin, cos, sqrt, atan2
import random

def haversine_distance(lat1, lon1, lat2, lon2):
    """
    Calculate the Haversine distance between two points in kilometers.
    The Haversine distance is the great-circle distance between two points on a sphere.
    
    Parameters:
    -----------
    lat1, lon1 : float
        Coordinates of the first point in decimal degrees
    lat2, lon2 : float
        Coordinates of the second point in decimal degrees
        
    Returns:
    --------
    float
        Distance between the two points in kilometers
    """
    # Convert decimal degrees to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    
    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * atan2(sqrt(a), sqrt(1-a))
    distance = 6371 * c  # Radius of Earth in kilometers
    
    return distance

def get_road_distance_with_retry(origin, destination, max_retries=3, initial_backoff=1):
    """
    Get road distance between two points with retry logic
    
    Parameters:
    -----------
    origin : dict
        Origin location with 'latitude' and 'longitude' keys
    destination : dict
        Destination location with 'latitude' and 'longitude' keys
    max_retries : int
        Maximum number of retry attempts
    initial_backoff : int
        Initial backoff time in seconds
    
    Returns:
    --------
    tuple of (float, float)
        Distance in km and duration in minutes
    """
    # URLs for different public OSRM instances to distribute load
    osrm_urls = [
        "http://router.project-osrm.org",
        "https://routing.openstreetmap.de",
        # Add more public OSRM servers if available
    ]
    
    retry_count = 0
    backoff = initial_backoff
    
    while retry_count < max_retries:
        try:
            # Use a random OSRM server from the list to distribute load
            base_url = random.choice(osrm_urls)
            url = f"{base_url}/route/v1/driving/{origin['longitude']},{origin['latitude']};{destination['longitude']},{destination['latitude']}?overview=false"
            
            # Add a timeout to prevent hanging connections
            response = requests.get(url, timeout=5)
            data = response.json()
            
            if data.get('code') == 'Ok':
                # Extract distance and duration
                distance = data['routes'][0]['distance'] / 1000  # meters to km
                duration = data['routes'][0]['duration'] / 60    # seconds to minutes
                return round(distance, 2), round(duration, 2)
            else:
                print(f"API returned error: {data.get('message', 'Unknown error')}")
        
        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}. Retry {retry_count+1}/{max_retries}")
        
        # Exponential backoff with jitter to prevent thundering herd
        jitter = random.uniform(0, 0.5 * backoff)
        sleep_time = backoff + jitter
        time.sleep(sleep_time)
        backoff *= 2  # Exponential backoff
        retry_count += 1
    
    # Fallback to haversine after all retries failed
    print(f"All retries failed for route from ({origin['latitude']},{origin['longitude']}) to ({destination['latitude']},{destination['longitude']}). Using haversine distance.")
    distance = haversine_distance(
        origin['latitude'], origin['longitude'],
        destination['latitude'], destination['longitude']
    )
    distance = distance * 1.3  # Road factor
    time_mins = (distance / 40) * 60  # 40 km/h
    
    return round(distance, 2), round(time_mins, 2)

def get_road_distance(origins, destinations, use_osrm=True):
    """
    Calculate actual road distances and travel times between multiple origins and destinations
    using the OSRM (Open Source Routing Machine) API.
    
    Parameters:
    -----------
    origins : list of dict
        List of origin locations with 'latitude' and 'longitude' keys
    destinations : list of dict
        List of destination locations with 'latitude' and 'longitude' keys
    use_osrm : bool, default=True
        Whether to use OSRM API or fall back to haversine distance
        
    Returns:
    --------
    tuple of (numpy.ndarray, numpy.ndarray)
        Arrays containing distances (in km) and durations (in minutes) between each origin-destination pair
    """
    n_origins = len(origins)
    n_destinations = len(destinations)
    distance_matrix = np.zeros((n_origins, n_destinations))
    duration_matrix = np.zeros((n_origins, n_destinations))
    
    # If OSRM is not requested, fall back to haversine distance
    if not use_osrm:
        print("Using haversine distance as fallback.")
        for i, origin in enumerate(origins):
            for j, dest in enumerate(destinations):
                distance = haversine_distance(
                    origin['latitude'], origin['longitude'],
                    dest['latitude'], dest['longitude']
                )
                # Adjust for road networks (roads are typically not straight lines)
                distance = distance * 1.3  # Apply a factor to approximate road distance
                time_mins = (distance / 40) * 60  # Assuming average speed of 40 km/h
                
                distance_matrix[i, j] = round(distance, 2)
                duration_matrix[i, j] = round(time_mins, 2)
        return distance_matrix, duration_matrix
    
    # Process in batches to prevent overwhelming the API
    print(f"Processing {n_origins} origins and {n_destinations} destinations in batches...")
    total_requests = n_origins * n_destinations
    completed = 0
    
    try:
        # Try OSRM's table service for small datasets first (more efficient)
        if n_origins + n_destinations <= 50:
            print("Trying OSRM table API for efficient matrix calculation...")
            try:
                # Code for table API would go here, but we'll skip for now as it's more complex
                # and the batch approach is more reliable for handling errors
                raise NotImplementedError("Table API not implemented, falling back to individual routes")
            except Exception as e:
                print(f"Table API failed: {e}. Using individual routes instead.")
                # Continue with individual route requests below
        
        # Process with individual route requests
        for i, origin in enumerate(origins):
            for j, dest in enumerate(destinations):
                # Skip if origin and destination are the same point
                if i == j:
                    distance_matrix[i, j] = 0
                    duration_matrix[i, j] = 0
                    completed += 1
                    continue
                
                # Get distance with retry logic
                distance, duration = get_road_distance_with_retry(origin, dest)
                distance_matrix[i, j] = distance
                duration_matrix[i, j] = duration
                
                # Show progress
                completed += 1
                if completed % 10 == 0:
                    print(f"Progress: {completed}/{total_requests} routes calculated ({(completed/total_requests)*100:.1f}%)")
                
                # Add randomized delay to prevent overwhelming the API
                time.sleep(random.uniform(0.1, 0.5))
    
    except KeyboardInterrupt:
        print("\nOperation interrupted by user. Saving partial results...")
    
    return distance_matrix, duration_matrix

def generate_travel_matrix(use_osrm=True):
    """
    Generate travel time and distance matrices between all locations in the delivery problem.
    
    Parameters:
    -----------
    use_osrm : bool, default=True
        Whether to use OSRM API for real road distances instead of haversine
        
    Returns:
    --------
    tuple of (pd.DataFrame, pd.DataFrame, dict)
        Distance matrix, base time matrix, and hourly time matrices
    """
    # Create data directories if they don't exist
    data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'data')
    time_matrix_dir = os.path.join(data_dir, 'time-matrix')
    delivery_data_dir = os.path.join(data_dir, 'delivery-data')    
    vehicle_data_dir = os.path.join(data_dir, 'vehicle-data')
    
    # Ensure all directories exist
    for directory in [time_matrix_dir, delivery_data_dir, vehicle_data_dir]:
        os.makedirs(directory, exist_ok=True)
     
    # Read delivery and vehicle data
    try:
        delivery_data = pd.read_csv(os.path.join(delivery_data_dir, 'delivery_data.csv'))
        vehicle_data = pd.read_csv(os.path.join(vehicle_data_dir, 'vehicle_data.csv'))
    except FileNotFoundError:
        print("Error: Please generate delivery and vehicle data first!")
        return
    
    # Extract locations
    delivery_locations = delivery_data[['delivery_id', 'latitude', 'longitude']].values
    depot_locations = vehicle_data[['vehicle_id', 'depot_latitude', 'depot_longitude']].values
    
    # Average speed for time calculation (km/h)
    avg_speed = vehicle_data['avg_speed_kmh'].mean()
    
    # Traffic factor matrix (to simulate traffic conditions at different times)
    hours_in_day = 24
    traffic_factors = np.ones((hours_in_day, 1))
    
    # Simulate morning rush hour (8-10 AM)
    traffic_factors[8:10] = 1.5
    
    # Simulate evening rush hour (5-7 PM)
    traffic_factors[17:19] = 1.8
    
    # Late night (less traffic)
    traffic_factors[22:] = 0.8
    traffic_factors[:5] = 0.7
    
    # Create a combined list of all locations (depots + delivery points)
    all_locations = []
    
    # Add depot locations
    for row in depot_locations:
        all_locations.append({
            'id': row[0],  # vehicle_id as location id
            'type': 'depot',
            'latitude': row[1],
            'longitude': row[2]
        })
    
    # Add delivery locations
    for row in delivery_locations:
        all_locations.append({
            'id': row[0],  # delivery_id as location id
            'type': 'delivery',
            'latitude': row[1],
            'longitude': row[2]
        })
    
    print(f"Calculating distances between {len(all_locations)} locations...")
    
    # Save the locations file early so we have this data even if the process is interrupted
    location_df = pd.DataFrame(all_locations)
    location_df.to_csv(os.path.join(time_matrix_dir, 'locations.csv'), index=False)
    
    # Calculate distances and times using OSRM with improved error handling
    if use_osrm:
        print("Using OSRM API for road distances...")
        distance_matrix, base_time_matrix = get_road_distance(all_locations, all_locations, use_osrm=True)
    else:
        print("Using haversine distance with road factor adjustment...")
        distance_matrix, base_time_matrix = get_road_distance(all_locations, all_locations, use_osrm=False)
    
    # Create DataFrames for the matrices
    location_ids = [loc['id'] for loc in all_locations]
    
    distance_df = pd.DataFrame(distance_matrix, index=location_ids, columns=location_ids)
    time_df = pd.DataFrame(base_time_matrix, index=location_ids, columns=location_ids)
    
    # Save distance and base time matrices early in case later steps fail
    distance_df.to_csv(os.path.join(time_matrix_dir, 'distance_matrix.csv'))
    time_df.to_csv(os.path.join(time_matrix_dir, 'base_time_matrix.csv'))
    print("Basic distance and time matrices saved successfully.")
    
    # Create time matrices for different hours of the day
    hourly_time_matrices = {}
    for hour in range(24):
        traffic_factor = traffic_factors[hour][0]
        hourly_time = base_time_matrix * traffic_factor
        hourly_time_matrices[f"{hour:02d}:00"] = pd.DataFrame(hourly_time, index=location_ids, columns=location_ids)
    
    # Save a sample of time matrices (e.g., rush hour and normal time)
    try:
        hourly_time_matrices['08:00'].to_csv(os.path.join(time_matrix_dir, 'morning_rush_time_matrix.csv'))
        hourly_time_matrices['18:00'].to_csv(os.path.join(time_matrix_dir, 'evening_rush_time_matrix.csv'))
        hourly_time_matrices['12:00'].to_csv(os.path.join(time_matrix_dir, 'midday_time_matrix.csv'))
        hourly_time_matrices['00:00'].to_csv(os.path.join(time_matrix_dir, 'night_time_matrix.csv'))
        print("Time matrices for different hours saved successfully.")
    except Exception as e:
        print(f"Error saving hourly time matrices: {e}")
        print("Continuing with basic matrices only.")
    
    print("Travel matrices generation complete.")
    return distance_df, time_df, hourly_time_matrices

if __name__ == "__main__":
    # For development, allow falling back to haversine if needed
    import argparse
    
    parser = argparse.ArgumentParser(description="Generate travel matrices for delivery route optimization")
    parser.add_argument("--use-osrm", action="store_true", help="Use OSRM API for real road distances")
    parser.add_argument("--use-haversine", action="store_true", help="Use haversine distance only (faster)")
    
    args = parser.parse_args()
    
    if args.use_haversine:
        generate_travel_matrix(use_osrm=False)
    else:
        # Default to OSRM unless explicitly disabled
        generate_travel_matrix(use_osrm=True)