Jing997 committed
Commit 300be5a · 1 parent: dc171c8

add utils src

src/utils/__init__.py ADDED
@@ -0,0 +1 @@
+ # This file is intentionally left blank.
src/utils/generate_all_data.py ADDED
@@ -0,0 +1,89 @@
+ import os
+ import sys
+ from pathlib import Path
+
+ # Add the project root directory to the Python path
+ sys.path.append(str(Path(__file__).resolve().parent.parent.parent))
+
+ def create_data_directory():
+     """
+     Ensure data directories exist for all generated files.
+
+     This function creates the necessary directory structure to store
+     delivery data, vehicle data, and travel time matrices.
+
+     Returns:
+     --------
+     tuple of (str, str, str)
+         Paths to the time matrix directory, vehicle data directory, and delivery data directory
+     """
+     vehicle_data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'data', 'vehicle-data')
+     os.makedirs(vehicle_data_dir, exist_ok=True)
+
+     delivery_data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'data', 'delivery-data')
+     os.makedirs(delivery_data_dir, exist_ok=True)
+
+     time_matrix_data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'data', 'time-matrix')
+     os.makedirs(time_matrix_data_dir, exist_ok=True)
+     return time_matrix_data_dir, vehicle_data_dir, delivery_data_dir
+
+ def main():
+     """
+     Run all data generation scripts for the delivery route optimization project.
+
+     This function orchestrates the creation of all synthetic datasets needed for
+     the route optimization problem, including delivery data, vehicle data, and
+     travel time/distance matrices.
+
+     Generated Files:
+     ----------------
+     1. Delivery Data:
+        - Contains information about delivery locations, time windows, packages, etc.
+        - Used to define the delivery stops in the routing problem.
+
+     2. Vehicle Data:
+        - Contains information about the delivery fleet, capacity, depots, etc.
+        - Used to define the available resources for delivery routes.
+
+     3. Travel Matrices:
+        - Contain distance and time information between all locations.
+        - Used by the optimization algorithm to calculate route costs.
+
+     Usage:
+     ------
+     These generated datasets form the foundation of the delivery route optimization
+     application. Together they define:
+     - Where deliveries need to be made (delivery data)
+     - What resources are available for deliveries (vehicle data)
+     - How long it takes to travel between locations (travel matrices)
+
+     The route optimization algorithm uses these inputs to determine the most
+     efficient assignment of deliveries to vehicles and the optimal sequence of
+     stops for each vehicle.
+     """
+     print("Starting data generation process...")
+
+     time_matrix_data_dir, vehicle_data_dir, delivery_data_dir = create_data_directory()
+     print(f"Time Matrix Data will be saved to: {time_matrix_data_dir}")
+     print(f"Delivery Data will be saved to: {delivery_data_dir}")
+     print(f"Vehicle Data will be saved to: {vehicle_data_dir}")
+
+     # Import and run delivery data generation
+     print("\n1. Generating delivery data...")
+     from src.utils.generate_delivery_data import generate_delivery_data
+     delivery_data = generate_delivery_data(50, use_geocoding=True)
+
+     # Import and run vehicle data generation
+     print("\n2. Generating vehicle data...")
+     from src.utils.generate_vehicle_data import generate_vehicle_data
+     vehicle_data = generate_vehicle_data(10)
+
+     # Import and run travel matrix generation
+     print("\n3. Generating travel matrices...")
+     from src.utils.generate_travel_matrix import generate_travel_matrix
+     generate_travel_matrix()
+
+     print("\nAll data generation complete! Files saved to data directory.")
+
+ if __name__ == "__main__":
+     main()
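For reference, a minimal sketch of how this orchestrator might be invoked from the project root; the module path follows the imports in main() above and is otherwise an assumption about the package layout:

# Hypothetical invocation from the project root,
# equivalent to: python -m src.utils.generate_all_data
from src.utils.generate_all_data import main

main()  # writes delivery data, vehicle data, and travel matrices under ./data/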
src/utils/generate_delivery_data.py ADDED
@@ -0,0 +1,241 @@
+ import pandas as pd
+ import numpy as np
+ import random
+ from datetime import datetime, timedelta
+ import os
+ import time
+ import requests
+ from geopy.geocoders import Nominatim
+
+ # Set random seed for reproducibility
+ np.random.seed(42)
+
+ def generate_delivery_data(n_deliveries=50, use_geocoding=False):
+     """
+     Generate synthetic delivery data with realistic Singapore addresses.
+     """
+     # Define real Singapore neighborhoods and their actual streets
+     # Format: [neighborhood_name, [list_of_real_streets], postal_code_prefix]
+     sg_neighborhoods = [
+         ['Ang Mo Kio', ['Ang Mo Kio Avenue 1', 'Ang Mo Kio Avenue 3', 'Ang Mo Kio Avenue 4', 'Ang Mo Kio Avenue 10'], '56'],
+         ['Bedok', ['Bedok North Avenue 1', 'Bedok North Road', 'Bedok Reservoir Road', 'New Upper Changi Road'], '46'],
+         ['Bishan', ['Bishan Street 11', 'Bishan Street 12', 'Bishan Street 13', 'Bishan Street 22'], '57'],
+         ['Bukit Merah', ['Jalan Bukit Merah', 'Henderson Road', 'Tiong Bahru Road', 'Redhill Close'], '15'],
+         ['Bukit Batok', ['Bukit Batok East Avenue 6', 'Bukit Batok West Avenue 8', 'Bukit Batok Street 21'], '65'],
+         ['Clementi', ['Clementi Avenue 1', 'Clementi Avenue 4', 'Clementi Road', 'Commonwealth Avenue West'], '12'],
+         ['Geylang', ['Geylang East Avenue 1', 'Geylang Road', 'Guillemard Road', 'Sims Avenue'], '38'],
+         ['Hougang', ['Hougang Avenue 1', 'Hougang Avenue 7', 'Hougang Street 91', 'Upper Serangoon Road'], '53'],
+         ['Jurong East', ['Jurong East Street 13', 'Jurong East Avenue 1', 'Jurong Gateway Road'], '60'],
+         ['Jurong West', ['Jurong West Street 41', 'Jurong West Street 52', 'Jurong West Street 93'], '64'],
+         ['Kallang', ['Kallang Avenue', 'Geylang Bahru', 'Boon Keng Road', 'Upper Boon Keng Road'], '33'],
+         ['Punggol', ['Punggol Central', 'Punggol Field', 'Punggol Road', 'Punggol Way'], '82'],
+         ['Queenstown', ['Commonwealth Avenue', 'Commonwealth Drive', 'Mei Chin Road', 'Stirling Road'], '14'],
+         ['Sengkang', ['Sengkang East Way', 'Sengkang West Way', 'Compassvale Road', 'Fernvale Road'], '54'],
+         ['Serangoon', ['Serangoon Avenue 2', 'Serangoon Avenue 3', 'Serangoon North Avenue 1'], '55'],
+         ['Tampines', ['Tampines Street 11', 'Tampines Street 21', 'Tampines Avenue 1', 'Tampines Avenue 4'], '52'],
+         ['Toa Payoh', ['Toa Payoh Lorong 1', 'Toa Payoh Lorong 2', 'Toa Payoh Lorong 4', 'Toa Payoh Central'], '31'],
+         ['Woodlands', ['Woodlands Avenue 1', 'Woodlands Drive 16', 'Woodlands Drive 72', 'Woodlands Circle'], '73'],
+         ['Yishun', ['Yishun Avenue 1', 'Yishun Avenue 4', 'Yishun Ring Road', 'Yishun Street 22'], '76']
+     ]
+
+     # Bounding boxes for neighborhoods (for fallback coordinates)
+     # Format: {name: [min_lat, max_lat, min_lon, max_lon]}
+     neighborhood_bounds = {
+         'Ang Mo Kio': [1.360000, 1.380000, 103.830000, 103.860000],
+         'Bedok': [1.320000, 1.335000, 103.920000, 103.950000],
+         'Bishan': [1.345000, 1.360000, 103.830000, 103.855000],
+         'Bukit Merah': [1.270000, 1.290000, 103.800000, 103.830000],
+         'Bukit Batok': [1.340000, 1.360000, 103.740000, 103.770000],
+         'Clementi': [1.310000, 1.325000, 103.750000, 103.780000],
+         'Geylang': [1.310000, 1.325000, 103.880000, 103.900000],
+         'Hougang': [1.370000, 1.385000, 103.880000, 103.900000],
+         'Jurong East': [1.330000, 1.345000, 103.730000, 103.750000],
+         'Jurong West': [1.340000, 1.360000, 103.690000, 103.720000],
+         'Kallang': [1.300000, 1.320000, 103.850000, 103.880000],
+         'Punggol': [1.390000, 1.410000, 103.900000, 103.920000],
+         'Queenstown': [1.290000, 1.310000, 103.780000, 103.805000],
+         'Sengkang': [1.380000, 1.395000, 103.870000, 103.900000],
+         'Serangoon': [1.345000, 1.360000, 103.865000, 103.885000],
+         'Tampines': [1.345000, 1.365000, 103.930000, 103.960000],
+         'Toa Payoh': [1.326000, 1.341000, 103.840000, 103.865000],
+         'Woodlands': [1.430000, 1.450000, 103.770000, 103.800000],
+         'Yishun': [1.410000, 1.430000, 103.820000, 103.850000]
+     }
+
+     # Generate delivery IDs
+     delivery_ids = [f'DEL{str(i).zfill(4)}' for i in range(1, n_deliveries + 1)]
+
+     # Generate customer names (fictional)
+     first_names = ['Tan', 'Lim', 'Lee', 'Ng', 'Wong', 'Chan', 'Goh', 'Ong', 'Teo', 'Koh',
+                    'Chua', 'Loh', 'Yeo', 'Sim', 'Ho', 'Ang', 'Tay', 'Yap', 'Leong', 'Foo']
+     last_names = ['Wei', 'Ming', 'Hui', 'Ling', 'Yong', 'Jun', 'Hong', 'Xin', 'Yi', 'Jie',
+                   'Cheng', 'Kai', 'Zhi', 'Tian', 'Yu', 'En', 'Yang', 'Hao', 'Chong', 'Zheng']
+     customer_names = [f"{random.choice(first_names)} {random.choice(last_names)}" for _ in range(n_deliveries)]
+
+     addresses = []
+     postal_codes = []
+     latitudes = []
+     longitudes = []
+     neighborhood_names = []
+
+     # Initialize geocoder if using geocoding
+     if use_geocoding:
+         geolocator = Nominatim(user_agent="delivery_app")
+
+     # Generate realistic addresses
+     for i in range(n_deliveries):
+         # Randomly select a neighborhood
+         neighborhood_data = random.choice(sg_neighborhoods)
+         neighborhood = neighborhood_data[0]
+         streets = neighborhood_data[1]
+         postal_prefix = neighborhood_data[2]
+
+         # Randomly select a street in that neighborhood
+         street = random.choice(streets)
+
+         # Generate block number (realistic for HDB)
+         block = random.randint(100, 600)
+
+         # Generate unit number
+         unit_floor = random.randint(2, 20)
+         unit_number = random.randint(1, 150)
+
+         # Generate postal code (with realistic prefix)
+         postal_suffix = str(random.randint(0, 999)).zfill(3)
+         postal_code = postal_prefix + postal_suffix
+
+         # Create two formats of address - one for display, one for geocoding
+         display_address = f"Block {block}, #{unit_floor:02d}-{unit_number:03d}, {street}, Singapore {postal_code}"
+         geocode_address = f"{block} {street}, Singapore {postal_code}"  # Simpler format for geocoding
+
+         # Default coordinates from neighborhood bounding box (fallback)
+         bounds = neighborhood_bounds[neighborhood]
+         default_lat = round(random.uniform(bounds[0], bounds[1]), 6)
+         default_lon = round(random.uniform(bounds[2], bounds[3]), 6)
+
+         # Use geocoding API if requested
+         if use_geocoding:
+             try:
+                 location = geolocator.geocode(geocode_address)
+
+                 if location:
+                     lat = location.latitude
+                     lon = location.longitude
+                     print(f"✓ Successfully geocoded: {geocode_address} → ({lat}, {lon})")
+                 else:
+                     # First fallback: try with just street and postal code
+                     simpler_address = f"{street}, Singapore {postal_code}"
+                     location = geolocator.geocode(simpler_address)
+
+                     if location:
+                         lat = location.latitude
+                         lon = location.longitude
+                         print(f"✓ Fallback geocoded: {simpler_address} → ({lat}, {lon})")
+                     else:
+                         # Second fallback: just use the neighborhood center
+                         lat = default_lat
+                         lon = default_lon
+                         print(f"✗ Could not geocode: {geocode_address}, using neighborhood coordinates")
+
+                 # Add delay to avoid being rate limited
+                 time.sleep(1)
+
+             except Exception as e:
+                 print(f"✗ Geocoding error for {geocode_address}: {str(e)}")
+                 lat = default_lat
+                 lon = default_lon
+         else:
+             # Without geocoding, use the default coordinates
+             lat = default_lat
+             lon = default_lon
+
+         addresses.append(display_address)
+         postal_codes.append(postal_code)
+         latitudes.append(lat)
+         longitudes.append(lon)
+         neighborhood_names.append(neighborhood)
+
+     # Generate delivery dates (within the next 7 days)
+     base_date = datetime.now().date()
+     delivery_dates = [base_date + timedelta(days=random.randint(1, 7)) for _ in range(n_deliveries)]
+
+     # Generate time windows (starting between 9 AM and 4 PM, ending by 6 PM)
+     time_windows = []
+     for _ in range(n_deliveries):
+         start_hour = random.randint(9, 16)
+         window_length = random.choice([1, 2, 3])  # 1, 2, or 3 hour windows
+         end_hour = min(start_hour + window_length, 18)
+
+         start_time = f"{start_hour:02d}:00"
+         end_time = f"{end_hour:02d}:00"
+         time_windows.append(f"{start_time}-{end_time}")
+
+     # Generate package details
+     weights = np.random.uniform(0.5, 20.0, n_deliveries)  # in kg
+     volumes = np.random.uniform(0.01, 0.5, n_deliveries)  # in m³
+
+     # Priority levels
+     priorities = np.random.choice(['High', 'Medium', 'Low'], n_deliveries,
+                                   p=[0.2, 0.5, 0.3])  # 20% High, 50% Medium, 30% Low
+
+     # Required vehicle type
+     vehicle_types = np.random.choice(['Standard', 'Large', 'Refrigerated'], n_deliveries,
+                                      p=[0.7, 0.2, 0.1])
+
+     # Status
+     statuses = np.random.choice(['Pending', 'Assigned', 'In Transit', 'Delivered'], n_deliveries,
+                                 p=[0.6, 0.2, 0.15, 0.05])
+
+     # Additional notes
+     notes = []
+     special_instructions = [
+         'Call customer before delivery',
+         'Fragile items',
+         'Leave at door',
+         'Signature required',
+         'No delivery on weekends',
+         None
+     ]
+
+     for _ in range(n_deliveries):
+         if random.random() < 0.7:  # 70% chance of having a note
+             notes.append(random.choice(special_instructions))
+         else:
+             notes.append(None)
+
+     # Create DataFrame
+     df = pd.DataFrame({
+         'delivery_id': delivery_ids,
+         'customer_name': customer_names,
+         'address': addresses,
+         'postal_code': postal_codes,
+         'neighborhood': neighborhood_names,
+         'latitude': latitudes,
+         'longitude': longitudes,
+         'delivery_date': delivery_dates,
+         'time_window': time_windows,
+         'weight_kg': weights.round(2),
+         'volume_m3': volumes.round(3),
+         'priority': priorities,
+         'vehicle_type': vehicle_types,
+         'status': statuses,
+         'special_instructions': notes
+     })
+
+     # Ensure the directory exists
+     data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'data', 'delivery-data')
+     os.makedirs(data_dir, exist_ok=True)
+
+     # Save to CSV
+     output_path = os.path.join(data_dir, 'delivery_data.csv')
+     df.to_csv(output_path, index=False)
+     print(f"Delivery data generated and saved to {output_path}")
+     return df
+
+ if __name__ == "__main__":
+     # Set to True if you want to use real geocoding (slower but more accurate)
+     USE_GEOCODING = True
+     delivery_data = generate_delivery_data(50, use_geocoding=USE_GEOCODING)
+     print("Sample of delivery data:")
+     print(delivery_data.head())
+
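A hedged sketch of consuming the generated CSV downstream: the column names and output path follow the DataFrame above, while parse_time_window is a hypothetical helper for turning the "HH:MM-HH:MM" strings into minutes, not part of this commit:

import pandas as pd

def parse_time_window(window):
    # "09:00-12:00" -> (540, 720) minutes since midnight
    start, end = window.split('-')
    to_minutes = lambda t: int(t[:2]) * 60 + int(t[3:])
    return to_minutes(start), to_minutes(end)

deliveries = pd.read_csv('data/delivery-data/delivery_data.csv')
deliveries[['tw_start_min', 'tw_end_min']] = deliveries['time_window'].apply(
    lambda w: pd.Series(parse_time_window(w))
)
print(deliveries[['delivery_id', 'time_window', 'tw_start_min', 'tw_end_min']].head())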
src/utils/generate_travel_matrix.py ADDED
@@ -0,0 +1,327 @@
+ import pandas as pd
+ import numpy as np
+ import os
+ import time
+ import requests
+ from math import radians, sin, cos, sqrt, atan2
+ import random
+
+ def haversine_distance(lat1, lon1, lat2, lon2):
+     """
+     Calculate the Haversine distance between two points in kilometers.
+     The Haversine distance is the great-circle distance between two points on a sphere.
+
+     Parameters:
+     -----------
+     lat1, lon1 : float
+         Coordinates of the first point in decimal degrees
+     lat2, lon2 : float
+         Coordinates of the second point in decimal degrees
+
+     Returns:
+     --------
+     float
+         Distance between the two points in kilometers
+     """
+     # Convert decimal degrees to radians
+     lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
+
+     # Haversine formula
+     dlon = lon2 - lon1
+     dlat = lat2 - lat1
+     a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
+     c = 2 * atan2(sqrt(a), sqrt(1-a))
+     distance = 6371 * c  # Radius of Earth in kilometers
+
+     return distance
+
+ def get_road_distance_with_retry(origin, destination, max_retries=3, initial_backoff=1):
+     """
+     Get road distance between two points with retry logic.
+
+     Parameters:
+     -----------
+     origin : dict
+         Origin location with 'latitude' and 'longitude' keys
+     destination : dict
+         Destination location with 'latitude' and 'longitude' keys
+     max_retries : int
+         Maximum number of retry attempts
+     initial_backoff : int
+         Initial backoff time in seconds
+
+     Returns:
+     --------
+     tuple of (float, float)
+         Distance in km and duration in minutes
+     """
+     # URLs for different public OSRM instances to distribute load
+     osrm_urls = [
+         "http://router.project-osrm.org",
+         "https://routing.openstreetmap.de",
+         # Add more public OSRM servers if available
+     ]
+
+     retry_count = 0
+     backoff = initial_backoff
+
+     while retry_count < max_retries:
+         try:
+             # Use a random OSRM server from the list to distribute load
+             base_url = random.choice(osrm_urls)
+             url = f"{base_url}/route/v1/driving/{origin['longitude']},{origin['latitude']};{destination['longitude']},{destination['latitude']}?overview=false"
+
+             # Add a timeout to prevent hanging connections
+             response = requests.get(url, timeout=5)
+             data = response.json()
+
+             if data.get('code') == 'Ok':
+                 # Extract distance and duration
+                 distance = data['routes'][0]['distance'] / 1000  # meters to km
+                 duration = data['routes'][0]['duration'] / 60  # seconds to minutes
+                 return round(distance, 2), round(duration, 2)
+             else:
+                 print(f"API returned error: {data.get('message', 'Unknown error')}")
+
+         except requests.exceptions.RequestException as e:
+             print(f"Request failed: {e}. Retry {retry_count+1}/{max_retries}")
+
+         # Exponential backoff with jitter to prevent thundering herd
+         jitter = random.uniform(0, 0.5 * backoff)
+         sleep_time = backoff + jitter
+         time.sleep(sleep_time)
+         backoff *= 2  # Exponential backoff
+         retry_count += 1
+
+     # Fallback to haversine after all retries failed
+     print(f"All retries failed for route from ({origin['latitude']},{origin['longitude']}) to ({destination['latitude']},{destination['longitude']}). Using haversine distance.")
+     distance = haversine_distance(
+         origin['latitude'], origin['longitude'],
+         destination['latitude'], destination['longitude']
+     )
+     distance = distance * 1.3  # Road factor
+     time_mins = (distance / 40) * 60  # 40 km/h
+
+     return round(distance, 2), round(time_mins, 2)
+
+ def get_road_distance(origins, destinations, use_osrm=True):
+     """
+     Calculate actual road distances and travel times between multiple origins and destinations
+     using the OSRM (Open Source Routing Machine) API.
+
+     Parameters:
+     -----------
+     origins : list of dict
+         List of origin locations with 'latitude' and 'longitude' keys
+     destinations : list of dict
+         List of destination locations with 'latitude' and 'longitude' keys
+     use_osrm : bool, default=True
+         Whether to use OSRM API or fall back to haversine distance
+
+     Returns:
+     --------
+     tuple of (numpy.ndarray, numpy.ndarray)
+         Arrays containing distances (in km) and durations (in minutes) between each origin-destination pair
+     """
+     n_origins = len(origins)
+     n_destinations = len(destinations)
+     distance_matrix = np.zeros((n_origins, n_destinations))
+     duration_matrix = np.zeros((n_origins, n_destinations))
+
+     # If OSRM is not requested, fall back to haversine distance
+     if not use_osrm:
+         print("Using haversine distance as fallback.")
+         for i, origin in enumerate(origins):
+             for j, dest in enumerate(destinations):
+                 distance = haversine_distance(
+                     origin['latitude'], origin['longitude'],
+                     dest['latitude'], dest['longitude']
+                 )
+                 # Adjust for road networks (roads are typically not straight lines)
+                 distance = distance * 1.3  # Apply a factor to approximate road distance
+                 time_mins = (distance / 40) * 60  # Assuming average speed of 40 km/h
+
+                 distance_matrix[i, j] = round(distance, 2)
+                 duration_matrix[i, j] = round(time_mins, 2)
+         return distance_matrix, duration_matrix
+
+     # Process in batches to prevent overwhelming the API
+     print(f"Processing {n_origins} origins and {n_destinations} destinations in batches...")
+     total_requests = n_origins * n_destinations
+     completed = 0
+
+     try:
+         # Try OSRM's table service for small datasets first (more efficient)
+         if n_origins + n_destinations <= 50:
+             print("Trying OSRM table API for efficient matrix calculation...")
+             try:
+                 # Code for table API would go here, but we'll skip for now as it's more complex
+                 # and the batch approach is more reliable for handling errors
+                 raise NotImplementedError("Table API not implemented, falling back to individual routes")
+             except Exception as e:
+                 print(f"Table API failed: {e}. Using individual routes instead.")
+                 # Continue with individual route requests below
+
+         # Process with individual route requests
+         for i, origin in enumerate(origins):
+             for j, dest in enumerate(destinations):
+                 # Skip if origin and destination are the same point
+                 if i == j:
+                     distance_matrix[i, j] = 0
+                     duration_matrix[i, j] = 0
+                     completed += 1
+                     continue
+
+                 # Get distance with retry logic
+                 distance, duration = get_road_distance_with_retry(origin, dest)
+                 distance_matrix[i, j] = distance
+                 duration_matrix[i, j] = duration
+
+                 # Show progress
+                 completed += 1
+                 if completed % 10 == 0:
+                     print(f"Progress: {completed}/{total_requests} routes calculated ({(completed/total_requests)*100:.1f}%)")
+
+                 # Add randomized delay to prevent overwhelming the API
+                 time.sleep(random.uniform(0.1, 0.5))
+
+     except KeyboardInterrupt:
+         print("\nOperation interrupted by user. Saving partial results...")
+
+     return distance_matrix, duration_matrix
+
+ def generate_travel_matrix(use_osrm=True):
+     """
+     Generate travel time and distance matrices between all locations in the delivery problem.
+
+     Parameters:
+     -----------
+     use_osrm : bool, default=True
+         Whether to use OSRM API for real road distances instead of haversine
+
+     Returns:
+     --------
+     tuple of (pd.DataFrame, pd.DataFrame, dict)
+         Distance matrix, base time matrix, and hourly time matrices
+     """
+     # Create data directories if they don't exist
+     data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'data')
+     time_matrix_dir = os.path.join(data_dir, 'time-matrix')
+     delivery_data_dir = os.path.join(data_dir, 'delivery-data')
+     vehicle_data_dir = os.path.join(data_dir, 'vehicle-data')
+
+     # Ensure all directories exist
+     for directory in [time_matrix_dir, delivery_data_dir, vehicle_data_dir]:
+         os.makedirs(directory, exist_ok=True)
+
+     # Read delivery and vehicle data
+     try:
+         delivery_data = pd.read_csv(os.path.join(delivery_data_dir, 'delivery_data.csv'))
+         vehicle_data = pd.read_csv(os.path.join(vehicle_data_dir, 'vehicle_data.csv'))
+     except FileNotFoundError:
+         print("Error: Please generate delivery and vehicle data first!")
+         return
+
+     # Extract locations
+     delivery_locations = delivery_data[['delivery_id', 'latitude', 'longitude']].values
+     depot_locations = vehicle_data[['vehicle_id', 'depot_latitude', 'depot_longitude']].values
+
+     # Average speed for time calculation (km/h)
+     avg_speed = vehicle_data['avg_speed_kmh'].mean()
+
+     # Traffic factor matrix (to simulate traffic conditions at different times)
+     hours_in_day = 24
+     traffic_factors = np.ones((hours_in_day, 1))
+
+     # Simulate morning rush hour (8-10 AM)
+     traffic_factors[8:10] = 1.5
+
+     # Simulate evening rush hour (5-7 PM)
+     traffic_factors[17:19] = 1.8
+
+     # Late night (less traffic)
+     traffic_factors[22:] = 0.8
+     traffic_factors[:5] = 0.7
+
+     # Create a combined list of all locations (depots + delivery points)
+     all_locations = []
+
+     # Add depot locations
+     for row in depot_locations:
+         all_locations.append({
+             'id': row[0],  # vehicle_id as location id
+             'type': 'depot',
+             'latitude': row[1],
+             'longitude': row[2]
+         })
+
+     # Add delivery locations
+     for row in delivery_locations:
+         all_locations.append({
+             'id': row[0],  # delivery_id as location id
+             'type': 'delivery',
+             'latitude': row[1],
+             'longitude': row[2]
+         })
+
+     print(f"Calculating distances between {len(all_locations)} locations...")
+
+     # Save the locations file early so we have this data even if the process is interrupted
+     location_df = pd.DataFrame(all_locations)
+     location_df.to_csv(os.path.join(time_matrix_dir, 'locations.csv'), index=False)
+
+     # Calculate distances and times using OSRM with improved error handling
+     if use_osrm:
+         print("Using OSRM API for road distances...")
+         distance_matrix, base_time_matrix = get_road_distance(all_locations, all_locations, use_osrm=True)
+     else:
+         print("Using haversine distance with road factor adjustment...")
+         distance_matrix, base_time_matrix = get_road_distance(all_locations, all_locations, use_osrm=False)
+
+     # Create DataFrames for the matrices
+     location_ids = [loc['id'] for loc in all_locations]
+
+     distance_df = pd.DataFrame(distance_matrix, index=location_ids, columns=location_ids)
+     time_df = pd.DataFrame(base_time_matrix, index=location_ids, columns=location_ids)
+
+     # Save distance and base time matrices early in case later steps fail
+     distance_df.to_csv(os.path.join(time_matrix_dir, 'distance_matrix.csv'))
+     time_df.to_csv(os.path.join(time_matrix_dir, 'base_time_matrix.csv'))
+     print("Basic distance and time matrices saved successfully.")
+
+     # Create time matrices for different hours of the day
+     hourly_time_matrices = {}
+     for hour in range(24):
+         traffic_factor = traffic_factors[hour][0]
+         hourly_time = base_time_matrix * traffic_factor
+         hourly_time_matrices[f"{hour:02d}:00"] = pd.DataFrame(hourly_time, index=location_ids, columns=location_ids)
+
+     # Save a sample of time matrices (e.g., rush hour and normal time)
+     try:
+         hourly_time_matrices['08:00'].to_csv(os.path.join(time_matrix_dir, 'morning_rush_time_matrix.csv'))
+         hourly_time_matrices['18:00'].to_csv(os.path.join(time_matrix_dir, 'evening_rush_time_matrix.csv'))
+         hourly_time_matrices['12:00'].to_csv(os.path.join(time_matrix_dir, 'midday_time_matrix.csv'))
+         hourly_time_matrices['00:00'].to_csv(os.path.join(time_matrix_dir, 'night_time_matrix.csv'))
+         print("Time matrices for different hours saved successfully.")
+     except Exception as e:
+         print(f"Error saving hourly time matrices: {e}")
+         print("Continuing with basic matrices only.")
+
+     print("Travel matrices generation complete.")
+     return distance_df, time_df, hourly_time_matrices
+
+ if __name__ == "__main__":
+     # For development, allow falling back to haversine if needed
+     import argparse
+
+     parser = argparse.ArgumentParser(description="Generate travel matrices for delivery route optimization")
+     parser.add_argument("--use-osrm", action="store_true", help="Use OSRM API for real road distances")
+     parser.add_argument("--use-haversine", action="store_true", help="Use haversine distance only (faster)")
+
+     args = parser.parse_args()
+
+     if args.use_haversine:
+         generate_travel_matrix(use_osrm=False)
+     else:
+         # Default to OSRM unless explicitly disabled
+         generate_travel_matrix(use_osrm=True)
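As a quick sanity check of the haversine fallback, a small example using two depot coordinates from generate_vehicle_data.py (Tuas Logistics Hub and Changi Logistics Centre); the 1.3 road factor and 40 km/h speed mirror the fallback path above, and the printed values are approximate:

from src.utils.generate_travel_matrix import haversine_distance

straight_line = haversine_distance(1.3187, 103.6390, 1.3497, 103.9742)
road_estimate = straight_line * 1.3          # same road factor as the fallback above
travel_minutes = (road_estimate / 40) * 60   # assumed 40 km/h average speed
print(round(straight_line, 1), round(road_estimate, 1), round(travel_minutes, 1))
# roughly 37.4 km straight-line, ~48.7 km by road, ~73 minutes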
src/utils/generate_vehicle_data.py ADDED
@@ -0,0 +1,168 @@
+ import pandas as pd
+ import numpy as np
+ import random
+ from datetime import datetime, timedelta
+ import os
+
+ # Set random seed for reproducibility
+ np.random.seed(43)
+
+ def generate_vehicle_data(n_vehicles=10):
+     """
+     Generate synthetic vehicle data for a delivery fleet optimization problem.
+
+     This function creates a realistic delivery fleet with various vehicle types,
+     capacities, and operational parameters to be used in route optimization.
+
+     Parameters:
+     -----------
+     n_vehicles : int, default=10
+         Number of vehicles to generate in the fleet
+
+     Returns:
+     --------
+     pd.DataFrame
+         DataFrame containing the generated vehicle data
+     """
+
+     # Vehicle IDs
+     vehicle_ids = [f'VEH{str(i).zfill(3)}' for i in range(1, n_vehicles + 1)]
+
+     # Vehicle types
+     vehicle_types = []
+     for _ in range(n_vehicles):
+         vehicle_type = random.choices(['Standard', 'Large', 'Refrigerated'],
+                                       weights=[0.7, 0.2, 0.1])[0]
+         vehicle_types.append(vehicle_type)
+
+     # Vehicle capacities based on type
+     max_weights = []
+     max_volumes = []
+     for v_type in vehicle_types:
+         if v_type == 'Standard':
+             max_weights.append(random.uniform(800, 1200))
+             max_volumes.append(random.uniform(8, 12))
+         elif v_type == 'Large':
+             max_weights.append(random.uniform(1500, 2500))
+             max_volumes.append(random.uniform(15, 25))
+         else:  # Refrigerated
+             max_weights.append(random.uniform(600, 1000))
+             max_volumes.append(random.uniform(6, 10))
+
+     # Realistic depot/warehouse locations in Singapore industrial areas
+     # [name, latitude, longitude]
+     warehouse_locations = [
+         ["Tuas Logistics Hub", 1.3187, 103.6390],
+         ["Jurong Industrial Estate", 1.3233, 103.6994],
+         ["Loyang Industrial Park", 1.3602, 103.9761],
+         ["Changi Logistics Centre", 1.3497, 103.9742],
+         ["Keppel Distripark", 1.2706, 103.8219],
+         ["Pandan Logistics Hub", 1.3187, 103.7509],
+         ["Alexandra Distripark", 1.2744, 103.8012],
+         ["Kallang Way Industrial", 1.3315, 103.8731],
+         ["Defu Industrial Park", 1.3610, 103.8891],
+         ["Woodlands Industrial", 1.4428, 103.7875]
+     ]
+
+     # Assign warehouses to vehicles (multiple vehicles can be from the same warehouse).
+     # Either assign sequentially so each vehicle gets its own warehouse (if n_vehicles <= len(warehouse_locations)),
+     # or use every warehouse at least once and fill the rest randomly
+     depot_names = []
+     depot_lats = []
+     depot_lons = []
+
+     if n_vehicles <= len(warehouse_locations):
+         # Use first n_vehicles warehouses (one vehicle per warehouse)
+         selected_warehouses = warehouse_locations[:n_vehicles]
+     else:
+         # Ensure every warehouse is used at least once
+         selected_warehouses = warehouse_locations.copy()
+         # Then add random ones for remaining vehicles
+         remaining = n_vehicles - len(warehouse_locations)
+         selected_warehouses.extend([random.choice(warehouse_locations) for _ in range(remaining)])
+
+     # Shuffle to avoid sequential assignment
+     random.shuffle(selected_warehouses)
+
+     # Extract depot information
+     for warehouse in selected_warehouses:
+         depot_names.append(warehouse[0])
+         depot_lats.append(warehouse[1])
+         depot_lons.append(warehouse[2])
+
+     # Add small variation for vehicles from the same warehouse (within warehouse compound)
+     # This makes each vehicle's position slightly different, simulating different loading bays
+     for i in range(len(depot_lats)):
+         # Much smaller variation - within warehouse compound (approximately 50-100m variation)
+         depot_lats[i] += random.uniform(-0.0005, 0.0005)
+         depot_lons[i] += random.uniform(-0.0005, 0.0005)
+
+     # Driver names
+     first_names = ['Ahmad', 'Raj', 'Michael', 'Wei', 'Siti', 'Kumar', 'Chong', 'David', 'Suresh', 'Ali']
+     last_names = ['Tan', 'Singh', 'Lee', 'Wong', 'Kumar', 'Abdullah', 'Zhang', 'Lim', 'Raj', 'Teo']
+     driver_names = []
+
+     for i in range(n_vehicles):
+         if i < len(first_names):
+             driver_names.append(f"{first_names[i]} {random.choice(last_names)}")
+         else:
+             driver_names.append(f"{random.choice(first_names)} {random.choice(last_names)}")
+
+     # Vehicle availability
+     start_times = [f"{random.randint(7, 10):02d}:00" for _ in range(n_vehicles)]
+     end_times = [f"{random.randint(17, 21):02d}:00" for _ in range(n_vehicles)]
+
+     # Max working hours
+     max_working_hours = [random.randint(8, 10) for _ in range(n_vehicles)]
+
+     # Average speed (km/h)
+     avg_speeds = [random.uniform(30, 50) for _ in range(n_vehicles)]
+
+     # Cost per km
+     cost_per_km = [random.uniform(0.5, 1.5) for _ in range(n_vehicles)]
+
+     # Vehicle status
+     statuses = np.random.choice(['Available', 'In Service', 'Maintenance'], n_vehicles, p=[0.7, 0.2, 0.1])
+
+     # License plates (Singapore format)
+     letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+     license_plates = []
+     for _ in range(n_vehicles):
+         letter_part = ''.join(random.choices(letters, k=3))
+         number_part = random.randint(1000, 9999)
+         license_plates.append(f"S{letter_part}{number_part}")
+
+     # Create DataFrame
+     df = pd.DataFrame({
+         'vehicle_id': vehicle_ids,
+         'vehicle_type': vehicle_types,
+         'license_plate': license_plates,
+         'driver_name': driver_names,
+         'max_weight_kg': np.array(max_weights).round(2),
+         'max_volume_m3': np.array(max_volumes).round(2),
+         'depot_name': depot_names,
+         'depot_latitude': np.array(depot_lats).round(6),
+         'depot_longitude': np.array(depot_lons).round(6),
+         'start_time': start_times,
+         'end_time': end_times,
+         'max_working_hours': max_working_hours,
+         'avg_speed_kmh': np.array(avg_speeds).round(2),
+         'cost_per_km': np.array(cost_per_km).round(2),
+         'status': statuses
+     })
+
+     # Ensure the directory exists
+     data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'data', 'vehicle-data')
+     os.makedirs(data_dir, exist_ok=True)
+
+     # Save to CSV
+     output_path = os.path.join(data_dir, 'vehicle_data.csv')
+     df.to_csv(output_path, index=False)
+     print(f"Vehicle data generated and saved to {output_path}")
+     return df
+
+ if __name__ == "__main__":
+     # Generate vehicle data
+     vehicle_data = generate_vehicle_data(10)
+     print("Sample of vehicle data:")
+     print(vehicle_data.head())
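A hedged sketch of how the vehicle CSV might be consumed downstream; the column names and output path follow the DataFrame above, and the aggregation itself is only illustrative:

import pandas as pd

vehicles = pd.read_csv('data/vehicle-data/vehicle_data.csv')
available = vehicles[vehicles['status'] == 'Available']

# Total carrying capacity of the currently available fleet, broken down by vehicle type
capacity_by_type = available.groupby('vehicle_type')[['max_weight_kg', 'max_volume_m3']].sum()
print(capacity_by_type)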