Spaces:
Sleeping
Sleeping
add utils src
Browse files- src/utils/__init__.py +1 -0
- src/utils/generate_all_data.py +89 -0
- src/utils/generate_delivery_data.py +241 -0
- src/utils/generate_travel_matrix.py +327 -0
- src/utils/generate_vehicle_data.py +168 -0
src/utils/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# This file is intentionally left blank.
|
src/utils/generate_all_data.py
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
from pathlib import Path
|
4 |
+
|
5 |
+
# Add the project root directory to the Python path
|
6 |
+
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))
|
7 |
+
|
8 |
+
def create_data_directory():
    """
    Ensure data directories exist for all generated files.

    This function creates the necessary directory structure to store
    delivery data, vehicle data, and travel time matrices.

    Returns:
    --------
    tuple of (str, str, str)
        Paths to time matrix directory, vehicle data directory, and delivery data directory
    """
    # Project root is three levels above this file (src/utils/<this file>).
    # Computed once instead of repeating the dirname() chain per directory.
    project_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))

    paths = {}
    for subdir in ('time-matrix', 'vehicle-data', 'delivery-data'):
        path = os.path.join(project_root, 'data', subdir)
        os.makedirs(path, exist_ok=True)  # idempotent: safe on repeated runs
        paths[subdir] = path

    return paths['time-matrix'], paths['vehicle-data'], paths['delivery-data']
|
29 |
+
|
30 |
+
def main():
    """
    Run all data generation scripts for the delivery route optimization project.

    Orchestrates creation of the three synthetic datasets the optimizer needs:

    1. Delivery data  - delivery locations, time windows, packages, etc.;
       defines the delivery stops in the routing problem.
    2. Vehicle data   - the delivery fleet, capacities, depots, etc.;
       defines the resources available for routes.
    3. Travel matrices - distances and times between all locations;
       used by the optimization algorithm to cost candidate routes.

    Together these inputs tell the optimizer where deliveries go, what
    resources exist, and how long travel takes, so it can assign deliveries
    to vehicles and order the stops efficiently.
    """
    print("Starting data generation process...")

    time_matrix_data_dir, vehicle_data_dir, delivery_data_dir = create_data_directory()
    # Report each output location in the same order as the original script.
    for label, target_dir in (
        ("Time Matrix Data", time_matrix_data_dir),
        ("Delivery Data", delivery_data_dir),
        ("Vehicle Data", vehicle_data_dir),
    ):
        print(f"{label} will be saved to: {target_dir}")

    # Imports are deliberately deferred so each generator is loaded (and any
    # import-time work happens) only after its progress banner is printed.
    print("\n1. Generating delivery data...")
    from src.utils.generate_delivery_data import generate_delivery_data
    generate_delivery_data(50, use_geocoding=True)

    print("\n2. Generating vehicle data...")
    from src.utils.generate_vehicle_data import generate_vehicle_data
    generate_vehicle_data(10)

    print("\n3. Generating travel matrices...")
    from src.utils.generate_travel_matrix import generate_travel_matrix
    generate_travel_matrix()

    print("\nAll data generation complete! Files saved to data directory.")


if __name__ == "__main__":
    main()
|
src/utils/generate_delivery_data.py
ADDED
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
import os
import time
import requests
from geopy.geocoders import Nominatim

# Set random seeds for reproducibility.
# BUG FIX: the original seeded only NumPy, yet this module draws most of its
# values (names, streets, blocks, time windows, notes) from the stdlib
# `random` module, so runs were not actually reproducible. Seed both.
np.random.seed(42)
random.seed(42)
|
12 |
+
|
13 |
+
def generate_delivery_data(n_deliveries=50, use_geocoding=False):
    """
    Generate synthetic delivery data with realistic Singapore addresses.

    Parameters:
    -----------
    n_deliveries : int, default=50
        Number of delivery records to generate.
    use_geocoding : bool, default=False
        If True, resolve each synthetic address to coordinates via the
        Nominatim geocoding service (slow: one request plus a 1-second
        delay per delivery). If False, coordinates are sampled uniformly
        from per-neighborhood bounding boxes.

    Returns:
    --------
    pd.DataFrame
        One row per delivery with address, coordinates, date, time window,
        package details, priority, vehicle type, status, and notes. The
        frame is also written to data/delivery-data/delivery_data.csv.
    """
    # Define real Singapore neighborhoods and their actual streets
    # Format: [neighborhood_name, [list_of_real_streets], postal_code_prefix]
    sg_neighborhoods = [
        ['Ang Mo Kio', ['Ang Mo Kio Avenue 1', 'Ang Mo Kio Avenue 3', 'Ang Mo Kio Avenue 4', 'Ang Mo Kio Avenue 10'], '56'],
        ['Bedok', ['Bedok North Avenue 1', 'Bedok North Road', 'Bedok Reservoir Road', 'New Upper Changi Road'], '46'],
        ['Bishan', ['Bishan Street 11', 'Bishan Street 12', 'Bishan Street 13', 'Bishan Street 22'], '57'],
        ['Bukit Merah', ['Jalan Bukit Merah', 'Henderson Road', 'Tiong Bahru Road', 'Redhill Close'], '15'],
        ['Bukit Batok', ['Bukit Batok East Avenue 6', 'Bukit Batok West Avenue 8', 'Bukit Batok Street 21'], '65'],
        ['Clementi', ['Clementi Avenue 1', 'Clementi Avenue 4', 'Clementi Road', 'Commonwealth Avenue West'], '12'],
        ['Geylang', ['Geylang East Avenue 1', 'Geylang Road', 'Guillemard Road', 'Sims Avenue'], '38'],
        ['Hougang', ['Hougang Avenue 1', 'Hougang Avenue 7', 'Hougang Street 91', 'Upper Serangoon Road'], '53'],
        ['Jurong East', ['Jurong East Street 13', 'Jurong East Avenue 1', 'Jurong Gateway Road'], '60'],
        ['Jurong West', ['Jurong West Street 41', 'Jurong West Street 52', 'Jurong West Street 93'], '64'],
        ['Kallang', ['Kallang Avenue', 'Geylang Bahru', 'Boon Keng Road', 'Upper Boon Keng Road'], '33'],
        ['Punggol', ['Punggol Central', 'Punggol Field', 'Punggol Road', 'Punggol Way'], '82'],
        ['Queenstown', ['Commonwealth Avenue', 'Commonwealth Drive', 'Mei Chin Road', 'Stirling Road'], '14'],
        ['Sengkang', ['Sengkang East Way', 'Sengkang West Way', 'Compassvale Road', 'Fernvale Road'], '54'],
        ['Serangoon', ['Serangoon Avenue 2', 'Serangoon Avenue 3', 'Serangoon North Avenue 1'], '55'],
        ['Tampines', ['Tampines Street 11', 'Tampines Street 21', 'Tampines Avenue 1', 'Tampines Avenue 4'], '52'],
        ['Toa Payoh', ['Toa Payoh Lorong 1', 'Toa Payoh Lorong 2', 'Toa Payoh Lorong 4', 'Toa Payoh Central'], '31'],
        ['Woodlands', ['Woodlands Avenue 1', 'Woodlands Drive 16', 'Woodlands Drive 72', 'Woodlands Circle'], '73'],
        ['Yishun', ['Yishun Avenue 1', 'Yishun Avenue 4', 'Yishun Ring Road', 'Yishun Street 22'], '76']
    ]

    # Bounding boxes for neighborhoods (for fallback coordinates)
    # Format: [name, min_lat, max_lat, min_lon, max_lon]
    neighborhood_bounds = {
        'Ang Mo Kio': [1.360000, 1.380000, 103.830000, 103.860000],
        'Bedok': [1.320000, 1.335000, 103.920000, 103.950000],
        'Bishan': [1.345000, 1.360000, 103.830000, 103.855000],
        'Bukit Merah': [1.270000, 1.290000, 103.800000, 103.830000],
        'Bukit Batok': [1.340000, 1.360000, 103.740000, 103.770000],
        'Clementi': [1.310000, 1.325000, 103.750000, 103.780000],
        'Geylang': [1.310000, 1.325000, 103.880000, 103.900000],
        'Hougang': [1.370000, 1.385000, 103.880000, 103.900000],
        'Jurong East': [1.330000, 1.345000, 103.730000, 103.750000],
        'Jurong West': [1.340000, 1.360000, 103.690000, 103.720000],
        'Kallang': [1.300000, 1.320000, 103.850000, 103.880000],
        'Punggol': [1.390000, 1.410000, 103.900000, 103.920000],
        'Queenstown': [1.290000, 1.310000, 103.780000, 103.805000],
        'Sengkang': [1.380000, 1.395000, 103.870000, 103.900000],
        'Serangoon': [1.345000, 1.360000, 103.865000, 103.885000],
        'Tampines': [1.345000, 1.365000, 103.930000, 103.960000],
        'Toa Payoh': [1.326000, 1.341000, 103.840000, 103.865000],
        'Woodlands': [1.430000, 1.450000, 103.770000, 103.800000],
        'Yishun': [1.410000, 1.430000, 103.820000, 103.850000]
    }

    # Generate delivery IDs (DEL0001, DEL0002, ...)
    delivery_ids = [f'DEL{str(i).zfill(4)}' for i in range(1, n_deliveries + 1)]

    # Generate customer names (fictional)
    first_names = ['Tan', 'Lim', 'Lee', 'Ng', 'Wong', 'Chan', 'Goh', 'Ong', 'Teo', 'Koh',
                   'Chua', 'Loh', 'Yeo', 'Sim', 'Ho', 'Ang', 'Tay', 'Yap', 'Leong', 'Foo']
    last_names = ['Wei', 'Ming', 'Hui', 'Ling', 'Yong', 'Jun', 'Hong', 'Xin', 'Yi', 'Jie',
                  'Cheng', 'Kai', 'Zhi', 'Tian', 'Yu', 'En', 'Yang', 'Hao', 'Chong', 'Zheng']
    customer_names = [f"{random.choice(first_names)} {random.choice(last_names)}" for _ in range(n_deliveries)]

    # Per-delivery output columns, filled in by the loop below.
    addresses = []
    postal_codes = []
    latitudes = []
    longitudes = []
    neighborhood_names = []

    # Initialize geocoder if using geocoding
    if use_geocoding:
        geolocator = Nominatim(user_agent="delivery_app")

    # Generate realistic addresses
    for i in range(n_deliveries):
        # Randomly select a neighborhood
        neighborhood_data = random.choice(sg_neighborhoods)
        neighborhood = neighborhood_data[0]
        streets = neighborhood_data[1]
        postal_prefix = neighborhood_data[2]

        # Randomly select a street in that neighborhood
        street = random.choice(streets)

        # Generate block number (realistic for HDB)
        block = random.randint(100, 600)

        # Generate unit number
        unit_floor = random.randint(2, 20)
        unit_number = random.randint(1, 150)

        # Generate postal code (with realistic prefix)
        postal_suffix = str(random.randint(0, 999)).zfill(3)
        postal_code = postal_prefix + postal_suffix

        # Create two formats of address - one for display, one for geocoding
        display_address = f"Block {block}, #{unit_floor:02d}-{unit_number:03d}, {street}, Singapore {postal_code}"
        geocode_address = f"{block} {street}, Singapore {postal_code}"  # Simpler format for geocoding

        # Default coordinates from neighborhood bounding box (fallback)
        bounds = neighborhood_bounds[neighborhood]
        default_lat = round(random.uniform(bounds[0], bounds[1]), 6)
        default_lon = round(random.uniform(bounds[2], bounds[3]), 6)

        # Use geocoding API if requested
        if use_geocoding:
            try:
                location = geolocator.geocode(geocode_address)

                if location:
                    lat = location.latitude
                    lon = location.longitude
                    print(f"✓ Successfully geocoded: {geocode_address} → ({lat}, {lon})")
                else:
                    # First fallback: try with just street and postal code
                    simpler_address = f"{street}, Singapore {postal_code}"
                    location = geolocator.geocode(simpler_address)

                    if location:
                        lat = location.latitude
                        lon = location.longitude
                        print(f"✓ Fallback geocoded: {simpler_address} → ({lat}, {lon})")
                    else:
                        # Second fallback: just use the neighborhood center
                        lat = default_lat
                        lon = default_lon
                        print(f"✗ Could not geocode: {geocode_address}, using neighborhood coordinates")

                # Add delay to avoid being rate limited
                # (Nominatim's usage policy asks for at most ~1 request/second)
                time.sleep(1)

            except Exception as e:
                # Any geocoder failure falls back to the bounding-box sample.
                print(f"✗ Geocoding error for {geocode_address}: {str(e)}")
                lat = default_lat
                lon = default_lon
        else:
            # Without geocoding, use the default coordinates
            lat = default_lat
            lon = default_lon

        addresses.append(display_address)
        postal_codes.append(postal_code)
        latitudes.append(lat)
        longitudes.append(lon)
        neighborhood_names.append(neighborhood)

    # Generate delivery dates (within the next 7 days)
    base_date = datetime.now().date()
    delivery_dates = [base_date + timedelta(days=random.randint(1, 7)) for _ in range(n_deliveries)]

    # Generate time windows (between 9 AM and 5 PM)
    time_windows = []
    for _ in range(n_deliveries):
        start_hour = random.randint(9, 16)
        window_length = random.choice([1, 2, 3])  # 1, 2, or 3 hour windows
        end_hour = min(start_hour + window_length, 18)

        start_time = f"{start_hour:02d}:00"
        end_time = f"{end_hour:02d}:00"
        time_windows.append(f"{start_time}-{end_time}")

    # Generate package details
    weights = np.random.uniform(0.5, 20.0, n_deliveries)  # in kg
    volumes = np.random.uniform(0.01, 0.5, n_deliveries)  # in m³

    # Priority levels
    priorities = np.random.choice(['High', 'Medium', 'Low'], n_deliveries,
                                  p=[0.2, 0.5, 0.3])  # 20% High, 50% Medium, 30% Low

    # Required vehicle type
    vehicle_types = np.random.choice(['Standard', 'Large', 'Refrigerated'], n_deliveries,
                                     p=[0.7, 0.2, 0.1])

    # Status
    statuses = np.random.choice(['Pending', 'Assigned', 'In Transit', 'Delivered'], n_deliveries,
                                p=[0.6, 0.2, 0.15, 0.05])

    # Additional notes
    notes = []
    special_instructions = [
        'Call customer before delivery',
        'Fragile items',
        'Leave at door',
        'Signature required',
        'No delivery on weekends',
        None
    ]

    # NOTE: special_instructions includes None, so the effective share of
    # deliveries with a real note is below the 70% sampling rate.
    for _ in range(n_deliveries):
        if random.random() < 0.7:  # 70% chance of having a note
            notes.append(random.choice(special_instructions))
        else:
            notes.append(None)

    # Create DataFrame
    df = pd.DataFrame({
        'delivery_id': delivery_ids,
        'customer_name': customer_names,
        'address': addresses,
        'postal_code': postal_codes,
        'neighborhood': neighborhood_names,
        'latitude': latitudes,
        'longitude': longitudes,
        'delivery_date': delivery_dates,
        'time_window': time_windows,
        'weight_kg': weights.round(2),
        'volume_m3': volumes.round(3),
        'priority': priorities,
        'vehicle_type': vehicle_types,
        'status': statuses,
        'special_instructions': notes
    })

    # Ensure the directory exists (project root is three levels above this file)
    data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'data', 'delivery-data')
    os.makedirs(data_dir, exist_ok=True)

    # Save to CSV
    output_path = os.path.join(data_dir, 'delivery_data.csv')
    df.to_csv(output_path, index=False)
    print(f"Delivery data generated and saved to {output_path}")
    return df
|
234 |
+
|
235 |
+
if __name__ == "__main__":
    # Real geocoding is slower but more accurate; flip to False for a quick
    # run that only uses the neighborhood bounding-box coordinates.
    USE_GEOCODING = True
    sample_df = generate_delivery_data(50, use_geocoding=USE_GEOCODING)
    print("Sample of delivery data:")
    print(sample_df.head())
|
241 |
+
|
src/utils/generate_travel_matrix.py
ADDED
@@ -0,0 +1,327 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
import os
|
4 |
+
import time
|
5 |
+
import requests
|
6 |
+
from math import radians, sin, cos, sqrt, atan2
|
7 |
+
import random
|
8 |
+
|
9 |
+
def haversine_distance(lat1, lon1, lat2, lon2):
    """
    Return the great-circle (Haversine) distance between two points, in km.

    Parameters:
    -----------
    lat1, lon1 : float
        Coordinates of the first point in decimal degrees
    lat2, lon2 : float
        Coordinates of the second point in decimal degrees

    Returns:
    --------
    float
        Distance between the two points in kilometers
    """
    # Work in radians throughout.
    phi1, lam1, phi2, lam2 = map(radians, [lat1, lon1, lat2, lon2])

    # Haversine of the central angle between the two points.
    dlon = lam2 - lam1
    dlat = phi2 - phi1
    h = sin(dlat / 2) ** 2 + cos(phi1) * cos(phi2) * sin(dlon / 2) ** 2

    # Central angle, then scale by the Earth's mean radius (6371 km).
    central_angle = 2 * atan2(sqrt(h), sqrt(1 - h))
    return 6371 * central_angle
|
37 |
+
|
38 |
+
def get_road_distance_with_retry(origin, destination, max_retries=3, initial_backoff=1):
    """
    Get road distance between two points with retry logic.

    Queries a public OSRM routing server; on repeated failure, falls back to
    an adjusted haversine estimate so the caller always gets a value.

    Parameters:
    -----------
    origin : dict
        Origin location with 'latitude' and 'longitude' keys
    destination : dict
        Destination location with 'latitude' and 'longitude' keys
    max_retries : int
        Maximum number of retry attempts
    initial_backoff : int
        Initial backoff time in seconds

    Returns:
    --------
    tuple of (float, float)
        Distance in km and duration in minutes
    """
    # URLs for different public OSRM instances to distribute load
    osrm_urls = [
        "http://router.project-osrm.org",
        "https://routing.openstreetmap.de",
        # Add more public OSRM servers if available
    ]

    retry_count = 0
    backoff = initial_backoff

    while retry_count < max_retries:
        try:
            # Use a random OSRM server from the list to distribute load
            base_url = random.choice(osrm_urls)
            url = f"{base_url}/route/v1/driving/{origin['longitude']},{origin['latitude']};{destination['longitude']},{destination['latitude']}?overview=false"

            # Add a timeout to prevent hanging connections
            response = requests.get(url, timeout=5)
            data = response.json()

            if data.get('code') == 'Ok':
                # Extract distance and duration
                distance = data['routes'][0]['distance'] / 1000  # meters to km
                duration = data['routes'][0]['duration'] / 60  # seconds to minutes
                return round(distance, 2), round(duration, 2)
            else:
                print(f"API returned error: {data.get('message', 'Unknown error')}")

        # BUG FIX: the original only caught RequestException, but
        # response.json() raises ValueError on a non-JSON body (e.g. an HTML
        # error page from an overloaded public server) and the
        # data['routes'][0] lookups can raise KeyError/IndexError on a
        # malformed payload. Those crashed the whole matrix build instead of
        # retrying / falling back; treat them as retryable failures too.
        except (requests.exceptions.RequestException, ValueError, KeyError, IndexError) as e:
            print(f"Request failed: {e}. Retry {retry_count+1}/{max_retries}")

        # Exponential backoff with jitter to prevent thundering herd
        jitter = random.uniform(0, 0.5 * backoff)
        sleep_time = backoff + jitter
        time.sleep(sleep_time)
        backoff *= 2  # Exponential backoff
        retry_count += 1

    # Fallback to haversine after all retries failed
    print(f"All retries failed for route from ({origin['latitude']},{origin['longitude']}) to ({destination['latitude']},{destination['longitude']}). Using haversine distance.")
    distance = haversine_distance(
        origin['latitude'], origin['longitude'],
        destination['latitude'], destination['longitude']
    )
    distance = distance * 1.3  # Road factor
    time_mins = (distance / 40) * 60  # 40 km/h
    return round(distance, 2), round(time_mins, 2)
|
106 |
+
|
107 |
+
def get_road_distance(origins, destinations, use_osrm=True):
    """
    Calculate actual road distances and travel times between multiple origins and destinations
    using the OSRM (Open Source Routing Machine) API.

    Parameters:
    -----------
    origins : list of dict
        List of origin locations with 'latitude' and 'longitude' keys
    destinations : list of dict
        List of destination locations with 'latitude' and 'longitude' keys
    use_osrm : bool, default=True
        Whether to use OSRM API or fall back to haversine distance

    Returns:
    --------
    tuple of (numpy.ndarray, numpy.ndarray)
        Arrays containing distances (in km) and durations (in minutes) between each origin-destination pair.
        On KeyboardInterrupt the partially-filled matrices are returned.
    """
    n_origins = len(origins)
    n_destinations = len(destinations)
    distance_matrix = np.zeros((n_origins, n_destinations))
    duration_matrix = np.zeros((n_origins, n_destinations))

    # If OSRM is not requested, fall back to haversine distance
    if not use_osrm:
        print("Using haversine distance as fallback.")
        for i, origin in enumerate(origins):
            for j, dest in enumerate(destinations):
                distance = haversine_distance(
                    origin['latitude'], origin['longitude'],
                    dest['latitude'], dest['longitude']
                )
                # Adjust for road networks (roads are typically not straight lines)
                distance = distance * 1.3  # Apply a factor to approximate road distance
                time_mins = (distance / 40) * 60  # Assuming average speed of 40 km/h

                distance_matrix[i, j] = round(distance, 2)
                duration_matrix[i, j] = round(time_mins, 2)
        return distance_matrix, duration_matrix

    # Process in batches to prevent overwhelming the API
    print(f"Processing {n_origins} origins and {n_destinations} destinations in batches...")
    total_requests = n_origins * n_destinations
    completed = 0

    try:
        # Try OSRM's table service for small datasets first (more efficient)
        if n_origins + n_destinations <= 50:
            print("Trying OSRM table API for efficient matrix calculation...")
            try:
                # Code for table API would go here, but we'll skip for now as it's more complex
                # and the batch approach is more reliable for handling errors
                raise NotImplementedError("Table API not implemented, falling back to individual routes")
            except Exception as e:
                print(f"Table API failed: {e}. Using individual routes instead.")
                # Continue with individual route requests below

        # Process with individual route requests
        for i, origin in enumerate(origins):
            for j, dest in enumerate(destinations):
                # Skip if origin and destination are the same point.
                # BUG FIX: the original tested `i == j`, which only means
                # "same point" when origins and destinations are the same
                # list; for distinct lists it zeroed out unrelated pairs and
                # queried identical ones. Compare the coordinates instead
                # (also saves an API call whenever two entries coincide).
                if (origin['latitude'] == dest['latitude']
                        and origin['longitude'] == dest['longitude']):
                    distance_matrix[i, j] = 0
                    duration_matrix[i, j] = 0
                    completed += 1
                    continue

                # Get distance with retry logic
                distance, duration = get_road_distance_with_retry(origin, dest)
                distance_matrix[i, j] = distance
                duration_matrix[i, j] = duration

                # Show progress
                completed += 1
                if completed % 10 == 0:
                    print(f"Progress: {completed}/{total_requests} routes calculated ({(completed/total_requests)*100:.1f}%)")

                # Add randomized delay to prevent overwhelming the API
                time.sleep(random.uniform(0.1, 0.5))

    except KeyboardInterrupt:
        print("\nOperation interrupted by user. Saving partial results...")

    return distance_matrix, duration_matrix
|
192 |
+
|
193 |
+
def generate_travel_matrix(use_osrm=True):
    """
    Generate travel time and distance matrices between all locations in the delivery problem.

    Reads the previously generated delivery and vehicle CSVs, builds one
    combined location list (depots first, then delivery points), computes a
    square distance/time matrix, and writes the results plus several
    traffic-adjusted hourly variants under data/time-matrix/.

    Parameters:
    -----------
    use_osrm : bool, default=True
        Whether to use OSRM API for real road distances instead of haversine

    Returns:
    --------
    tuple of (pd.DataFrame, pd.DataFrame, dict)
        Distance matrix, base time matrix, and hourly time matrices.
        Returns None (bare return) if the prerequisite CSV files are missing.
    """
    # Create data directories if they don't exist
    # (project root is three levels above this file: src/utils/<this file>)
    data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'data')
    time_matrix_dir = os.path.join(data_dir, 'time-matrix')
    delivery_data_dir = os.path.join(data_dir, 'delivery-data')
    vehicle_data_dir = os.path.join(data_dir, 'vehicle-data')

    # Ensure all directories exist
    for directory in [time_matrix_dir, delivery_data_dir, vehicle_data_dir]:
        os.makedirs(directory, exist_ok=True)

    # Read delivery and vehicle data
    try:
        delivery_data = pd.read_csv(os.path.join(delivery_data_dir, 'delivery_data.csv'))
        vehicle_data = pd.read_csv(os.path.join(vehicle_data_dir, 'vehicle_data.csv'))
    except FileNotFoundError:
        print("Error: Please generate delivery and vehicle data first!")
        return

    # Extract locations as raw (id, lat, lon) rows
    delivery_locations = delivery_data[['delivery_id', 'latitude', 'longitude']].values
    depot_locations = vehicle_data[['vehicle_id', 'depot_latitude', 'depot_longitude']].values

    # Average speed for time calculation (km/h)
    # NOTE(review): avg_speed is computed but not used below — the time
    # matrix comes from get_road_distance(); confirm whether it is needed.
    avg_speed = vehicle_data['avg_speed_kmh'].mean()

    # Traffic factor matrix (to simulate traffic conditions at different times)
    # Index = hour of day; value = multiplier applied to the base travel time.
    hours_in_day = 24
    traffic_factors = np.ones((hours_in_day, 1))

    # Simulate morning rush hour (8-10 AM)
    traffic_factors[8:10] = 1.5

    # Simulate evening rush hour (5-7 PM)
    traffic_factors[17:19] = 1.8

    # Late night (less traffic)
    traffic_factors[22:] = 0.8
    traffic_factors[:5] = 0.7

    # Create a combined list of all locations (depots + delivery points)
    all_locations = []

    # Add depot locations
    for row in depot_locations:
        all_locations.append({
            'id': row[0],  # vehicle_id as location id
            'type': 'depot',
            'latitude': row[1],
            'longitude': row[2]
        })

    # Add delivery locations
    for row in delivery_locations:
        all_locations.append({
            'id': row[0],  # delivery_id as location id
            'type': 'delivery',
            'latitude': row[1],
            'longitude': row[2]
        })

    print(f"Calculating distances between {len(all_locations)} locations...")

    # Save the locations file early so we have this data even if the process is interrupted
    location_df = pd.DataFrame(all_locations)
    location_df.to_csv(os.path.join(time_matrix_dir, 'locations.csv'), index=False)

    # Calculate distances and times using OSRM with improved error handling
    if use_osrm:
        print("Using OSRM API for road distances...")
        distance_matrix, base_time_matrix = get_road_distance(all_locations, all_locations, use_osrm=True)
    else:
        print("Using haversine distance with road factor adjustment...")
        distance_matrix, base_time_matrix = get_road_distance(all_locations, all_locations, use_osrm=False)

    # Create DataFrames for the matrices, indexed by location id both ways
    location_ids = [loc['id'] for loc in all_locations]

    distance_df = pd.DataFrame(distance_matrix, index=location_ids, columns=location_ids)
    time_df = pd.DataFrame(base_time_matrix, index=location_ids, columns=location_ids)

    # Save distance and base time matrices early in case later steps fail
    distance_df.to_csv(os.path.join(time_matrix_dir, 'distance_matrix.csv'))
    time_df.to_csv(os.path.join(time_matrix_dir, 'base_time_matrix.csv'))
    print("Basic distance and time matrices saved successfully.")

    # Create time matrices for different hours of the day by scaling the
    # base matrix with that hour's traffic factor.
    hourly_time_matrices = {}
    for hour in range(24):
        traffic_factor = traffic_factors[hour][0]
        hourly_time = base_time_matrix * traffic_factor
        hourly_time_matrices[f"{hour:02d}:00"] = pd.DataFrame(hourly_time, index=location_ids, columns=location_ids)

    # Save a sample of time matrices (e.g., rush hour and normal time)
    try:
        hourly_time_matrices['08:00'].to_csv(os.path.join(time_matrix_dir, 'morning_rush_time_matrix.csv'))
        hourly_time_matrices['18:00'].to_csv(os.path.join(time_matrix_dir, 'evening_rush_time_matrix.csv'))
        hourly_time_matrices['12:00'].to_csv(os.path.join(time_matrix_dir, 'midday_time_matrix.csv'))
        hourly_time_matrices['00:00'].to_csv(os.path.join(time_matrix_dir, 'night_time_matrix.csv'))
        print("Time matrices for different hours saved successfully.")
    except Exception as e:
        print(f"Error saving hourly time matrices: {e}")
        print("Continuing with basic matrices only.")

    print("Travel matrices generation complete.")
    return distance_df, time_df, hourly_time_matrices
|
312 |
+
|
313 |
+
if __name__ == "__main__":
    # For development, allow falling back to haversine if needed.
    import argparse

    parser = argparse.ArgumentParser(description="Generate travel matrices for delivery route optimization")
    parser.add_argument("--use-osrm", action="store_true", help="Use OSRM API for real road distances")
    parser.add_argument("--use-haversine", action="store_true", help="Use haversine distance only (faster)")

    args = parser.parse_args()

    # OSRM is the default; only --use-haversine disables it
    # (--use-osrm exists for explicitness but changes nothing).
    generate_travel_matrix(use_osrm=not args.use_haversine)
|
src/utils/generate_vehicle_data.py
ADDED
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
import random
|
4 |
+
from datetime import datetime, timedelta
|
5 |
+
import os
|
6 |
+
|
7 |
+
# Seed BOTH random number generators for reproducibility: this module draws
# most of its values from Python's `random` module (random.choices,
# random.uniform, ...), so seeding only NumPy — as the original code did —
# still produced different data on every run.
random.seed(43)
np.random.seed(43)
|
9 |
+
|
10 |
+
def generate_vehicle_data(n_vehicles=10, output_dir=None):
    """
    Generate synthetic vehicle data for a delivery fleet optimization problem.

    Creates a realistic Singapore-based delivery fleet with various vehicle
    types, capacities, depot locations and operational parameters, and writes
    the result to ``vehicle_data.csv``.

    Parameters
    ----------
    n_vehicles : int, default=10
        Number of vehicles to generate in the fleet.
    output_dir : str, optional
        Directory in which to save ``vehicle_data.csv``.  Defaults to the
        project's ``data/vehicle-data`` directory (resolved relative to this
        file), preserving the original behaviour.  Parameterizing the path
        makes the generator reusable and testable.

    Returns
    -------
    pd.DataFrame
        DataFrame containing the generated vehicle data.

    Notes
    -----
    Values are drawn from Python's ``random`` module and NumPy; results are
    reproducible only if both generators are seeded beforehand.
    """
    # --- Identification ----------------------------------------------------
    vehicle_ids = [f'VEH{str(i).zfill(3)}' for i in range(1, n_vehicles + 1)]

    # Fleet mix: mostly standard vans, some large trucks, a few refrigerated.
    vehicle_types = [
        random.choices(['Standard', 'Large', 'Refrigerated'],
                       weights=[0.7, 0.2, 0.1])[0]
        for _ in range(n_vehicles)
    ]

    # Capacity envelopes per type: ((min_kg, max_kg), (min_m3, max_m3)).
    capacity_ranges = {
        'Standard': ((800, 1200), (8, 12)),
        'Large': ((1500, 2500), (15, 25)),
        'Refrigerated': ((600, 1000), (6, 10)),
    }
    max_weights = []
    max_volumes = []
    for v_type in vehicle_types:
        (w_lo, w_hi), (vol_lo, vol_hi) = capacity_ranges[v_type]
        max_weights.append(random.uniform(w_lo, w_hi))
        max_volumes.append(random.uniform(vol_lo, vol_hi))

    # Realistic depot/warehouse locations in Singapore industrial areas:
    # [name, latitude, longitude]
    warehouse_locations = [
        ["Tuas Logistics Hub", 1.3187, 103.6390],
        ["Jurong Industrial Estate", 1.3233, 103.6994],
        ["Loyang Industrial Park", 1.3602, 103.9761],
        ["Changi Logistics Centre", 1.3497, 103.9742],
        ["Keppel Distripark", 1.2706, 103.8219],
        ["Pandan Logistics Hub", 1.3187, 103.7509],
        ["Alexandra Distripark", 1.2744, 103.8012],
        ["Kallang Way Industrial", 1.3315, 103.8731],
        ["Defu Industrial Park", 1.3610, 103.8891],
        ["Woodlands Industrial", 1.4428, 103.7875]
    ]

    # Assign a warehouse to each vehicle.  Small fleets take the first
    # n_vehicles warehouses (one vehicle each); larger fleets use every
    # warehouse at least once and fill the remainder with random repeats.
    if n_vehicles <= len(warehouse_locations):
        selected_warehouses = warehouse_locations[:n_vehicles]
    else:
        selected_warehouses = warehouse_locations.copy()
        remaining = n_vehicles - len(warehouse_locations)
        selected_warehouses.extend(random.choice(warehouse_locations)
                                   for _ in range(remaining))
    # Shuffle so vehicle IDs are not tied to the warehouse list order.
    random.shuffle(selected_warehouses)

    depot_names = [w[0] for w in selected_warehouses]
    # Jitter positions by roughly 50-100 m so vehicles sharing a warehouse
    # sit at distinct loading bays within the same compound.
    depot_lats = [w[1] + random.uniform(-0.0005, 0.0005) for w in selected_warehouses]
    depot_lons = [w[2] + random.uniform(-0.0005, 0.0005) for w in selected_warehouses]

    # --- Drivers ------------------------------------------------------------
    first_names = ['Ahmad', 'Raj', 'Michael', 'Wei', 'Siti', 'Kumar', 'Chong', 'David', 'Suresh', 'Ali']
    last_names = ['Tan', 'Singh', 'Lee', 'Wong', 'Kumar', 'Abdullah', 'Zhang', 'Lim', 'Raj', 'Teo']
    driver_names = []
    for i in range(n_vehicles):
        # Use each first name once before repeating, for variety.
        first = first_names[i] if i < len(first_names) else random.choice(first_names)
        driver_names.append(f"{first} {random.choice(last_names)}")

    # --- Operational parameters ---------------------------------------------
    start_times = [f"{random.randint(7, 10):02d}:00" for _ in range(n_vehicles)]
    end_times = [f"{random.randint(17, 21):02d}:00" for _ in range(n_vehicles)]
    max_working_hours = [random.randint(8, 10) for _ in range(n_vehicles)]
    avg_speeds = [random.uniform(30, 50) for _ in range(n_vehicles)]
    cost_per_km = [random.uniform(0.5, 1.5) for _ in range(n_vehicles)]
    statuses = np.random.choice(['Available', 'In Service', 'Maintenance'],
                                n_vehicles, p=[0.7, 0.2, 0.1])

    # License plates (Singapore format: S + 3 letters + 4 digits).
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    license_plates = [
        f"S{''.join(random.choices(letters, k=3))}{random.randint(1000, 9999)}"
        for _ in range(n_vehicles)
    ]

    # --- Assemble and persist -----------------------------------------------
    df = pd.DataFrame({
        'vehicle_id': vehicle_ids,
        'vehicle_type': vehicle_types,
        'license_plate': license_plates,
        'driver_name': driver_names,
        'max_weight_kg': np.array(max_weights).round(2),
        'max_volume_m3': np.array(max_volumes).round(2),
        'depot_name': depot_names,
        'depot_latitude': np.array(depot_lats).round(6),
        'depot_longitude': np.array(depot_lons).round(6),
        'start_time': start_times,
        'end_time': end_times,
        'max_working_hours': max_working_hours,
        'avg_speed_kmh': np.array(avg_speeds).round(2),
        'cost_per_km': np.array(cost_per_km).round(2),
        'status': statuses
    })

    if output_dir is None:
        output_dir = os.path.join(
            os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
            'data', 'vehicle-data')
    os.makedirs(output_dir, exist_ok=True)

    output_path = os.path.join(output_dir, 'vehicle_data.csv')
    df.to_csv(output_path, index=False)
    print(f"Vehicle data generated and saved to {output_path}")
    return df
|
163 |
+
|
164 |
+
if __name__ == "__main__":
    # Build the default 10-vehicle fleet and preview the first few rows.
    fleet_df = generate_vehicle_data(10)
    print("Sample of vehicle data:")
    print(fleet_df.head())
|