# Source: DeliveryRouteOptimisation/src/utils/generate_delivery_data.py
# Commit 300be5a ("add utils src"), uploaded by Jing997
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
import os
import time
import requests
from geopy.geocoders import Nominatim
# Set random seeds for reproducibility.
# Both generators must be seeded: NumPy's RNG drives weights, volumes,
# priorities, vehicle types and statuses, while the stdlib ``random`` module
# drives names, addresses, coordinates, dates, time windows and notes.
np.random.seed(42)
random.seed(42)
# Pause (in seconds) inserted after every geocoding request so that
# consecutive requests are at least one second apart.
_GEOCODE_DELAY_SECONDS = 1


def _geocode_with_fallback(geolocator, geocode_address, simpler_address, fallback_coords):
    """Resolve an address to ``(lat, lon)``, degrading gracefully on failure.

    Tries ``geocode_address`` first, then ``simpler_address``; if neither
    resolves (or the geocoder raises), returns ``fallback_coords`` unchanged.
    A delay follows every request, including failed ones, to throttle calls.
    """
    try:
        location = geolocator.geocode(geocode_address)
        if location:
            lat, lon = location.latitude, location.longitude
            print(f"✓ Successfully geocoded: {geocode_address} → ({lat}, {lon})")
            return lat, lon

        # First fallback: retry with just street and postal code. Wait first,
        # otherwise two requests would be sent back-to-back.
        time.sleep(_GEOCODE_DELAY_SECONDS)
        location = geolocator.geocode(simpler_address)
        if location:
            lat, lon = location.latitude, location.longitude
            print(f"✓ Fallback geocoded: {simpler_address} → ({lat}, {lon})")
            return lat, lon

        # Second fallback: use the pre-drawn point inside the neighborhood box.
        print(f"✗ Could not geocode: {geocode_address}, using neighborhood coordinates")
        return fallback_coords
    except Exception as e:
        # Deliberately broad: geocoding is best-effort and any failure
        # (timeout, quota, network) should fall back, not abort generation.
        print(f"✗ Geocoding error for {geocode_address}: {str(e)}")
        return fallback_coords
    finally:
        # Throttle on every path, including errors, before the next request.
        time.sleep(_GEOCODE_DELAY_SECONDS)


def generate_delivery_data(n_deliveries=50, use_geocoding=False, *, output_dir=None):
    """
    Generate synthetic delivery data with realistic Singapore addresses.

    Args:
        n_deliveries: Number of delivery rows to generate.
        use_geocoding: If True, look up coordinates through Nominatim
            (slow: at least one second per request). Otherwise, or when a
            lookup fails, coordinates are drawn uniformly from the
            neighborhood's bounding box.
        output_dir: Directory that receives ``delivery_data.csv``. Defaults to
            ``<three levels above this file>/data/delivery-data``.

    Returns:
        pandas.DataFrame with one row per delivery. The same data is also
        written to ``delivery_data.csv`` in the output directory, which is
        created if it does not exist.
    """
    # Real Singapore neighborhoods and some of their actual streets.
    # Format: [neighborhood_name, [list_of_real_streets], postal_code_prefix]
    sg_neighborhoods = [
        ['Ang Mo Kio', ['Ang Mo Kio Avenue 1', 'Ang Mo Kio Avenue 3', 'Ang Mo Kio Avenue 4', 'Ang Mo Kio Avenue 10'], '56'],
        ['Bedok', ['Bedok North Avenue 1', 'Bedok North Road', 'Bedok Reservoir Road', 'New Upper Changi Road'], '46'],
        ['Bishan', ['Bishan Street 11', 'Bishan Street 12', 'Bishan Street 13', 'Bishan Street 22'], '57'],
        ['Bukit Merah', ['Jalan Bukit Merah', 'Henderson Road', 'Tiong Bahru Road', 'Redhill Close'], '15'],
        ['Bukit Batok', ['Bukit Batok East Avenue 6', 'Bukit Batok West Avenue 8', 'Bukit Batok Street 21'], '65'],
        ['Clementi', ['Clementi Avenue 1', 'Clementi Avenue 4', 'Clementi Road', 'Commonwealth Avenue West'], '12'],
        ['Geylang', ['Geylang East Avenue 1', 'Geylang Road', 'Guillemard Road', 'Sims Avenue'], '38'],
        ['Hougang', ['Hougang Avenue 1', 'Hougang Avenue 7', 'Hougang Street 91', 'Upper Serangoon Road'], '53'],
        ['Jurong East', ['Jurong East Street 13', 'Jurong East Avenue 1', 'Jurong Gateway Road'], '60'],
        ['Jurong West', ['Jurong West Street 41', 'Jurong West Street 52', 'Jurong West Street 93'], '64'],
        ['Kallang', ['Kallang Avenue', 'Geylang Bahru', 'Boon Keng Road', 'Upper Boon Keng Road'], '33'],
        ['Punggol', ['Punggol Central', 'Punggol Field', 'Punggol Road', 'Punggol Way'], '82'],
        ['Queenstown', ['Commonwealth Avenue', 'Commonwealth Drive', 'Mei Chin Road', 'Stirling Road'], '14'],
        ['Sengkang', ['Sengkang East Way', 'Sengkang West Way', 'Compassvale Road', 'Fernvale Road'], '54'],
        ['Serangoon', ['Serangoon Avenue 2', 'Serangoon Avenue 3', 'Serangoon North Avenue 1'], '55'],
        ['Tampines', ['Tampines Street 11', 'Tampines Street 21', 'Tampines Avenue 1', 'Tampines Avenue 4'], '52'],
        ['Toa Payoh', ['Toa Payoh Lorong 1', 'Toa Payoh Lorong 2', 'Toa Payoh Lorong 4', 'Toa Payoh Central'], '31'],
        ['Woodlands', ['Woodlands Avenue 1', 'Woodlands Drive 16', 'Woodlands Drive 72', 'Woodlands Circle'], '73'],
        ['Yishun', ['Yishun Avenue 1', 'Yishun Avenue 4', 'Yishun Ring Road', 'Yishun Street 22'], '76'],
    ]

    # Bounding boxes for neighborhoods (source of fallback coordinates).
    # Format: name -> [min_lat, max_lat, min_lon, max_lon]
    neighborhood_bounds = {
        'Ang Mo Kio': [1.360000, 1.380000, 103.830000, 103.860000],
        'Bedok': [1.320000, 1.335000, 103.920000, 103.950000],
        'Bishan': [1.345000, 1.360000, 103.830000, 103.855000],
        'Bukit Merah': [1.270000, 1.290000, 103.800000, 103.830000],
        'Bukit Batok': [1.340000, 1.360000, 103.740000, 103.770000],
        'Clementi': [1.310000, 1.325000, 103.750000, 103.780000],
        'Geylang': [1.310000, 1.325000, 103.880000, 103.900000],
        'Hougang': [1.370000, 1.385000, 103.880000, 103.900000],
        'Jurong East': [1.330000, 1.345000, 103.730000, 103.750000],
        'Jurong West': [1.340000, 1.360000, 103.690000, 103.720000],
        'Kallang': [1.300000, 1.320000, 103.850000, 103.880000],
        'Punggol': [1.390000, 1.410000, 103.900000, 103.920000],
        'Queenstown': [1.290000, 1.310000, 103.780000, 103.805000],
        'Sengkang': [1.380000, 1.395000, 103.870000, 103.900000],
        'Serangoon': [1.345000, 1.360000, 103.865000, 103.885000],
        'Tampines': [1.345000, 1.365000, 103.930000, 103.960000],
        'Toa Payoh': [1.326000, 1.341000, 103.840000, 103.865000],
        'Woodlands': [1.430000, 1.450000, 103.770000, 103.800000],
        'Yishun': [1.410000, 1.430000, 103.820000, 103.850000],
    }

    # Generate delivery IDs (DEL0001, DEL0002, ...)
    delivery_ids = [f'DEL{str(i).zfill(4)}' for i in range(1, n_deliveries + 1)]

    # Generate customer names (fictional)
    first_names = ['Tan', 'Lim', 'Lee', 'Ng', 'Wong', 'Chan', 'Goh', 'Ong', 'Teo', 'Koh',
                   'Chua', 'Loh', 'Yeo', 'Sim', 'Ho', 'Ang', 'Tay', 'Yap', 'Leong', 'Foo']
    last_names = ['Wei', 'Ming', 'Hui', 'Ling', 'Yong', 'Jun', 'Hong', 'Xin', 'Yi', 'Jie',
                  'Cheng', 'Kai', 'Zhi', 'Tian', 'Yu', 'En', 'Yang', 'Hao', 'Chong', 'Zheng']
    customer_names = [f"{random.choice(first_names)} {random.choice(last_names)}" for _ in range(n_deliveries)]

    addresses = []
    postal_codes = []
    latitudes = []
    longitudes = []
    neighborhood_names = []

    # Only construct the geocoder when it will actually be used
    geolocator = Nominatim(user_agent="delivery_app") if use_geocoding else None

    # Generate realistic addresses
    for _ in range(n_deliveries):
        # Randomly select a neighborhood, then a street within it
        neighborhood, streets, postal_prefix = random.choice(sg_neighborhoods)
        street = random.choice(streets)

        # Generate block number (realistic for HDB)
        block = random.randint(100, 600)

        # Generate unit number
        unit_floor = random.randint(2, 20)
        unit_number = random.randint(1, 150)

        # Singapore postal codes are six digits: the two-digit prefix
        # followed by four more digits.
        postal_suffix = str(random.randint(0, 9999)).zfill(4)
        postal_code = postal_prefix + postal_suffix

        # Two address formats: a full one for display, a simpler one for geocoding
        display_address = f"Block {block}, #{unit_floor:02d}-{unit_number:03d}, {street}, Singapore {postal_code}"
        geocode_address = f"{block} {street}, Singapore {postal_code}"

        # Fallback coordinates: a uniformly random point in the neighborhood box
        min_lat, max_lat, min_lon, max_lon = neighborhood_bounds[neighborhood]
        default_lat = round(random.uniform(min_lat, max_lat), 6)
        default_lon = round(random.uniform(min_lon, max_lon), 6)

        if use_geocoding:
            lat, lon = _geocode_with_fallback(
                geolocator,
                geocode_address,
                f"{street}, Singapore {postal_code}",
                (default_lat, default_lon),
            )
        else:
            # Without geocoding, use the default coordinates
            lat, lon = default_lat, default_lon

        addresses.append(display_address)
        postal_codes.append(postal_code)
        latitudes.append(lat)
        longitudes.append(lon)
        neighborhood_names.append(neighborhood)

    # Generate delivery dates (1 to 7 days from today)
    base_date = datetime.now().date()
    delivery_dates = [base_date + timedelta(days=random.randint(1, 7)) for _ in range(n_deliveries)]

    # Generate time windows: start between 09:00 and 16:00, last 1-3 hours,
    # and end no later than 18:00.
    time_windows = []
    for _ in range(n_deliveries):
        start_hour = random.randint(9, 16)
        window_length = random.choice([1, 2, 3])
        end_hour = min(start_hour + window_length, 18)
        time_windows.append(f"{start_hour:02d}:00-{end_hour:02d}:00")

    # Generate package details
    weights = np.random.uniform(0.5, 20.0, n_deliveries)  # in kg
    volumes = np.random.uniform(0.01, 0.5, n_deliveries)  # in m³

    # Priority levels: 20% High, 50% Medium, 30% Low
    priorities = np.random.choice(['High', 'Medium', 'Low'], n_deliveries,
                                  p=[0.2, 0.5, 0.3])

    # Required vehicle type
    vehicle_types = np.random.choice(['Standard', 'Large', 'Refrigerated'], n_deliveries,
                                     p=[0.7, 0.2, 0.1])

    # Status
    statuses = np.random.choice(['Pending', 'Assigned', 'In Transit', 'Delivered'], n_deliveries,
                                p=[0.6, 0.2, 0.15, 0.05])

    # Additional notes: 70% of deliveries draw from the list below (which
    # itself contains None); the remaining 30% get None.
    special_instructions = [
        'Call customer before delivery',
        'Fragile items',
        'Leave at door',
        'Signature required',
        'No delivery on weekends',
        None,
    ]
    notes = []
    for _ in range(n_deliveries):
        if random.random() < 0.7:
            notes.append(random.choice(special_instructions))
        else:
            notes.append(None)

    # Create DataFrame
    df = pd.DataFrame({
        'delivery_id': delivery_ids,
        'customer_name': customer_names,
        'address': addresses,
        'postal_code': postal_codes,
        'neighborhood': neighborhood_names,
        'latitude': latitudes,
        'longitude': longitudes,
        'delivery_date': delivery_dates,
        'time_window': time_windows,
        'weight_kg': weights.round(2),
        'volume_m3': volumes.round(3),
        'priority': priorities,
        'vehicle_type': vehicle_types,
        'status': statuses,
        'special_instructions': notes,
    })

    # Resolve the output directory (default: <project root>/data/delivery-data)
    if output_dir is None:
        project_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
        output_dir = os.path.join(project_root, 'data', 'delivery-data')
    os.makedirs(output_dir, exist_ok=True)

    # Save to CSV
    output_path = os.path.join(output_dir, 'delivery_data.csv')
    df.to_csv(output_path, index=False)
    print(f"Delivery data generated and saved to {output_path}")

    return df
if __name__ == "__main__":
    # Real geocoding is slower but more accurate; switch this to False to
    # use the random neighborhood coordinates instead.
    USE_GEOCODING = True

    delivery_data = generate_delivery_data(n_deliveries=50, use_geocoding=USE_GEOCODING)

    # Show the first few rows as a quick sanity check
    preview = delivery_data.head()
    print("Sample of delivery data:")
    print(preview)