|
import pandas as pd |
|
import numpy as np |
|
import random |
|
from datetime import datetime, timedelta |
|
import os |
|
import time |
|
import requests |
|
from geopy.geocoders import Nominatim |
|
|
|
|
|
# Seed both random number generators this script draws from so repeated runs
# produce the same data. NumPy's generator supplies weights, volumes and the
# categorical columns; the stdlib ``random`` module supplies names, addresses,
# coordinates, dates, time windows and notes. Seeding only one of them leaves
# most of the output non-reproducible.
np.random.seed(42)
random.seed(42)
|
|
|
# Neighborhoods used to build synthetic addresses. Each entry is
# [neighborhood name, street names, two-digit postal-code prefix].
_SG_NEIGHBORHOODS = [
    ['Ang Mo Kio', ['Ang Mo Kio Avenue 1', 'Ang Mo Kio Avenue 3', 'Ang Mo Kio Avenue 4', 'Ang Mo Kio Avenue 10'], '56'],
    ['Bedok', ['Bedok North Avenue 1', 'Bedok North Road', 'Bedok Reservoir Road', 'New Upper Changi Road'], '46'],
    ['Bishan', ['Bishan Street 11', 'Bishan Street 12', 'Bishan Street 13', 'Bishan Street 22'], '57'],
    ['Bukit Merah', ['Jalan Bukit Merah', 'Henderson Road', 'Tiong Bahru Road', 'Redhill Close'], '15'],
    ['Bukit Batok', ['Bukit Batok East Avenue 6', 'Bukit Batok West Avenue 8', 'Bukit Batok Street 21'], '65'],
    ['Clementi', ['Clementi Avenue 1', 'Clementi Avenue 4', 'Clementi Road', 'Commonwealth Avenue West'], '12'],
    ['Geylang', ['Geylang East Avenue 1', 'Geylang Road', 'Guillemard Road', 'Sims Avenue'], '38'],
    ['Hougang', ['Hougang Avenue 1', 'Hougang Avenue 7', 'Hougang Street 91', 'Upper Serangoon Road'], '53'],
    ['Jurong East', ['Jurong East Street 13', 'Jurong East Avenue 1', 'Jurong Gateway Road'], '60'],
    ['Jurong West', ['Jurong West Street 41', 'Jurong West Street 52', 'Jurong West Street 93'], '64'],
    ['Kallang', ['Kallang Avenue', 'Geylang Bahru', 'Boon Keng Road', 'Upper Boon Keng Road'], '33'],
    ['Punggol', ['Punggol Central', 'Punggol Field', 'Punggol Road', 'Punggol Way'], '82'],
    ['Queenstown', ['Commonwealth Avenue', 'Commonwealth Drive', 'Mei Chin Road', 'Stirling Road'], '14'],
    ['Sengkang', ['Sengkang East Way', 'Sengkang West Way', 'Compassvale Road', 'Fernvale Road'], '54'],
    ['Serangoon', ['Serangoon Avenue 2', 'Serangoon Avenue 3', 'Serangoon North Avenue 1'], '55'],
    ['Tampines', ['Tampines Street 11', 'Tampines Street 21', 'Tampines Avenue 1', 'Tampines Avenue 4'], '52'],
    ['Toa Payoh', ['Toa Payoh Lorong 1', 'Toa Payoh Lorong 2', 'Toa Payoh Lorong 4', 'Toa Payoh Central'], '31'],
    ['Woodlands', ['Woodlands Avenue 1', 'Woodlands Drive 16', 'Woodlands Drive 72', 'Woodlands Circle'], '73'],
    ['Yishun', ['Yishun Avenue 1', 'Yishun Avenue 4', 'Yishun Ring Road', 'Yishun Street 22'], '76'],
]

# Approximate bounding box per neighborhood, used to sample coordinates when
# geocoding is disabled or fails: [min_lat, max_lat, min_lon, max_lon].
_NEIGHBORHOOD_BOUNDS = {
    'Ang Mo Kio': [1.360000, 1.380000, 103.830000, 103.860000],
    'Bedok': [1.320000, 1.335000, 103.920000, 103.950000],
    'Bishan': [1.345000, 1.360000, 103.830000, 103.855000],
    'Bukit Merah': [1.270000, 1.290000, 103.800000, 103.830000],
    'Bukit Batok': [1.340000, 1.360000, 103.740000, 103.770000],
    'Clementi': [1.310000, 1.325000, 103.750000, 103.780000],
    'Geylang': [1.310000, 1.325000, 103.880000, 103.900000],
    'Hougang': [1.370000, 1.385000, 103.880000, 103.900000],
    'Jurong East': [1.330000, 1.345000, 103.730000, 103.750000],
    'Jurong West': [1.340000, 1.360000, 103.690000, 103.720000],
    'Kallang': [1.300000, 1.320000, 103.850000, 103.880000],
    'Punggol': [1.390000, 1.410000, 103.900000, 103.920000],
    'Queenstown': [1.290000, 1.310000, 103.780000, 103.805000],
    'Sengkang': [1.380000, 1.395000, 103.870000, 103.900000],
    'Serangoon': [1.345000, 1.360000, 103.865000, 103.885000],
    'Tampines': [1.345000, 1.365000, 103.930000, 103.960000],
    'Toa Payoh': [1.326000, 1.341000, 103.840000, 103.865000],
    'Woodlands': [1.430000, 1.450000, 103.770000, 103.800000],
    'Yishun': [1.410000, 1.430000, 103.820000, 103.850000],
}


def _geocode_first_match(geolocator, queries, delay_seconds=1.0):
    """Try each query in order and return the first one that resolves.

    Args:
        geolocator: Object exposing ``geocode(query)`` (a geopy geocoder).
        queries: Address strings to try, most specific first.
        delay_seconds: Pause inserted after every request, whether it
            succeeded, returned nothing, or raised, so consecutive requests
            are always spaced out (Nominatim's public service asks for at
            most one request per second).

    Returns:
        ``(location, query)`` for the first query that resolves, or
        ``(None, None)`` when none of them do.

    Raises:
        Exception: Anything raised by ``geolocator.geocode`` is propagated
            (after the pause) for the caller to handle.
    """
    for query in queries:
        try:
            location = geolocator.geocode(query)
        finally:
            # Throttle even on failure; otherwise an error is followed
            # immediately by the next delivery's request.
            time.sleep(delay_seconds)
        if location:
            return location, query
    return None, None


def generate_delivery_data(n_deliveries=50, use_geocoding=False, output_dir=None, seed=None):
    """Generate synthetic delivery data with realistic Singapore addresses.

    Builds one row per delivery (id, customer, address, six-digit postal
    code, coordinates, date, time window, weight, volume, priority, vehicle
    type, status and optional instructions), writes the table to
    ``delivery_data.csv`` and returns it.

    Args:
        n_deliveries (int): Number of rows to generate. Must be >= 0.
        use_geocoding (bool): When True, look each address up with
            Nominatim (network access, roughly one second or more per
            address) and fall back to a random point inside the
            neighborhood's bounding box if the lookup fails. When False,
            always use the random point.
        output_dir (str or None): Directory that receives
            ``delivery_data.csv``. Defaults to ``data/delivery-data`` under
            the directory three levels above this file.
        seed (int or None): When given, all random draws come from private
            generators seeded with this value, so the same seed yields the
            same rows (except ``delivery_date``, which is relative to
            today). When None, the process-wide ``random`` and
            ``numpy.random`` generators are used.

    Returns:
        pandas.DataFrame: The generated deliveries, one row each.

    Raises:
        ValueError: If ``n_deliveries`` is negative.
    """
    if n_deliveries < 0:
        raise ValueError("n_deliveries must be non-negative")

    # The ``random`` module and ``random.Random`` share the same method names,
    # as do ``numpy.random`` and ``RandomState``; pick private instances only
    # when a seed is requested so the default path keeps using global state.
    py_rng = random if seed is None else random.Random(seed)
    np_rng = np.random if seed is None else np.random.RandomState(seed)

    delivery_ids = [f'DEL{str(i).zfill(4)}' for i in range(1, n_deliveries + 1)]

    first_names = ['Tan', 'Lim', 'Lee', 'Ng', 'Wong', 'Chan', 'Goh', 'Ong', 'Teo', 'Koh',
                   'Chua', 'Loh', 'Yeo', 'Sim', 'Ho', 'Ang', 'Tay', 'Yap', 'Leong', 'Foo']
    last_names = ['Wei', 'Ming', 'Hui', 'Ling', 'Yong', 'Jun', 'Hong', 'Xin', 'Yi', 'Jie',
                  'Cheng', 'Kai', 'Zhi', 'Tian', 'Yu', 'En', 'Yang', 'Hao', 'Chong', 'Zheng']
    customer_names = [
        f"{py_rng.choice(first_names)} {py_rng.choice(last_names)}"
        for _ in range(n_deliveries)
    ]

    addresses = []
    postal_codes = []
    latitudes = []
    longitudes = []
    neighborhood_names = []

    # Only construct the geocoder when it will actually be used.
    geolocator = Nominatim(user_agent="delivery_app") if use_geocoding else None

    for _ in range(n_deliveries):
        neighborhood, streets, postal_prefix = py_rng.choice(_SG_NEIGHBORHOODS)
        street = py_rng.choice(streets)
        block = py_rng.randint(100, 600)
        unit_floor = py_rng.randint(2, 20)
        unit_number = py_rng.randint(1, 150)

        # Singapore postal codes are six digits: the two-digit prefix plus
        # four more digits.
        postal_code = f"{postal_prefix}{py_rng.randint(0, 9999):04d}"

        display_address = f"Block {block}, #{unit_floor:02d}-{unit_number:03d}, {street}, Singapore {postal_code}"
        geocode_address = f"{block} {street}, Singapore {postal_code}"

        # Start from a random point inside the neighborhood; a successful
        # geocoding result below overrides it.
        min_lat, max_lat, min_lon, max_lon = _NEIGHBORHOOD_BOUNDS[neighborhood]
        lat = round(py_rng.uniform(min_lat, max_lat), 6)
        lon = round(py_rng.uniform(min_lon, max_lon), 6)

        if use_geocoding:
            simpler_address = f"{street}, Singapore {postal_code}"
            try:
                location, matched = _geocode_first_match(
                    geolocator, [geocode_address, simpler_address]
                )
            except Exception as e:
                # Best effort: any lookup failure keeps the sampled point.
                print(f"[ERROR] Geocoding error for {geocode_address}: {str(e)}")
            else:
                if location is None:
                    print(f"[WARN] Could not geocode: {geocode_address}, using neighborhood coordinates")
                else:
                    lat = location.latitude
                    lon = location.longitude
                    if matched == geocode_address:
                        print(f"[OK] Successfully geocoded: {matched} -> ({lat}, {lon})")
                    else:
                        print(f"[OK] Fallback geocoded: {matched} -> ({lat}, {lon})")

        addresses.append(display_address)
        postal_codes.append(postal_code)
        latitudes.append(lat)
        longitudes.append(lon)
        neighborhood_names.append(neighborhood)

    # Delivery dates fall one to seven days after today.
    base_date = datetime.now().date()
    delivery_dates = [
        base_date + timedelta(days=py_rng.randint(1, 7)) for _ in range(n_deliveries)
    ]

    # Windows start on the hour between 09:00 and 16:00, last one to three
    # hours, and never end later than 18:00.
    time_windows = []
    for _ in range(n_deliveries):
        start_hour = py_rng.randint(9, 16)
        window_length = py_rng.choice([1, 2, 3])
        end_hour = min(start_hour + window_length, 18)
        time_windows.append(f"{start_hour:02d}:00-{end_hour:02d}:00")

    # NumPy draws stay in this order so results for a given NumPy seed are
    # unchanged.
    weights = np_rng.uniform(0.5, 20.0, n_deliveries)
    volumes = np_rng.uniform(0.01, 0.5, n_deliveries)
    priorities = np_rng.choice(['High', 'Medium', 'Low'], n_deliveries,
                               p=[0.2, 0.5, 0.3])
    vehicle_types = np_rng.choice(['Standard', 'Large', 'Refrigerated'], n_deliveries,
                                  p=[0.7, 0.2, 0.1])
    statuses = np_rng.choice(['Pending', 'Assigned', 'In Transit', 'Delivered'], n_deliveries,
                             p=[0.6, 0.2, 0.15, 0.05])

    special_instructions = [
        'Call customer before delivery',
        'Fragile items',
        'Leave at door',
        'Signature required',
        'No delivery on weekends',
        None,
    ]
    # About 70% of rows draw from the list (which itself contains None);
    # the rest have no instruction at all.
    notes = [
        py_rng.choice(special_instructions) if py_rng.random() < 0.7 else None
        for _ in range(n_deliveries)
    ]

    df = pd.DataFrame({
        'delivery_id': delivery_ids,
        'customer_name': customer_names,
        'address': addresses,
        'postal_code': postal_codes,
        'neighborhood': neighborhood_names,
        'latitude': latitudes,
        'longitude': longitudes,
        'delivery_date': delivery_dates,
        'time_window': time_windows,
        'weight_kg': weights.round(2),
        'volume_m3': volumes.round(3),
        'priority': priorities,
        'vehicle_type': vehicle_types,
        'status': statuses,
        'special_instructions': notes,
    })

    if output_dir is None:
        # abspath first: with a relative __file__, repeated dirname() calls
        # collapse to '' and the path would silently become cwd-relative.
        here = os.path.abspath(__file__)
        base_dir = os.path.dirname(os.path.dirname(os.path.dirname(here)))
        output_dir = os.path.join(base_dir, 'data', 'delivery-data')
    os.makedirs(output_dir, exist_ok=True)

    output_path = os.path.join(output_dir, 'delivery_data.csv')
    df.to_csv(output_path, index=False)
    print(f"Delivery data generated and saved to {output_path}")
    return df
|
|
|
if __name__ == "__main__":
    # Script entry point: build 50 deliveries and preview the first rows.
    # USE_GEOCODING is forwarded as ``use_geocoding``; set it to False to
    # skip the live address lookups.
    USE_GEOCODING = True
    delivery_data = generate_delivery_data(n_deliveries=50, use_geocoding=USE_GEOCODING)
    print("Sample of delivery data:", delivery_data.head(), sep="\n")
|
|
|
|