Spaces:

Jing997
/

DeliveryRouteOptimisation

Running

App Files Files Community

DeliveryRouteOptimisation / src /utils /generate_vehicle_data.py

Jing997

add utils src

300be5a 5 days ago

raw

history blame contribute delete

6.58 kB

	import pandas as pd
	import numpy as np
	import random
	from datetime import datetime, timedelta
	import os

	# Seed both random sources for reproducibility: the generator below draws
	# from the stdlib `random` module as well as from NumPy's global RNG, so
	# seeding only one of them leaves most of the output non-deterministic.
	np.random.seed(43)
	random.seed(43)

	def generate_vehicle_data(n_vehicles=10, output_dir=None, seed=None):
	    """
	    Generate synthetic vehicle data for a delivery fleet optimization problem.

	    Builds one row per vehicle (type, capacities, home depot, driver, shift
	    window, speed, cost, status and license plate), writes the table to
	    ``vehicle_data.csv`` and returns it.

	    Parameters:
	    -----------
	    n_vehicles : int, default=10
	        Number of vehicles to generate in the fleet. Must be non-negative.
	    output_dir : str or path-like, optional
	        Directory the CSV is written to (created if missing). When omitted,
	        ``data/vehicle-data`` under the directory two levels above the one
	        containing this file is used.
	    seed : int, optional
	        When given, seeds both the stdlib ``random`` module and NumPy's
	        global RNG before any value is drawn, so repeated calls with the
	        same seed return identical data. When omitted, the current global
	        RNG state is used unchanged.

	    Returns:
	    --------
	    pd.DataFrame
	        DataFrame containing the generated vehicle data, one row per vehicle.

	    Raises:
	    -------
	    ValueError
	        If ``n_vehicles`` is negative.
	    """
	    if n_vehicles < 0:
	        raise ValueError(f"n_vehicles must be non-negative, got {n_vehicles}")

	    # The function draws from two independent global generators; seed both
	    # so a caller-supplied seed really pins the whole output.
	    if seed is not None:
	        random.seed(seed)
	        np.random.seed(seed)

	    # Vehicle IDs: VEH001, VEH002, ...
	    vehicle_ids = [f'VEH{i:03d}' for i in range(1, n_vehicles + 1)]

	    # Vehicle types, weighted towards standard vans
	    vehicle_types = [
	        random.choices(['Standard', 'Large', 'Refrigerated'],
	                       weights=[0.7, 0.2, 0.1])[0]
	        for _ in range(n_vehicles)
	    ]

	    # Capacity ranges per type: ((min_kg, max_kg), (min_m3, max_m3))
	    capacity_ranges = {
	        'Standard': ((800, 1200), (8, 12)),
	        'Large': ((1500, 2500), (15, 25)),
	        'Refrigerated': ((600, 1000), (6, 10)),
	    }
	    max_weights = []
	    max_volumes = []
	    for v_type in vehicle_types:
	        weight_range, volume_range = capacity_ranges[v_type]
	        # Weight is drawn before volume for each vehicle.
	        max_weights.append(random.uniform(*weight_range))
	        max_volumes.append(random.uniform(*volume_range))

	    # Realistic depot/warehouse locations in Singapore industrial areas
	    # [name, latitude, longitude]
	    warehouse_locations = [
	        ["Tuas Logistics Hub", 1.3187, 103.6390],
	        ["Jurong Industrial Estate", 1.3233, 103.6994],
	        ["Loyang Industrial Park", 1.3602, 103.9761],
	        ["Changi Logistics Centre", 1.3497, 103.9742],
	        ["Keppel Distripark", 1.2706, 103.8219],
	        ["Pandan Logistics Hub", 1.3187, 103.7509],
	        ["Alexandra Distripark", 1.2744, 103.8012],
	        ["Kallang Way Industrial", 1.3315, 103.8731],
	        ["Defu Industrial Park", 1.3610, 103.8891],
	        ["Woodlands Industrial", 1.4428, 103.7875],
	    ]

	    # Assign a home warehouse to every vehicle.
	    if n_vehicles <= len(warehouse_locations):
	        # Small fleet: the first n_vehicles warehouses, one vehicle each.
	        selected_warehouses = warehouse_locations[:n_vehicles]
	    else:
	        # Large fleet: every warehouse once, then random picks for the rest
	        # (so several vehicles can share a warehouse).
	        remaining = n_vehicles - len(warehouse_locations)
	        selected_warehouses = warehouse_locations + [
	            random.choice(warehouse_locations) for _ in range(remaining)
	        ]

	    # Shuffle so vehicle order does not mirror the warehouse list order.
	    # selected_warehouses is always a fresh list, so the table above is
	    # left untouched.
	    random.shuffle(selected_warehouses)

	    # Split into columns, nudging each coordinate by up to 0.0005 degrees
	    # (roughly 55 m) so vehicles sharing a warehouse get distinct positions,
	    # simulating different loading bays.
	    depot_names = []
	    depot_lats = []
	    depot_lons = []
	    for name, lat, lon in selected_warehouses:
	        depot_names.append(name)
	        depot_lats.append(lat + random.uniform(-0.0005, 0.0005))
	        depot_lons.append(lon + random.uniform(-0.0005, 0.0005))

	    # Driver names: the first len(first_names) vehicles each get a distinct
	    # first name; beyond that first names are drawn at random.
	    first_names = ['Ahmad', 'Raj', 'Michael', 'Wei', 'Siti', 'Kumar', 'Chong', 'David', 'Suresh', 'Ali']
	    last_names = ['Tan', 'Singh', 'Lee', 'Wong', 'Kumar', 'Abdullah', 'Zhang', 'Lim', 'Raj', 'Teo']
	    driver_names = []
	    for i in range(n_vehicles):
	        if i < len(first_names):
	            first = first_names[i]
	        else:
	            first = random.choice(first_names)
	        driver_names.append(f"{first} {random.choice(last_names)}")

	    # Vehicle availability window, on the hour (HH:00)
	    start_times = [f"{random.randint(7, 10):02d}:00" for _ in range(n_vehicles)]
	    end_times = [f"{random.randint(17, 21):02d}:00" for _ in range(n_vehicles)]

	    # Max working hours
	    max_working_hours = [random.randint(8, 10) for _ in range(n_vehicles)]

	    # Average speed (km/h)
	    avg_speeds = [random.uniform(30, 50) for _ in range(n_vehicles)]

	    # Cost per km
	    cost_per_km = [random.uniform(0.5, 1.5) for _ in range(n_vehicles)]

	    # Vehicle status (the only column drawn from NumPy's RNG)
	    statuses = np.random.choice(['Available', 'In Service', 'Maintenance'],
	                                n_vehicles, p=[0.7, 0.2, 0.1])

	    # License plates: 'S' + three random letters + a four-digit number
	    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
	    license_plates = []
	    for _ in range(n_vehicles):
	        letter_part = ''.join(random.choices(letters, k=3))
	        number_part = random.randint(1000, 9999)
	        license_plates.append(f"S{letter_part}{number_part}")

	    # Create DataFrame (floats rounded for a tidy CSV)
	    df = pd.DataFrame({
	        'vehicle_id': vehicle_ids,
	        'vehicle_type': vehicle_types,
	        'license_plate': license_plates,
	        'driver_name': driver_names,
	        'max_weight_kg': np.array(max_weights).round(2),
	        'max_volume_m3': np.array(max_volumes).round(2),
	        'depot_name': depot_names,
	        'depot_latitude': np.array(depot_lats).round(6),
	        'depot_longitude': np.array(depot_lons).round(6),
	        'start_time': start_times,
	        'end_time': end_times,
	        'max_working_hours': max_working_hours,
	        'avg_speed_kmh': np.array(avg_speeds).round(2),
	        'cost_per_km': np.array(cost_per_km).round(2),
	        'status': statuses,
	    })

	    # Resolve the output directory and make sure it exists
	    if output_dir is None:
	        project_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
	        data_dir = os.path.join(project_root, 'data', 'vehicle-data')
	    else:
	        data_dir = os.fspath(output_dir)
	    os.makedirs(data_dir, exist_ok=True)

	    # Save to CSV
	    output_path = os.path.join(data_dir, 'vehicle_data.csv')
	    df.to_csv(output_path, index=False)
	    print(f"Vehicle data generated and saved to {output_path}")
	    return df

	if __name__ == "__main__":
	    # Script entry point: build a 10-vehicle fleet (which also writes the
	    # CSV) and preview the first rows on stdout.
	    vehicle_data = generate_vehicle_data(n_vehicles=10)
	    print("Sample of vehicle data:", vehicle_data.head(), sep="\n")