# streamlit_app.py - Bolt Driver Recommendation System
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import folium
from folium.plugins import HeatMap, MarkerCluster
from streamlit_folium import folium_static
import pickle
import os
# Set page configuration (must run before any other Streamlit command)
st.set_page_config(
    page_title="Bolt Driver Recommendation System",
    page_icon="🚖",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS styling
# NOTE(review): this literal contains only a newline, so no styles are
# injected - confirm whether a <style> block was meant to live here.
st.markdown("""
""", unsafe_allow_html=True)

# Header and app description
# The line breaks inside these literals are written as "\n": a single-quoted
# string cannot span physical lines.
st.markdown("\nBolt Driver Recommendation System\n", unsafe_allow_html=True)
with st.container():
    st.markdown(
        "This application helps Bolt drivers find optimal areas to position "
        "themselves based on predicted ride demand and value. The "
        "recommendations are personalized based on time, location, and "
        "driver preferences.\n",
        unsafe_allow_html=True,
    )
class DemandPredictionModel:
    """Synthetic ride-demand model on a 10x10 grid covering Tallinn.

    The model keeps two arrays indexed ``[day_of_week, hour, lat_bin, lng_bin]``:
    expected ride counts ("demand") and ride values ("value"). Both are
    generated in memory from hard-coded hotspots and time factors plus a small
    random jitter; nothing is loaded from disk.
    """

    # Earth radius in kilometers, used for great-circle distances.
    EARTH_RADIUS_KM = 6371

    def __init__(self, seed=None):
        """Initialize the demand prediction model.

        Parameters:
        - seed: Optional integer. When given, the random jitter is drawn from a
          private ``numpy.random.RandomState(seed)`` so the generated patterns
          are reproducible. When omitted, the global ``numpy.random`` state is
          used.
        """
        # In a real app, we would load the model from a file.
        # Here we create a dummy version for demonstration.
        self._rng = np.random if seed is None else np.random.RandomState(seed)
        self.setup_demo_data()

    def setup_demo_data(self):
        """Set up the grid geometry and generate the demonstration patterns."""
        # Geographic boundaries (Tallinn)
        self.min_lat, self.max_lat = 59.32, 59.57
        self.min_lng, self.max_lng = 24.51, 24.97

        # Grid resolution: number of cells along each axis
        grid_size = 10
        self.lat_step = (self.max_lat - self.min_lat) / grid_size
        self.lng_step = (self.max_lng - self.min_lng) / grid_size

        # Cell edges; there is one more edge than there are cells
        self.lat_bins = np.linspace(self.min_lat, self.max_lat, grid_size + 1)
        self.lng_bins = np.linspace(self.min_lng, self.max_lng, grid_size + 1)

        self.demand_patterns = self.create_demand_patterns()

    def create_demand_patterns(self):
        """Generate the demo demand and value arrays.

        Returns:
        - Dict with keys ``"demand"`` and ``"value"``, each a float array of
          shape ``(7, 24, lat_cells, lng_cells)``.
        """
        days = 7
        hours = 24
        lat_bins = len(self.lat_bins) - 1
        lng_bins = len(self.lng_bins) - 1
        # Fall back to the global generator if __init__ was bypassed.
        rng = getattr(self, "_rng", np.random)

        demand_patterns = np.zeros((days, hours, lat_bins, lng_bins))
        value_patterns = np.zeros((days, hours, lat_bins, lng_bins))

        # Key areas: grid cell, base ride count and base ride value.
        # Order matters (see the NOTE inside the loop below).
        hotspots = [
            {"lat_idx": 4, "lng_idx": 5, "base_demand": 300, "value": 1.91},  # city center
            {"lat_idx": 4, "lng_idx": 4, "base_demand": 150, "value": 1.94},  # secondary hub
            {"lat_idx": 3, "lng_idx": 4, "base_demand": 80, "value": 2.89},   # university area
            {"lat_idx": 3, "lng_idx": 3, "base_demand": 60, "value": 1.85},   # residential zone
            {"lat_idx": 4, "lng_idx": 6, "base_demand": 50, "value": 1.56},   # business district
        ]

        # Demand multiplier per hour of day
        hourly_factors = {
            0: 0.5, 1: 0.4, 2: 0.3, 3: 0.3, 4: 0.3, 5: 0.5,
            6: 0.8, 7: 0.9, 8: 0.7, 9: 0.6, 10: 0.6, 11: 0.6,
            12: 0.7, 13: 0.8, 14: 0.9, 15: 1.0, 16: 1.0, 17: 0.8,
            18: 0.7, 19: 0.7, 20: 0.7, 21: 0.8, 22: 0.9, 23: 0.7,
        }
        # Value multiplier per hour of day
        value_factors = {
            0: 1.4, 1: 0.8, 2: 1.0, 3: 0.6, 4: 1.6, 5: 0.7,
            6: 0.9, 7: 1.1, 8: 1.0, 9: 0.7, 10: 0.8, 11: 1.1,
            12: 0.8, 13: 0.9, 14: 1.6, 15: 0.9, 16: 0.8, 17: 1.0,
            18: 0.8, 19: 0.7, 20: 1.1, 21: 0.8, 22: 1.0, 23: 1.2,
        }
        # Demand multiplier per day of week
        day_factors = {
            0: 0.8,   # Monday
            1: 0.9,   # Tuesday
            2: 0.9,   # Wednesday
            3: 0.85,  # Thursday
            4: 0.95,  # Friday
            5: 1.0,   # Saturday
            6: 0.8,   # Sunday
        }

        # The eight surrounding cells with their demand spillover share:
        # 0.5 for edge neighbours, 0.5 / sqrt(2) for diagonal ones.
        neighbour_offsets = [
            (d_lat, d_lng, 0.5 / np.sqrt(d_lat**2 + d_lng**2))
            for d_lat in (-1, 0, 1)
            for d_lng in (-1, 0, 1)
            if (d_lat, d_lng) != (0, 0)
        ]

        for day in range(days):
            for hour in range(hours):
                time_factor = hourly_factors[hour] * day_factors[day]

                # Tuesday and Thursday at 04:00, 22:00 and 23:00 get doubled values
                boosted = day in (1, 3) and hour in (4, 22, 23)
                special_value_factor = 2.0 if boosted else 1.0

                for spot in hotspots:
                    lat_idx, lng_idx = spot["lat_idx"], spot["lng_idx"]

                    # Demand with +/-10% jitter. The demand draw comes before
                    # the value draw so a seeded stream stays reproducible.
                    demand = spot["base_demand"] * time_factor
                    demand *= rng.uniform(0.9, 1.1)

                    # Value with +/-5% jitter
                    value = spot["value"] * value_factors[hour] * special_value_factor
                    value *= rng.uniform(0.95, 1.05)

                    # NOTE(review): hotspot cells are assigned with "=" while
                    # neighbours are accumulated with "+=". Several hotspots
                    # are adjacent, so a later hotspot overwrites spillover
                    # already written into its cell, and its own spillover is
                    # added on top of earlier hotspot cells. The result depends
                    # on the order of `hotspots`. Kept as-is.
                    demand_patterns[day, hour, lat_idx, lng_idx] = demand
                    value_patterns[day, hour, lat_idx, lng_idx] = value

                    for d_lat, d_lng, spillover_factor in neighbour_offsets:
                        n_lat = lat_idx + d_lat
                        n_lng = lng_idx + d_lng
                        if 0 <= n_lat < lat_bins and 0 <= n_lng < lng_bins:
                            demand_patterns[day, hour, n_lat, n_lng] += demand * spillover_factor
                            # Slightly lower values in spillover areas
                            value_patterns[day, hour, n_lat, n_lng] += value * 0.9

        return {
            "demand": demand_patterns,
            "value": value_patterns,
        }

    @staticmethod
    def _haversine_km(lat1, lng1, lat2, lng2):
        """Return the great-circle distance in kilometers between two points given in degrees."""
        d_lat = np.radians(lat1 - lat2)
        d_lng = np.radians(lng1 - lng2)
        a = (np.sin(d_lat / 2) ** 2
             + np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) * np.sin(d_lng / 2) ** 2)
        # Rounding can push `a` marginally outside [0, 1]; clamp so the square
        # roots below never produce NaN.
        a = np.clip(a, 0.0, 1.0)
        c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
        return DemandPredictionModel.EARTH_RADIUS_KM * c

    def predict(self, day, hour, current_lat=None, current_lng=None, value_weight=0.5, top_n=5):
        """
        Predict high-demand areas for a given day and hour

        Parameters:
        - day: Day of week (0=Monday, 6=Sunday)
        - hour: Hour of day (0-23)
        - current_lat: Driver's current latitude (optional)
        - current_lng: Driver's current longitude (optional)
        - value_weight: Weight for balancing demand vs value (0-1);
          0 ranks by demand only, 1 by value only
        - top_n: Number of recommendations to return

        Returns:
        - List of at most top_n dicts, best first, with keys center_lat,
          center_lng, predicted_rides, avg_value, expected_value, score,
          adjusted_score and distance_km. distance_km is None unless both
          current_lat and current_lng are given. Cells with no demand are
          never returned.
        """
        demand_matrix = self.demand_patterns["demand"][day, hour]
        value_matrix = self.demand_patterns["value"][day, hour]

        # Normalisation constants are the same for every cell, so compute them
        # once instead of rescanning both matrices per cell.
        max_demand = np.max(demand_matrix)
        max_value = np.max(value_matrix)

        # Cell centres are the midpoints of consecutive bin edges
        lat_centers = (self.lat_bins[:-1] + self.lat_bins[1:]) / 2
        lng_centers = (self.lng_bins[:-1] + self.lng_bins[1:]) / 2

        has_location = current_lat is not None and current_lng is not None

        recommendations = []
        for lat_idx, center_lat in enumerate(lat_centers):
            for lng_idx, center_lng in enumerate(lng_centers):
                demand = demand_matrix[lat_idx, lng_idx]
                if not demand > 0:
                    continue
                value = value_matrix[lat_idx, lng_idx]

                # Scale demand and value to [0, 1] before blending
                demand_score = demand / max_demand if max_demand > 0 else 0
                value_score = value / max_value if max_value > 0 else 0
                score = (1 - value_weight) * demand_score + value_weight * value_score

                if has_location:
                    distance_km = self._haversine_km(
                        current_lat, current_lng, center_lat, center_lng
                    )
                    # Distance penalty: halves the score at 5 km and cuts it
                    # to a third at 10 km.
                    adjusted_score = score * (1.0 / (1.0 + distance_km / 5.0))
                else:
                    distance_km = None
                    adjusted_score = score

                recommendations.append({
                    "center_lat": center_lat,
                    "center_lng": center_lng,
                    "predicted_rides": demand,
                    "avg_value": value,
                    "expected_value": demand * value,
                    "score": score,
                    "adjusted_score": adjusted_score,
                    "distance_km": distance_km,
                })

        # Best first; sorted() is stable, so ties keep grid (row-major) order
        recommendations.sort(key=lambda rec: rec["adjusted_score"], reverse=True)
        return recommendations[:top_n]
# Main application flow
#
# NOTE(review): several st.markdown(..., unsafe_allow_html=True) calls below
# pass an empty or newline-only string and therefore render nothing visible.
# They are kept where they were found - confirm whether HTML wrappers were
# meant to be emitted there.

_DAY_NAMES = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# Tallinn center: default driver position and initial map focus
_TALLINN_CENTER = (59.436, 24.753)

# Footer cards: (heading, markdown body)
_TIP_CARDS = (
    (
        "Best Times",
        "\n"
        "- **Weekdays**: 7-9 AM, 4-6 PM\n"
        "- **Weekends**: 10 PM - 2 AM\n"
        "- **High Value**: Tuesday & Thursday early morning (4-5 AM) and late night (10 PM-12 AM)\n",
    ),
    (
        "Best Areas",
        "\n"
        "- **City Center**: Consistent demand throughout the day\n"
        "- **University Area**: Higher value rides, especially weekdays\n"
        "- **Business District**: Good during morning rush hours\n",
    ),
    (
        "Strategy Tips",
        "\n"
        "- Position 5-10 minutes before peak times\n"
        "- Balance high-volume vs high-value areas\n"
        "- For longer shifts, start with high-value rides then switch to high-volume\n",
    ),
)


def _get_model():
    """Return the demand model, building it once per browser session.

    Streamlit re-executes the script on every widget interaction. The model
    adds random jitter when it is constructed, so rebuilding it on each rerun
    would make every displayed number change whenever any control is touched.
    """
    if "demand_model" not in st.session_state:
        st.session_state["demand_model"] = DemandPredictionModel()
    return st.session_state["demand_model"]


def _render_sidebar():
    """Draw the sidebar controls and return the chosen settings as a dict.

    Keys: selected_day, day_idx, selected_hour, use_location, current_lat,
    current_lng (both None when the location checkbox is off),
    num_recommendations, value_weight, show_heatmap.
    """
    with st.sidebar:
        st.markdown('', unsafe_allow_html=True)

        # Time selection, defaulting to the server's current day and hour
        st.subheader("Time Selection")
        today = datetime.now()
        selected_day = st.selectbox("Day of Week", _DAY_NAMES, index=today.weekday())
        selected_hour = st.slider("Hour of Day", 0, 23, today.hour, format="%d:00")

        # Location input
        st.subheader("Driver Location")
        use_location = st.checkbox("Use Current Location", value=True)
        current_lat = current_lng = None
        if use_location:
            default_lat, default_lng = _TALLINN_CENTER
            lat_col, lng_col = st.columns(2)
            with lat_col:
                current_lat = st.number_input(
                    "Latitude", value=default_lat, format="%.5f", step=0.001
                )
            with lng_col:
                current_lng = st.number_input(
                    "Longitude", value=default_lng, format="%.5f", step=0.001
                )

        # Preference settings
        st.subheader("Preferences")
        num_recommendations = st.slider("Number of Recommendations", 3, 10, 5)
        value_weight = st.slider(
            "Optimization Balance",
            min_value=0.0,
            max_value=1.0,
            value=0.5,
            step=0.1,
            help="0 = Focus on ride count, 1 = Focus on ride value",
        )

        # Advanced options for visual
        st.subheader("Display Options")
        show_heatmap = st.checkbox("Show Demand Heatmap", value=True)

    return {
        "selected_day": selected_day,
        "day_idx": _DAY_NAMES.index(selected_day),
        "selected_hour": selected_hour,
        "use_location": use_location,
        "current_lat": current_lat,
        "current_lng": current_lng,
        "num_recommendations": num_recommendations,
        "value_weight": value_weight,
        "show_heatmap": show_heatmap,
    }


def _render_map(model, recommendations, settings):
    """Draw the folium map: driver marker, numbered recommendations and optional heatmap."""
    try:
        demand_map = folium.Map(
            location=list(_TALLINN_CENTER),
            zoom_start=12,
            tiles="CartoDB positron",
        )

        # Driver marker, only when a location was entered
        if settings["use_location"]:
            folium.Marker(
                location=[settings["current_lat"], settings["current_lng"]],
                popup="Your Location",
                icon=folium.Icon(color="blue", icon="user", prefix="fa"),
                tooltip="Your Current Location",
            ).add_to(demand_map)

        for rank, rec in enumerate(recommendations, start=1):
            location = [rec["center_lat"], rec["center_lng"]]

            # Popups are rendered as HTML, where plain newlines collapse into
            # spaces; join the lines with <br> so each value gets its own line.
            popup_lines = [
                f"Recommendation {rank}",
                f"Expected rides: {rec['predicted_rides']:.1f}",
                f"Avg value: €{rec['avg_value']:.2f}",
                f"Expected value: €{rec['expected_value']:.2f}",
            ]
            if rec["distance_km"] is not None:
                popup_lines.append(f"Distance: {rec['distance_km']:.2f} km")

            folium.CircleMarker(
                location=location,
                radius=20,
                color="red",
                fill=True,
                fill_color="red",
                fill_opacity=0.6,
                popup="<br>".join(popup_lines),
            ).add_to(demand_map)

            # Rank label drawn on top of the circle
            folium.Marker(
                location=location,
                icon=folium.DivIcon(html=f"{rank}\n"),
            ).add_to(demand_map)

        if settings["show_heatmap"]:
            # Ask for up to 100 cells (the full 10x10 grid) without a driver
            # location, so the heatmap shows raw demand instead of
            # distance-adjusted ranking.
            all_predictions = model.predict(
                settings["day_idx"], settings["selected_hour"], top_n=100
            )
            heat_data = [
                [pred["center_lat"], pred["center_lng"], pred["predicted_rides"]]
                for pred in all_predictions
            ]
            HeatMap(
                heat_data,
                radius=15,
                # String keys serialise to the same JSON object as float keys.
                # NOTE(review): some folium releases are reported to raise on
                # float keys here - confirm against the installed version.
                gradient={
                    "0.2": "blue",
                    "0.4": "lime",
                    "0.6": "yellow",
                    "0.8": "orange",
                    "1.0": "red",
                },
                name="Demand Heatmap",
                show=True,
            ).add_to(demand_map)

        folium.LayerControl().add_to(demand_map)
        folium_static(demand_map, width=700)
    except Exception as e:
        # UI boundary: a map failure must not take down the rest of the page.
        st.error(f"Error rendering map: {e}")
        st.info("Showing tabular results instead.")


def _render_summary(recommendations):
    """Show the headline metrics, the ranked table and the scoring explanation."""
    if recommendations:
        top_rec = recommendations[0]
        st.markdown('', unsafe_allow_html=True)
        st.subheader("Top Recommendation")
        left_col, right_col = st.columns(2)
        with left_col:
            st.metric("Expected Rides", f"{top_rec['predicted_rides']:.1f}")
            st.metric("Avg Value", f"€{top_rec['avg_value']:.2f}")
        with right_col:
            st.metric("Expected Value", f"€{top_rec['expected_value']:.2f}")
            if top_rec["distance_km"] is not None:
                st.metric("Distance", f"{top_rec['distance_km']:.2f} km")
        st.markdown(f"Location: [{top_rec['center_lat']:.4f}, {top_rec['center_lng']:.4f}]")
        st.markdown("\n", unsafe_allow_html=True)

        # Ranked table of all recommendations
        st.subheader("All Recommendations")
        rec_df = pd.DataFrame(recommendations)
        display_df = pd.DataFrame({
            "Rank": range(1, len(rec_df) + 1),
            "Expected Rides": rec_df["predicted_rides"].round(1),
            "Avg Value (€)": rec_df["avg_value"].round(2),
            "Expected Value (€)": rec_df["expected_value"].round(2),
        })
        # Distance is only present when a driver location was supplied
        if "distance_km" in rec_df.columns and rec_df["distance_km"].notna().any():
            display_df["Distance (km)"] = rec_df["distance_km"].round(2)
        st.table(display_df)
    else:
        # An empty list would make the column lookups above raise KeyError.
        st.info("No recommendations available for the selected time.")

    # Explanation of the score calculation
    st.markdown('', unsafe_allow_html=True)
    st.markdown("**How recommendations are calculated:**")
    st.markdown(
        "\n"
        "- Ride count predictions based on historical patterns\n"
        "- Value based on average ride fares\n"
        "- Recommendations balanced by your preferences\n"
        "- Distance factored in when location is provided\n"
    )
    st.markdown("\n", unsafe_allow_html=True)


def _summarise_demand(recs):
    """Return (total predicted rides, ride-weighted average value) for a list of predictions."""
    total_demand = sum(rec["predicted_rides"] for rec in recs)
    if total_demand > 0:
        weighted_value = sum(rec["avg_value"] * rec["predicted_rides"] for rec in recs)
        return total_demand, weighted_value / total_demand
    return total_demand, 0


def _render_hourly_tab(model, settings):
    """Plot city-wide demand and average value per hour for the selected day."""
    hourly_rows = []
    for hour in range(24):
        total_demand, avg_value = _summarise_demand(
            model.predict(settings["day_idx"], hour, top_n=100)
        )
        hourly_rows.append({"hour": hour, "demand": total_demand, "value": avg_value})
    hourly_df = pd.DataFrame(hourly_rows)

    # Dual-axis chart: demand on the left axis, value on the right.
    # Plotly hover text breaks lines on <br>, not on "\n".
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=hourly_df["hour"],
        y=hourly_df["demand"],
        name="Demand",
        line=dict(color="#4e8cff", width=3),
        hovertemplate="Hour: %{x}<br>Demand: %{y:.1f}",
    ))
    fig.add_trace(go.Scatter(
        x=hourly_df["hour"],
        y=hourly_df["value"],
        name="Avg Value (€)",
        line=dict(color="#ff6b6b", width=3, dash="dot"),
        yaxis="y2",
        hovertemplate="Hour: %{x}<br>Avg Value: €%{y:.2f}",
    ))

    # Highlight selected hour
    fig.add_vline(
        x=settings["selected_hour"],
        line_width=2,
        line_dash="dash",
        line_color="green",
        annotation_text="Selected Hour",
        annotation_position="top right",
    )

    # Axis title fonts are set through title.font; the older `titlefont`
    # shorthand is deprecated and rejected by newer Plotly releases.
    fig.update_layout(
        title=f"Hourly Demand Pattern for {settings['selected_day']}",
        xaxis=dict(
            title="Hour of Day",
            tickmode="linear",
            tick0=0,
            dtick=1,
        ),
        yaxis=dict(
            title=dict(text="Demand (Expected Rides)", font=dict(color="#4e8cff")),
            tickfont=dict(color="#4e8cff"),
        ),
        yaxis2=dict(
            title=dict(text="Average Value (€)", font=dict(color="#ff6b6b")),
            tickfont=dict(color="#ff6b6b"),
            anchor="x",
            overlaying="y",
            side="right",
        ),
        hovermode="x unified",
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="center",
            x=0.5,
        ),
    )
    st.plotly_chart(fig, use_container_width=True)

    st.markdown(
        "\n"
        "**Key Observations:**\n"
        "- Peak demand typically occurs between 15:00-18:00 (3-6 PM)\n"
        "- Early morning hours (4-5 AM) often show higher average ride values\n"
        "- Morning rush hour (6-9 AM) shows moderate demand with variable values\n"
    )


def _render_daily_tab(model, settings):
    """Plot peak-hour demand per weekday, coloured by average value."""
    daily_rows = []
    for day, day_name in enumerate(_DAY_NAMES):
        # Weekday peak at 5pm, weekend peak at 10pm
        peak_hour = 17 if day < 5 else 22
        total_demand, avg_value = _summarise_demand(
            model.predict(day, peak_hour, top_n=100)
        )
        daily_rows.append({"day": day_name, "demand": total_demand, "value": avg_value})
    daily_df = pd.DataFrame(daily_rows)

    fig = px.bar(
        daily_df,
        x="day",
        y="demand",
        color="value",
        color_continuous_scale="Viridis",
        labels={
            "day": "Day of Week",
            "demand": "Peak Demand (Expected Rides)",
            "value": "Avg Value (€)",
        },
        title="Peak Demand by Day of Week",
    )

    # Highlight selected day. The numeric category index is used instead of
    # the label: shapes on a category axis are positioned by serial number,
    # and the annotation placement does arithmetic on x, which fails for a
    # string.
    fig.add_vline(
        x=settings["day_idx"],
        line_width=2,
        line_dash="dash",
        line_color="red",
        annotation_text="Selected Day",
        annotation_position="top right",
    )

    # Pin the category order so index i always means _DAY_NAMES[i]
    fig.update_layout(
        xaxis=dict(categoryorder="array", categoryarray=_DAY_NAMES),
        coloraxis_colorbar=dict(title="Avg Value (€)"),
    )
    st.plotly_chart(fig, use_container_width=True)

    st.markdown(
        "\n"
        "**Key Observations:**\n"
        "- Weekends (especially Saturday) typically show higher demand\n"
        "- Tuesday and Thursday often have higher average ride values\n"
        "- Weekend nights show different demand patterns than weekday nights\n"
    )


def _render_tips():
    """Draw the three footer cards with static advice."""
    for column, (heading, body) in zip(st.columns(3), _TIP_CARDS):
        with column:
            st.markdown('', unsafe_allow_html=True)
            st.subheader(heading)
            st.markdown(body)
            st.markdown("\n", unsafe_allow_html=True)


def main():
    """Run the app: read the sidebar settings, rank areas and render every section."""
    model = _get_model()
    settings = _render_sidebar()

    # Generate recommendations (location is None/None when the checkbox is off)
    recommendations = model.predict(
        day=settings["day_idx"],
        hour=settings["selected_hour"],
        current_lat=settings["current_lat"],
        current_lng=settings["current_lng"],
        value_weight=settings["value_weight"],
        top_n=settings["num_recommendations"],
    )

    # Main content area: map on the left, summary on the right
    map_col, summary_col = st.columns([3, 2])
    with map_col:
        st.markdown('', unsafe_allow_html=True)
        _render_map(model, recommendations, settings)
    with summary_col:
        st.markdown('', unsafe_allow_html=True)
        _render_summary(recommendations)

    # Time series visualization
    st.markdown('', unsafe_allow_html=True)
    hourly_tab, daily_tab = st.tabs(["Hourly Patterns", "Daily Patterns"])
    with hourly_tab:
        _render_hourly_tab(model, settings)
    with daily_tab:
        _render_daily_tab(model, settings)

    # Footer section with additional information
    st.markdown('', unsafe_allow_html=True)
    _render_tips()
# Script entry point; the guarded call must be indented under the `if`.
if __name__ == "__main__":
    main()