# Hugging Face Space: Shreneek/cabpriceanalysis (status: Sleeping)
# File size: 26,327 Bytes

# streamlit_app.py - Bolt Driver Recommendation System
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import folium
from folium.plugins import HeatMap, MarkerCluster
from streamlit_folium import folium_static
import pickle
import os

# Page configuration: browser tab title/icon, wide layout, sidebar open on load.
# NOTE(review): Streamlit expects set_page_config to run before other st.*
# calls — keep this as the first Streamlit statement in the file.
st.set_page_config(
    page_title="Bolt Driver Recommendation System",
    page_icon="🚖",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Inject the stylesheet for the custom CSS classes that the raw-HTML
# st.markdown snippets in this file refer to (main-header, section-header,
# highlight, card, info-box). The sub-header and metric-* classes are defined
# here but not referenced by the code visible in this file.
st.markdown("""
<style>
    .main-header {
        font-size: 2.5rem;
        color: #272D37;
        text-align: center;
        margin-bottom: 1rem;
        font-weight: bold;
    }
    .sub-header {
        font-size: 1.8rem;
        color: #272D37;
        margin-top: 1.5rem;
        margin-bottom: 1rem;
    }
    .section-header {
        font-size: 1.3rem;
        color: #272D37;
        margin-top: 1rem;
        margin-bottom: 0.5rem;
        font-weight: bold;
    }
    .highlight {
        background-color: #F0F2F6;
        padding: 1rem;
        border-radius: 0.5rem;
        margin-bottom: 1rem;
    }
    .card {
        background-color: white;
        border-radius: 0.5rem;
        padding: 1.5rem;
        box-shadow: 0 0.15rem 1.75rem 0 rgba(58, 59, 69, 0.15);
        margin-bottom: 1rem;
    }
    .info-box {
        background-color: #e8f4f8;
        border-left: 5px solid #4e8cff;
        padding: 0.8rem;
        border-radius: 0.3rem;
        margin-bottom: 1rem;
    }
    .metric-container {
        display: flex;
        justify-content: space-between;
        gap: 1rem;
    }
    .metric-card {
        background-color: white;
        border-radius: 0.5rem;
        padding: 1rem;
        text-align: center;
        box-shadow: 0 0.15rem 1.75rem 0 rgba(58, 59, 69, 0.15);
        flex: 1;
    }
    .metric-value {
        font-size: 1.8rem;
        font-weight: bold;
        color: #272D37;
    }
    .metric-label {
        font-size: 0.9rem;
        color: #6e707e;
    }
</style>
""", unsafe_allow_html=True)

# Page title, rendered as raw HTML so the .main-header class above applies.
st.markdown('<div class="main-header">Bolt Driver Recommendation System</div>', unsafe_allow_html=True)

# Short description of the app, styled with the .info-box class.
with st.container():
    st.markdown('<div class="info-box">This application helps Bolt drivers find optimal areas to position themselves based on predicted ride demand and value. The recommendations are personalized based on time, location, and driver preferences.</div>', unsafe_allow_html=True)

class DemandPredictionModel:
    """Synthetic ride-demand/value model on a 10x10 grid covering Tallinn.

    No trained model is loaded: the patterns are generated from hard-coded
    hotspots and hour-of-day / day-of-week factors with a little random
    jitter. The jitter is drawn from a seeded generator so that repeated
    instantiations (e.g. one per Streamlit rerun) produce the same numbers.

    Attributes set by ``setup_demo_data``:
        min_lat, max_lat, min_lng, max_lng: bounding box of the grid.
        lat_step, lng_step: cell size in degrees.
        lat_bins, lng_bins: cell edges (grid size + 1 values each).
        demand_patterns: dict with ``"demand"`` and ``"value"`` arrays indexed
            ``[day_of_week, hour, lat_bin, lng_bin]``.
    """

    _GRID_SIZE = 10

    # (lat_idx, lng_idx, base_demand, base_value) for each hotspot.
    _HOTSPOTS = (
        (4, 5, 300, 1.91),  # city center
        (4, 4, 150, 1.94),  # secondary hub
        (3, 4, 80, 2.89),   # university area
        (3, 3, 60, 1.85),   # residential zone
        (4, 6, 50, 1.56),   # business district
    )

    # Demand multiplier per hour of day (index = hour, 0-23).
    _HOURLY_DEMAND_FACTORS = (
        0.5, 0.4, 0.3, 0.3, 0.3, 0.5,
        0.8, 0.9, 0.7, 0.6, 0.6, 0.6,
        0.7, 0.8, 0.9, 1.0, 1.0, 0.8,
        0.7, 0.7, 0.7, 0.8, 0.9, 0.7,
    )

    # Ride-value multiplier per hour of day (index = hour, 0-23).
    _HOURLY_VALUE_FACTORS = (
        1.4, 0.8, 1.0, 0.6, 1.6, 0.7,
        0.9, 1.1, 1.0, 0.7, 0.8, 1.1,
        0.8, 0.9, 1.6, 0.9, 0.8, 1.0,
        0.8, 0.7, 1.1, 0.8, 1.0, 1.2,
    )

    # Demand multiplier per day of week (index 0 = Monday ... 6 = Sunday).
    _DAY_FACTORS = (0.8, 0.9, 0.9, 0.85, 0.95, 1.0, 0.8)

    # Tuesday/Thursday at 04:00, 22:00 and 23:00 get doubled ride values.
    _HIGH_VALUE_DAYS = (1, 3)
    _HIGH_VALUE_HOURS = (4, 22, 23)
    _HIGH_VALUE_FACTOR = 2.0

    _SPILLOVER_DEMAND = 0.5  # share of demand leaking to a cell at distance 1
    _SPILLOVER_VALUE = 0.9   # spillover rides are worth slightly less

    _EARTH_RADIUS_KM = 6371
    _DISTANCE_SCALE_KM = 5.0  # distance at which the score is halved

    def __init__(self, seed=42):
        """Build the demo patterns.

        Parameters:
        - seed: Seed for the jitter generator. The default keeps the generated
          patterns identical across instances; pass ``None`` for fresh
          randomness on every instantiation.
        """
        self._rng = np.random.default_rng(seed)
        self.setup_demo_data()

    def setup_demo_data(self):
        """Define the Tallinn grid geometry and generate the demand patterns."""
        # Geographic boundaries (Tallinn)
        self.min_lat, self.max_lat = 59.32, 59.57
        self.min_lng, self.max_lng = 24.51, 24.97

        grid_size = self._GRID_SIZE
        self.lat_step = (self.max_lat - self.min_lat) / grid_size
        self.lng_step = (self.max_lng - self.min_lng) / grid_size

        # Cell edges: grid_size cells need grid_size + 1 edges.
        self.lat_bins = np.linspace(self.min_lat, self.max_lat, grid_size + 1)
        self.lng_bins = np.linspace(self.min_lng, self.max_lng, grid_size + 1)

        self.demand_patterns = self.create_demand_patterns()

    def create_demand_patterns(self):
        """Generate demand and average-value arrays for every day/hour/cell.

        Each hotspot contributes demand to its own cell and a distance-scaled
        share to its eight neighbours. Contributions from different hotspots
        are summed, and the value of a cell is the demand-weighted mean of the
        values of the contributions it received, so it stays within the range
        of the hotspot values instead of growing with the number of
        neighbouring hotspots.

        Returns:
        - dict with ``"demand"`` and ``"value"`` arrays of shape
          ``(7, 24, lat_cells, lng_cells)``; value is 0 where demand is 0.
        """
        rng = getattr(self, "_rng", None)
        if rng is None:
            # Instance was built without __init__; fall back to fresh entropy.
            rng = np.random.default_rng()

        n_lat = len(self.lat_bins) - 1
        n_lng = len(self.lng_bins) - 1
        shape = (
            len(self._DAY_FACTORS),
            len(self._HOURLY_DEMAND_FACTORS),
            n_lat,
            n_lng,
        )

        demand_patterns = np.zeros(shape)
        # Sum of demand * value per cell; divided by demand at the end.
        revenue = np.zeros(shape)

        neighbour_offsets = [
            (d_lat, d_lng)
            for d_lat in (-1, 0, 1)
            for d_lng in (-1, 0, 1)
            if (d_lat, d_lng) != (0, 0)
        ]

        for day, day_factor in enumerate(self._DAY_FACTORS):
            for hour, hour_factor in enumerate(self._HOURLY_DEMAND_FACTORS):
                time_factor = hour_factor * day_factor

                value_factor = self._HOURLY_VALUE_FACTORS[hour]
                if day in self._HIGH_VALUE_DAYS and hour in self._HIGH_VALUE_HOURS:
                    value_factor *= self._HIGH_VALUE_FACTOR

                # Views into the 4D arrays for this day/hour slice.
                slot_demand = demand_patterns[day, hour]
                slot_revenue = revenue[day, hour]

                for lat_idx, lng_idx, base_demand, base_value in self._HOTSPOTS:
                    # +/-10% jitter on demand, +/-5% on value.
                    demand = base_demand * time_factor * rng.uniform(0.9, 1.1)
                    value = base_value * value_factor * rng.uniform(0.95, 1.05)

                    # Accumulate (not assign) so spillover already received
                    # from other hotspots is kept regardless of list order.
                    slot_demand[lat_idx, lng_idx] += demand
                    slot_revenue[lat_idx, lng_idx] += demand * value

                    for d_lat, d_lng in neighbour_offsets:
                        n_lat_idx = lat_idx + d_lat
                        n_lng_idx = lng_idx + d_lng
                        if not (0 <= n_lat_idx < n_lat and 0 <= n_lng_idx < n_lng):
                            continue

                        # Spillover decreases with distance (1 or sqrt(2)).
                        spill = demand * self._SPILLOVER_DEMAND / np.hypot(d_lat, d_lng)
                        slot_demand[n_lat_idx, n_lng_idx] += spill
                        slot_revenue[n_lat_idx, n_lng_idx] += (
                            spill * value * self._SPILLOVER_VALUE
                        )

        value_patterns = np.divide(
            revenue,
            demand_patterns,
            out=np.zeros_like(revenue),
            where=demand_patterns > 0,
        )

        return {"demand": demand_patterns, "value": value_patterns}

    @staticmethod
    def _haversine_km(lat1, lng1, lat2, lng2):
        """Return the great-circle distance in km between two lat/lng points."""
        d_lat = np.radians(lat1 - lat2)
        d_lng = np.radians(lng1 - lng2)
        a = (
            np.sin(d_lat / 2) ** 2
            + np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) * np.sin(d_lng / 2) ** 2
        )
        a = min(1.0, a)  # guard sqrt(1 - a) against rounding just above 1
        c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
        return float(DemandPredictionModel._EARTH_RADIUS_KM * c)

    def predict(self, day, hour, current_lat=None, current_lng=None, value_weight=0.5, top_n=5):
        """
        Predict high-demand areas for a given day and hour

        Parameters:
        - day: Day of week (0=Monday, 6=Sunday)
        - hour: Hour of day (0-23)
        - current_lat: Driver's current latitude (optional)
        - current_lng: Driver's current longitude (optional)
        - value_weight: Weight for balancing demand vs value (0-1)
        - top_n: Number of recommendations to return

        Returns:
        - List of at most ``top_n`` dicts, best first, with keys
          ``center_lat``, ``center_lng``, ``predicted_rides``, ``avg_value``,
          ``expected_value``, ``score``, ``adjusted_score`` and
          ``distance_km`` (``None`` unless both coordinates are given).
          Only cells with positive demand are considered.
        """
        demand_matrix = self.demand_patterns["demand"][day, hour]
        value_matrix = self.demand_patterns["value"][day, hour]

        # Normalisation constants are the same for every cell.
        max_demand = demand_matrix.max()
        max_value = value_matrix.max()

        lat_centers = (self.lat_bins[:-1] + self.lat_bins[1:]) / 2
        lng_centers = (self.lng_bins[:-1] + self.lng_bins[1:]) / 2

        has_location = current_lat is not None and current_lng is not None

        recommendations = []
        # np.nonzero walks the grid in row-major order, i.e. lat then lng.
        for lat_idx, lng_idx in zip(*np.nonzero(demand_matrix > 0)):
            demand = float(demand_matrix[lat_idx, lng_idx])
            value = float(value_matrix[lat_idx, lng_idx])
            center_lat = float(lat_centers[lat_idx])
            center_lng = float(lng_centers[lng_idx])

            demand_score = demand / max_demand if max_demand > 0 else 0
            value_score = value / max_value if max_value > 0 else 0

            # Combined score based on value weight
            score = float((1 - value_weight) * demand_score + value_weight * value_score)

            if has_location:
                distance_km = self._haversine_km(
                    current_lat, current_lng, center_lat, center_lng
                )
                # Hyperbolic penalty: 1 at 0 km, 1/2 at 5 km, 1/3 at 10 km.
                adjusted_score = score / (1.0 + distance_km / self._DISTANCE_SCALE_KM)
            else:
                distance_km = None
                adjusted_score = score

            recommendations.append({
                "center_lat": center_lat,
                "center_lng": center_lng,
                "predicted_rides": demand,
                "avg_value": value,
                "expected_value": demand * value,
                "score": score,
                "adjusted_score": adjusted_score,
                "distance_km": distance_km,
            })

        # sorted() is stable, so ties keep grid order.
        recommendations.sort(key=lambda rec: rec["adjusted_score"], reverse=True)

        return recommendations[:top_n]

# Main application flow
_DAY_NAMES = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
_TALLINN_CENTER = (59.436, 24.753)
# Upper bound passed as top_n when every grid cell is wanted (10x10 grid).
_MAX_GRID_CELLS = 100


def _render_sidebar():
    """Draw the sidebar widgets and return the driver's selections as a dict."""
    default_lat, default_lng = _TALLINN_CENTER

    with st.sidebar:
        st.markdown('<div class="section-header">Driver Options</div>', unsafe_allow_html=True)

        # Time selection, defaulting to the server's current day and hour
        st.subheader("Time Selection")
        today = datetime.now()
        selected_day = st.selectbox("Day of Week", _DAY_NAMES, index=today.weekday())
        selected_hour = st.slider("Hour of Day", 0, 23, today.hour, format="%d:00")

        # Location input
        st.subheader("Driver Location")
        use_location = st.checkbox("Use Current Location", value=True)

        current_lat = current_lng = None
        if use_location:
            lat_col, lng_col = st.columns(2)
            with lat_col:
                current_lat = st.number_input("Latitude", value=default_lat, format="%.5f", step=0.001)
            with lng_col:
                current_lng = st.number_input("Longitude", value=default_lng, format="%.5f", step=0.001)

        # Preference settings
        st.subheader("Preferences")
        num_recommendations = st.slider("Number of Recommendations", 3, 10, 5)
        value_weight = st.slider(
            "Optimization Balance",
            min_value=0.0,
            max_value=1.0,
            value=0.5,
            step=0.1,
            help="0 = Focus on ride count, 1 = Focus on ride value"
        )

        # Advanced options for visual
        st.subheader("Display Options")
        show_heatmap = st.checkbox("Show Demand Heatmap", value=True)

    return {
        "selected_day": selected_day,
        "day_idx": _DAY_NAMES.index(selected_day),
        "selected_hour": selected_hour,
        "use_location": use_location,
        "current_lat": current_lat,
        "current_lng": current_lng,
        "num_recommendations": num_recommendations,
        "value_weight": value_weight,
        "show_heatmap": show_heatmap,
    }


def _summarize_predictions(predictions):
    """Return (total demand, demand-weighted average value) for predictions."""
    total_demand = sum(pred["predicted_rides"] for pred in predictions)
    if not total_demand > 0:
        return total_demand, 0
    revenue = sum(pred["avg_value"] * pred["predicted_rides"] for pred in predictions)
    return total_demand, revenue / total_demand


def _render_map(container, model, settings, recommendations):
    """Draw the folium map with driver marker, recommendations and heatmap."""
    with container:
        st.markdown('<div class="section-header">Demand Map</div>', unsafe_allow_html=True)

        # Map rendering is best-effort: on failure the error is shown and the
        # recommendation table in the other column still carries the results.
        try:
            m = folium.Map(
                location=list(_TALLINN_CENTER),
                zoom_start=12,
                tiles="CartoDB positron"
            )

            # Add driver marker if location provided
            if settings["use_location"]:
                folium.Marker(
                    location=[settings["current_lat"], settings["current_lng"]],
                    popup="Your Location",
                    icon=folium.Icon(color="blue", icon="user", prefix="fa"),
                    tooltip="Your Current Location"
                ).add_to(m)

            # Add recommendation markers
            for i, rec in enumerate(recommendations):
                folium.CircleMarker(
                    location=[rec["center_lat"], rec["center_lng"]],
                    radius=20,
                    color="red",
                    fill=True,
                    fill_color="red",
                    fill_opacity=0.6,
                    popup=f"""
                    <b>Recommendation {i+1}</b><br>
                    Expected rides: {rec['predicted_rides']:.1f}<br>
                    Avg value: €{rec['avg_value']:.2f}<br>
                    Expected value: €{rec['expected_value']:.2f}<br>
                    {f'Distance: {rec["distance_km"]:.2f} km' if rec["distance_km"] is not None else ''}
                    """
                ).add_to(m)

                # Rank number drawn on top of the circle via a DivIcon
                folium.Marker(
                    location=[rec["center_lat"], rec["center_lng"]],
                    icon=folium.DivIcon(
                        html=f'<div style="font-size:12pt;color:white;font-weight:bold;text-align:center;width:25px;height:25px;line-height:25px;">{i+1}</div>'
                    )
                ).add_to(m)

            if settings["show_heatmap"]:
                # All cells with demand, not just the top recommendations
                all_predictions = model.predict(
                    settings["day_idx"], settings["selected_hour"], top_n=_MAX_GRID_CELLS
                )
                heat_data = [
                    [pred["center_lat"], pred["center_lng"], pred["predicted_rides"]]
                    for pred in all_predictions
                ]

                # String keys: some folium releases fail on float gradient keys
                # ("'float' object has no attribute 'split'"); the serialized
                # JSON is the same either way.
                HeatMap(
                    heat_data,
                    radius=15,
                    gradient={
                        "0.2": 'blue',
                        "0.4": 'lime',
                        "0.6": 'yellow',
                        "0.8": 'orange',
                        "1.0": 'red'
                    },
                    name="Demand Heatmap",
                    show=True
                ).add_to(m)

            folium.LayerControl().add_to(m)

            folium_static(m, width=700)

        except Exception as e:
            st.error(f"Error rendering map: {e}")
            st.info("Showing tabular results instead.")


def _render_recommendations(container, recommendations):
    """Show the top recommendation as metrics and all of them as a table."""
    with container:
        st.markdown('<div class="section-header">Recommendations</div>', unsafe_allow_html=True)

        if recommendations:
            top_rec = recommendations[0]

            # NOTE(review): the opening/closing <div> are separate st.markdown
            # calls, so they probably do not wrap the widgets in between —
            # confirm the intended styling in the browser.
            st.markdown('<div class="highlight">', unsafe_allow_html=True)
            st.subheader("Top Recommendation")

            left, right = st.columns(2)
            with left:
                st.metric("Expected Rides", f"{top_rec['predicted_rides']:.1f}")
                st.metric("Avg Value", f"€{top_rec['avg_value']:.2f}")
            with right:
                st.metric("Expected Value", f"€{top_rec['expected_value']:.2f}")
                if top_rec["distance_km"] is not None:
                    st.metric("Distance", f"{top_rec['distance_km']:.2f} km")

            st.markdown(f"Location: [{top_rec['center_lat']:.4f}, {top_rec['center_lng']:.4f}]")
            st.markdown('</div>', unsafe_allow_html=True)

        st.subheader("All Recommendations")

        if recommendations:
            rec_df = pd.DataFrame(recommendations)

            display_df = pd.DataFrame({
                "Rank": range(1, len(rec_df) + 1),
                "Expected Rides": rec_df["predicted_rides"].round(1),
                "Avg Value (€)": rec_df["avg_value"].round(2),
                "Expected Value (€)": rec_df["expected_value"].round(2)
            })

            # Distance is only present when a driver location was supplied
            if rec_df["distance_km"].notna().any():
                display_df["Distance (km)"] = rec_df["distance_km"].round(2)

            st.table(display_df)
        else:
            # An empty list would otherwise fail on the column lookups above.
            st.info("No recommendations available for the selected time.")

        # Add explanation for score calculation
        st.markdown('<div class="info-box">', unsafe_allow_html=True)
        st.markdown("**How recommendations are calculated:**")
        st.markdown("""
        - Ride count predictions based on historical patterns
        - Value based on average ride fares
        - Recommendations balanced by your preferences
        - Distance factored in when location is provided
        """)
        st.markdown('</div>', unsafe_allow_html=True)


def _render_hourly_tab(tab, model, day_idx, selected_day, selected_hour):
    """Plot total demand and average value per hour for the selected day."""
    with tab:
        hourly_data = []
        for hour in range(24):
            total_demand, avg_value = _summarize_predictions(
                model.predict(day_idx, hour, top_n=_MAX_GRID_CELLS)
            )
            hourly_data.append({
                "hour": hour,
                "demand": total_demand,
                "value": avg_value
            })

        hourly_df = pd.DataFrame(hourly_data)

        # Dual-axis chart: demand on the left axis, value on the right
        fig = go.Figure()

        fig.add_trace(go.Scatter(
            x=hourly_df["hour"],
            y=hourly_df["demand"],
            name="Demand",
            line=dict(color="#4e8cff", width=3),
            hovertemplate="Hour: %{x}<br>Demand: %{y:.1f}<extra></extra>"
        ))

        fig.add_trace(go.Scatter(
            x=hourly_df["hour"],
            y=hourly_df["value"],
            name="Avg Value (€)",
            line=dict(color="#ff6b6b", width=3, dash="dot"),
            yaxis="y2",
            hovertemplate="Hour: %{x}<br>Avg Value: €%{y:.2f}<extra></extra>"
        ))

        # Highlight selected hour
        fig.add_vline(
            x=selected_hour,
            line_width=2,
            line_dash="dash",
            line_color="green",
            annotation_text="Selected Hour",
            annotation_position="top right"
        )

        # Axis title colours go through title.font; the older `titlefont`
        # shortcut is deprecated and rejected by recent Plotly releases.
        fig.update_layout(
            title=f"Hourly Demand Pattern for {selected_day}",
            xaxis=dict(
                title="Hour of Day",
                tickmode="linear",
                tick0=0,
                dtick=1
            ),
            yaxis=dict(
                title=dict(text="Demand (Expected Rides)", font=dict(color="#4e8cff")),
                tickfont=dict(color="#4e8cff")
            ),
            yaxis2=dict(
                title=dict(text="Average Value (€)", font=dict(color="#ff6b6b")),
                tickfont=dict(color="#ff6b6b"),
                anchor="x",
                overlaying="y",
                side="right"
            ),
            hovermode="x unified",
            legend=dict(
                orientation="h",
                yanchor="bottom",
                y=1.02,
                xanchor="center",
                x=0.5
            )
        )

        st.plotly_chart(fig, use_container_width=True)

        # Add observations
        st.markdown("""
        **Key Observations:**
        - Peak demand typically occurs between 15:00-18:00 (3-6 PM)
        - Early morning hours (4-5 AM) often show higher average ride values
        - Morning rush hour (6-9 AM) shows moderate demand with variable values
        """)


def _render_daily_tab(tab, model, day_idx):
    """Plot peak-hour demand per day of week, coloured by average value."""
    with tab:
        daily_data = []
        for day, day_name in enumerate(_DAY_NAMES):
            peak_hour = 17 if day < 5 else 22  # Weekday peak at 5pm, weekend peak at 10pm
            total_demand, avg_value = _summarize_predictions(
                model.predict(day, peak_hour, top_n=_MAX_GRID_CELLS)
            )
            daily_data.append({
                "day": day_name,
                "demand": total_demand,
                "value": avg_value
            })

        daily_df = pd.DataFrame(daily_data)

        fig = px.bar(
            daily_df,
            x="day",
            y="demand",
            color="value",
            color_continuous_scale="Viridis",
            labels={
                "day": "Day of Week",
                "demand": "Peak Demand (Expected Rides)",
                "value": "Avg Value (€)"
            },
            title="Peak Demand by Day of Week"
        )

        # Highlight selected day. The line is placed by category position
        # (0 = Monday) rather than by name: Plotly's annotation placement does
        # arithmetic on x, which fails for string categories.
        fig.add_vline(
            x=day_idx,
            line_width=2,
            line_dash="dash",
            line_color="red",
            annotation_text="Selected Day",
            annotation_position="top right"
        )

        # Fix the category order so position day_idx always matches the day
        fig.update_layout(
            xaxis=dict(categoryorder="array", categoryarray=_DAY_NAMES),
            coloraxis_colorbar=dict(title="Avg Value (€)")
        )

        st.plotly_chart(fig, use_container_width=True)

        # Add observations
        st.markdown("""
        **Key Observations:**
        - Weekends (especially Saturday) typically show higher demand
        - Tuesday and Thursday often have higher average ride values
        - Weekend nights show different demand patterns than weekday nights
        """)


def _render_tips():
    """Render the static three-column "Tips for Drivers" footer."""
    st.markdown('<div class="section-header">Tips for Drivers</div>', unsafe_allow_html=True)

    tips_col1, tips_col2, tips_col3 = st.columns(3)

    with tips_col1:
        st.markdown('<div class="card">', unsafe_allow_html=True)
        st.subheader("Best Times")
        st.markdown("""
        - **Weekdays**: 7-9 AM, 4-6 PM
        - **Weekends**: 10 PM - 2 AM
        - **High Value**: Tuesday & Thursday early morning (4-5 AM) and late night (10 PM-12 AM)
        """)
        st.markdown('</div>', unsafe_allow_html=True)

    with tips_col2:
        st.markdown('<div class="card">', unsafe_allow_html=True)
        st.subheader("Best Areas")
        st.markdown("""
        - **City Center**: Consistent demand throughout the day
        - **University Area**: Higher value rides, especially weekdays
        - **Business District**: Good during morning rush hours
        """)
        st.markdown('</div>', unsafe_allow_html=True)

    with tips_col3:
        st.markdown('<div class="card">', unsafe_allow_html=True)
        st.subheader("Strategy Tips")
        st.markdown("""
        - Position 5-10 minutes before peak times
        - Balance high-volume vs high-value areas
        - For longer shifts, start with high-value rides then switch to high-volume
        """)
        st.markdown('</div>', unsafe_allow_html=True)


def main():
    """Run the app: sidebar inputs, map, recommendations, charts and tips."""
    model = DemandPredictionModel()

    settings = _render_sidebar()

    # current_lat/current_lng are None when the location checkbox is off
    recommendations = model.predict(
        day=settings["day_idx"],
        hour=settings["selected_hour"],
        current_lat=settings["current_lat"],
        current_lng=settings["current_lng"],
        value_weight=settings["value_weight"],
        top_n=settings["num_recommendations"]
    )

    # Main content area: map on the left (wider), recommendations on the right
    map_col, rec_col = st.columns([3, 2])
    _render_map(map_col, model, settings, recommendations)
    _render_recommendations(rec_col, recommendations)

    # Time series visualization
    st.markdown('<div class="section-header">Demand Patterns Analysis</div>', unsafe_allow_html=True)
    hourly_tab, daily_tab = st.tabs(["Hourly Patterns", "Daily Patterns"])
    _render_hourly_tab(
        hourly_tab,
        model,
        settings["day_idx"],
        settings["selected_day"],
        settings["selected_hour"],
    )
    _render_daily_tab(daily_tab, model, settings["day_idx"])

    # Footer section with additional information
    _render_tips()


if __name__ == "__main__":
    main()