File size: 3,357 Bytes
8b08df1
 
 
 
90a2425
 
3bece4b
 
 
8b08df1
90a2425
 
 
 
 
3610bdd
 
 
 
eeed2d4
 
 
5824669
 
 
 
 
 
 
 
 
e089e4a
810b759
 
 
 
e089e4a
 
5824669
8b08df1
 
 
5824669
8b08df1
 
 
 
3bece4b
8b08df1
 
5824669
3bece4b
8b08df1
810b759
 
8b08df1
5824669
 
810b759
5824669
 
 
 
 
8b08df1
5824669
3bece4b
8b08df1
 
ce54f96
 
 
 
 
 
 
 
3610bdd
ce54f96
 
 
3bece4b
 
 
 
 
 
 
ce54f96
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import pandas as pd
import folium
from sklearn.cluster import KMeans
from folium.plugins import MarkerCluster
import requests
from io import BytesIO
import streamlit as st
import folium
from streamlit.components.v1 import html

# Load data from Excel (directly from the URL)
def load_data(url):
    """Download an Excel workbook from *url* and return merged school data.

    Reads the "lat long" and "measurement data" sheets, inner-joins them on
    ``school_id_giga``, and strips whitespace from the merged column names.

    Args:
        url: Direct (raw) URL to an .xlsx workbook.

    Returns:
        pandas.DataFrame with one row per school present in both sheets.

    Raises:
        requests.HTTPError: if the download itself fails.
        ValueError: if the response is not an .xlsx file.
    """
    # Request the file content.
    response = requests.get(url)
    # Fail loudly on HTTP errors (404/500) instead of attempting to parse
    # an HTML error page as a spreadsheet.
    response.raise_for_status()

    # Check the MIME type; .get() avoids a KeyError when the server omits
    # the Content-Type header entirely.
    content_type = response.headers.get('Content-Type', '')
    if 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' not in content_type:
        raise ValueError("The file is not a valid Excel file.")

    # Parse both sheets in a single pass over the workbook (passing a list
    # of sheet names returns a dict of DataFrames).
    sheets = pd.read_excel(
        BytesIO(response.content),
        sheet_name=["lat long", "measurement data"],
        engine='openpyxl',
    )

    # Merge on the shared school identifier; `on=` replaces the redundant
    # left_on/right_on pair since both sides use the same column name.
    merged_data = pd.merge(
        sheets["lat long"],
        sheets["measurement data"],
        on="school_id_giga",
        how="inner",
    )

    # Strip whitespace from all column names.
    merged_data.columns = merged_data.columns.str.strip()

    # Debug aid: show which columns survived the merge.
    print("Columns in merged data:", merged_data.columns)

    return merged_data

# Perform clustering to find data center location
def find_data_center(df, n_clusters=1):
    """Cluster the school coordinates and return the k-means cluster centers.

    With the default single cluster, the returned center is the geographic
    centroid candidate for the proposed data center.

    Args:
        df: DataFrame containing ``latitude`` and ``longitude`` columns.
        n_clusters: Number of clusters to fit (default 1).

    Returns:
        ndarray of shape (n_clusters, 2) with [latitude, longitude] centers.
    """
    coords = df[["latitude", "longitude"]]
    model = KMeans(n_clusters=n_clusters, random_state=0)
    model.fit(coords)
    return model.cluster_centers_

# Create a map and plot the points
def plot_map(df, center):
    """Build a folium map with clustered school markers plus the data center.

    Args:
        df: DataFrame with ``latitude``, ``longitude``, ``download_speed``,
            ``upload_speed``, ``latency`` and (optionally) ``school_name``
            columns — one row per school.
        center: Array-like of cluster centers; ``center[0]`` is used as the
            map focus and the proposed data-center location.

    Returns:
        folium.Map ready to be rendered as HTML.
    """
    # Center the view on the proposed data-center location.  Named
    # `school_map` rather than `map` to avoid shadowing the builtin.
    school_map = folium.Map(location=[center[0][0], center[0][1]], zoom_start=10)
    marker_cluster = MarkerCluster().add_to(school_map)

    # Add one marker per school, grouped by the cluster plugin.
    for _, row in df.iterrows():
        # Fall back to a placeholder if the sheet lacks a school_name column.
        school_name = row.get("school_name", "No Name Provided")

        folium.Marker(
            location=[row["latitude"], row["longitude"]],
            popup=(
                f"School Name: {school_name}<br>"
                f"Download Speed: {row['download_speed']} Mbps<br>"
                f"Upload Speed: {row['upload_speed']} Mbps<br>"
                f"Latency: {row['latency']} ms"
            ),
            icon=folium.Icon(color="blue", icon="info-sign"),
        ).add_to(marker_cluster)

    # Mark the proposed data center in red so it stands out from the schools.
    folium.Marker(
        location=[center[0][0], center[0][1]],
        popup="Proposed Data Center",
        icon=folium.Icon(color="red", icon="cloud"),
    ).add_to(school_map)

    return school_map

# Main function to run the application
def main():
    """Streamlit entry point: load the data, cluster, and render the map."""
    # Direct raw-file URL on the Hugging Face space.
    url = "https://huggingface.co/spaces/engralimalik/lace/resolve/main/data%20barbados.xlsx"
    df = load_data(url)
    center = find_data_center(df)
    # `school_map` instead of `map` — don't shadow the builtin.
    school_map = plot_map(df, center)

    # Render the title and description BEFORE the map so the page reads
    # top-to-bottom (the original emitted the map first, pushing the
    # heading below it).
    st.title("Data Center Location Mapping")
    st.write("This map shows school locations and proposed data center locations based on clustering.")

    # Embed the folium map directly; _repr_html_() renders it as HTML.
    map_html = school_map._repr_html_()
    html(map_html, width=700, height=500)  # adjust the size of the embedded map


if __name__ == "__main__":
    main()