"""Streamlit entry point for the AI Phone Benchmark Leaderboard."""

import asyncio
import time
from typing import Any, Dict, List, Tuple

import pandas as pd
import streamlit as st

from .components.filters import render_table_filters
from .components.visualizations import (
    render_leaderboard_table,
    render_device_rankings,
)
from .components.header import render_header, render_contribution_guide
from .components.device_comparison import render_device_comparison
from .services.firebase import fetch_leaderboard_data
from .core.styles import CUSTOM_CSS


def get_filter_values(
    df: pd.DataFrame,
) -> Tuple[
    List[str],
    List[str],
    List[str],
    List[str],
    List[str],
    Tuple[int, int],
    Tuple[int, int],
    Tuple[int, int],
    List[str],
    Tuple[int, int],
]:
    """Collect the unique values and (min, max) ranges used to populate the filters."""
    # Categorical filter options
    models = sorted(df["Model ID"].unique().tolist())
    platforms = sorted(df["Platform"].unique().tolist())
    devices = sorted(df["Device"].unique().tolist())
    cache_type_v = sorted(df["cache_type_v"].unique().tolist())
    cache_type_k = sorted(df["cache_type_k"].unique().tolist())
    # Numeric (min, max) ranges for the slider filters
    n_threads = (df["n_threads"].min(), df["n_threads"].max())
    max_n_gpu_layers = (0, max(df["n_gpu_layers"].unique().tolist()))
    pp_range = (df["PP Config"].min(), df["PP Config"].max())
    tg_range = (df["TG Config"].min(), df["TG Config"].max())
    versions = sorted(df["Version"].unique().tolist())
    return (
        models,
        platforms,
        devices,
        cache_type_v,
        cache_type_k,
        pp_range,
        tg_range,
        n_threads,
        versions,
        max_n_gpu_layers,
    )
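

# Note: get_filter_values (and the filters built from it) assumes the
# leaderboard DataFrame exposes at least the columns read above:
# "Model ID", "Platform", "Device", "cache_type_v", "cache_type_k",
# "n_threads", "n_gpu_layers", "PP Config", "TG Config", and "Version".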


def render_performance_metrics(metrics: Dict[str, Any]):
    """Render performance metrics in a nice grid"""
    st.markdown("### 🏆 Performance Overview")

    col1, col2, col3, col4, col5 = st.columns(5)

    with col1:
        st.metric("🏆 Top Device", metrics["top_device"])
    with col2:
        st.metric("Top Score", f"{metrics['top_score']:.1f}")
    with col3:
        st.metric("Average Score", f"{metrics['avg_score']:.1f}")
    with col4:
        st.metric("Total Devices", metrics["total_devices"])
    with col5:
        st.metric("Total Models", metrics["total_models"])
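

# render_performance_metrics expects a dict providing the five keys read above.
# A hypothetical call (placeholder values for illustration only, not real data):
#
#     render_performance_metrics(
#         {
#             "top_device": "Device A",
#             "top_score": 0.0,
#             "avg_score": 0.0,
#             "total_devices": 0,
#             "total_models": 0,
#         }
#     )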


async def get_cached_data() -> pd.DataFrame:
    """Fetch the leaderboard data, caching it in session state for one hour."""
    current_time = time.time()

    # Reuse the session-cached DataFrame if it exists and is less than an hour old.
    if (
        "leaderboard_data" in st.session_state
        and st.session_state.leaderboard_data is not None
        and (current_time - st.session_state.data_timestamp) < 3600
    ):
        return st.session_state.leaderboard_data

    df = await fetch_leaderboard_data()
    st.session_state.leaderboard_data = df
    st.session_state.data_timestamp = current_time
    return df
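

# A minimal sketch (hypothetical; not wired into the app) of how the
# session-scoped cache above could be invalidated on demand, for example from
# a "Refresh" button rendered somewhere in the UI:
#
#     if st.button("Refresh data"):
#         st.session_state.leaderboard_data = None
#         st.session_state.data_timestamp = 0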


async def main():
    """Main application entry point"""
    st.set_page_config(
        page_title="AI Phone Benchmark Leaderboard",
        page_icon="📱",
        layout="wide",
    )

    if "leaderboard_data" not in st.session_state:
        st.session_state.leaderboard_data = None
        st.session_state.data_timestamp = 0

    st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

    df = await get_cached_data()

    if df.empty:
        st.error("No data available. Please check your connection and try again.")
        return

    render_header()

    (
        models,
        platforms,
        devices,
        cache_type_v,
        cache_type_k,
        pp_range,
        tg_range,
        n_threads,
        versions,
        max_n_gpu_layers,
    ) = get_filter_values(df)

    if "show_guide" not in st.session_state:
        st.session_state.show_guide = True

    # Widen the contribution guide column while the guide is shown.
    main_col, guide_col = st.columns(
        [
            0.8 if st.session_state.show_guide else 0.9,
            0.2 if st.session_state.show_guide else 0.1,
        ]
    )

    with main_col:
        tab1, tab2, tab3 = st.tabs(
            [
                "Device Rankings",
                "Benchmark Results",
                "⚔️ Device Duel",
            ]
        )

        with tab1:
            st.title("Device Rankings")

            # Inline tooltip describing the ranking methodology.
            st.markdown(
                """
<div style="position: relative;">
  <div style="margin-bottom: 10px;">
    <a href="#" data-tooltip="Rankings calculated using Glicko-2 algorithm with standardized conditions: PP=512 tokens, TG=128 tokens" style="text-decoration: none; color: #888; font-size: 12px; border-bottom: 1px dotted #888;">
      ℹ️ Ranking methodology
    </a>
  </div>
</div>
<style>
[data-tooltip] {
    position: relative;
    cursor: pointer;
}
[data-tooltip]:hover::after {
    content: attr(data-tooltip);
    position: absolute;
    left: 0;
    top: 100%;
    background-color: #f8f9fa;
    border: 1px solid #dee2e6;
    border-radius: 4px;
    padding: 8px 12px;
    width: max-content;
    max-width: 300px;
    z-index: 100;
    font-size: 12px;
    color: #333;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}
</style>
                """,
                unsafe_allow_html=True,
            )

            render_device_rankings(df)

        with tab2:
            table_filters = render_table_filters(
                models,
                platforms,
                devices,
                cache_type_v,
                cache_type_k,
                pp_range,
                tg_range,
                n_threads,
                versions,
                max_n_gpu_layers,
            )

            render_leaderboard_table(df, table_filters)

            st.markdown("---")

        with tab3:
            normalized_device_ids = sorted(df["Normalized Device ID"].unique().tolist())
            render_device_comparison(df, normalized_device_ids)

    with guide_col:
        render_contribution_guide()


if __name__ == "__main__":
    asyncio.run(main())
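
# Note: this module uses package-relative imports, so the block above only runs
# correctly when the file is executed in its package context (e.g. via
# `python -m <package>.<module>`); the exact launch command is project-specific
# and not shown here.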