import streamlit as st import asyncio import tokonomics from utils import create_model_hierarchy from utils_on import analyze_hf_model # New import for On Premise Estimator functionality st.set_page_config(page_title="LLM Pricing Calculator", layout="wide") # -------------------------- # Async Data Loading Function # -------------------------- async def load_data(): """Simulate loading data asynchronously.""" AVAILABLE_MODELS = await tokonomics.get_available_models() hierarchy = create_model_hierarchy(AVAILABLE_MODELS) FILTERED_MODELS = [] MODEL_PRICING = {} PROVIDERS = list(hierarchy.keys()) for provider in PROVIDERS: for model_family in hierarchy[provider]: for model_version in hierarchy[provider][model_family].keys(): for region in hierarchy[provider][model_family][model_version]: model_id = hierarchy[provider][model_family][model_version][region] MODEL_PRICING[model_id] = await tokonomics.get_model_costs(model_id) FILTERED_MODELS.append(model_id) return FILTERED_MODELS, MODEL_PRICING, PROVIDERS # -------------------------- # Provider Change Function # -------------------------- def provider_change(provider, selected_type, all_types=["text", "vision", "video", "image"]): """Filter models based on the selected provider and type.""" all_models = st.session_state.get("models", []) new_models = [] others = [a_type for a_type in all_types if selected_type != a_type] for model_name in all_models: if provider in model_name: if selected_type in model_name: new_models.append(model_name) elif any(other in model_name for other in others): continue else: new_models.append(model_name) return new_models if new_models else all_models # -------------------------- # Estimate Cost Function # -------------------------- def estimate_cost(num_alerts, input_size, output_size, model_id): pricing = st.session_state.get("pricing", {}) cost_token = pricing.get(model_id) if not cost_token: return "NA" input_tokens = round(input_size * 1.3) output_tokens = round(output_size * 1.3) price_day = cost_token.get("input_cost_per_token", 0) * input_tokens + \ cost_token.get("output_cost_per_token", 0) * output_tokens price_total = price_day * num_alerts return f"""## Estimated Cost: Day Price: {price_total:0.2f} USD Month Price: {price_total * 31:0.2f} USD Year Price: {price_total * 365:0.2f} USD """ # -------------------------- # Load Data into Session State (only once) # -------------------------- if "data_loaded" not in st.session_state: with st.spinner("Loading pricing data..."): models, pricing, providers = asyncio.run(load_data()) st.session_state["models"] = models st.session_state["pricing"] = pricing st.session_state["providers"] = providers st.session_state["data_loaded"] = True # -------------------------- # Sidebar # -------------------------- with st.sidebar: st.image("https://cdn.prod.website-files.com/630f558f2a15ca1e88a2f774/631f1436ad7a0605fecc5e15_Logo.svg", use_container_width=True) st.markdown("Visit: [https://www.priam.ai](https://www.priam.ai)") st.divider() st.sidebar.title("LLM Pricing Calculator") # -------------------------- # Pills Navigation (Using st.pills) # -------------------------- # st.pills creates a pill-style selection widget. page = st.pills("Head", options=["Model Selection", "On Premise Estimator", "About"],selection_mode="single",default="Model Selection",label_visibility="hidden", #index=0 # Change index if you want a different default ) # -------------------------- # Helper: Format Analysis Report # -------------------------- def format_analysis_report(analysis_result: dict) -> str: """Convert the raw analysis_result dict into a human-readable report.""" if "error" in analysis_result: return f"**Error:** {analysis_result['error']}" lines = [] lines.append(f"### Model Analysis Report for `{analysis_result.get('model_id', 'Unknown Model')}`\n") lines.append(f"**Parameter Size:** {analysis_result.get('parameter_size', 'N/A')} Billion parameters\n") lines.append(f"**Precision:** {analysis_result.get('precision', 'N/A')}\n") vram = analysis_result.get("vram_requirements", {}) lines.append("#### VRAM Requirements:") lines.append(f"- Model Size: {vram.get('model_size_gb', 0):.2f} GB") lines.append(f"- KV Cache: {vram.get('kv_cache_gb', 0):.2f} GB") lines.append(f"- Activations: {vram.get('activations_gb', 0):.2f} GB") lines.append(f"- Overhead: {vram.get('overhead_gb', 0):.2f} GB") lines.append(f"- **Total VRAM:** {vram.get('total_vram_gb', 0):.2f} GB\n") compatible_gpus = analysis_result.get("compatible_gpus", []) lines.append("#### Compatible GPUs:") if compatible_gpus: for gpu in compatible_gpus: lines.append(f"- {gpu}") else: lines.append("- None found") lines.append(f"\n**Largest Compatible GPU:** {analysis_result.get('largest_compatible_gpu', 'N/A')}\n") #gpu_perf = analysis_result.get("gpu_performance", {}) #if gpu_perf: # lines.append("#### GPU Performance:") # for gpu, perf in gpu_perf.items(): # lines.append(f"**{gpu}:**") # lines.append(f" - Tokens per Second: {perf.get('tokens_per_second', 0):.2f}") # lines.append(f" - FLOPs per Token: {perf.get('flops_per_token', 0):.2f}") # lines.append(f" - Effective TFLOPS: {perf.get('effective_tflops', 0):.2f}\n") #else: # lines.append("#### GPU Performance: N/A\n") return "\n".join(lines) # -------------------------- # Render Content Based on Selected Pill # -------------------------- if page == "Model Selection": st.divider() st.header("LLM Pricing App") # --- Row 1: Provider/Type and Model Selection --- col_left, col_right = st.columns(2) with col_left: selected_provider = st.selectbox( "Select a provider", st.session_state["providers"], index=st.session_state["providers"].index("azure") if "azure" in st.session_state["providers"] else 0 ) selected_type = st.radio("Select type", options=["text", "image"], index=0) with col_right: filtered_models = provider_change(selected_provider, selected_type) if filtered_models: default_model = "o1" if "o1" in filtered_models else filtered_models[0] selected_model = st.selectbox("Select a model", options=filtered_models, index=filtered_models.index(default_model)) else: selected_model = None st.write("No models available") # --- Row 2: Alert Stats --- col1, col2, col3 = st.columns(3) with col1: num_alerts = st.number_input("Security Alerts Per Day", value=100, min_value=1, step=1, help="Number of security alerts to analyze daily") with col2: input_size = st.number_input("Alert Content Size (characters)", value=1000, min_value=1, step=1, help="Include logs, metadata, and context per alert") with col3: output_size = st.number_input("Analysis Output Size (characters)", value=500, min_value=1, step=1, help="Expected length of security analysis and recommendations") # --- Row 3: Buttons --- btn_col1, btn_col2 = st.columns(2) with btn_col1: if st.button("Estimate"): if selected_model: st.session_state["result"] = estimate_cost(num_alerts, input_size, output_size, selected_model) else: st.session_state["result"] = "No model selected." with btn_col2: if st.button("Refresh Pricing Data"): with st.spinner("Refreshing pricing data..."): models, pricing, providers = asyncio.run(load_data()) st.session_state["models"] = models st.session_state["pricing"] = pricing st.session_state["providers"] = providers st.success("Pricing data refreshed!") st.divider() st.markdown("### Results") if "result" in st.session_state: st.write(st.session_state["result"]) else: st.write("Use the buttons above to estimate costs.") if st.button("Clear"): st.session_state.pop("result", None) elif page == "On Premise Estimator": st.divider() st.header("On Premise Estimator") st.markdown("Enter a Hugging Face model ID to perform an on premise analysis using the provided estimator.") hf_model_id = st.text_input("Hugging Face Model ID", value="meta-llama/Llama-4-Scout-17B-16E") if st.button("Analyze Model"): with st.spinner("Analyzing model..."): analysis_result = analyze_hf_model(hf_model_id) st.session_state["analysis_result"] = analysis_result if "analysis_result" in st.session_state: report = format_analysis_report(st.session_state["analysis_result"]) st.markdown(report) elif page == "About": st.divider() st.markdown( """ ## About This App This is based on the tokonomics package. - The app downloads the latest pricing from the LiteLLM repository. - Using simple maths to estimate the total tokens. - Helps you estimate hardware requirements for running open-source large language models (LLMs) on-premise using only the model ID from Hugging Face. - Latest Version 0.1 --- ### 📌 Version History | Version | Release Date | Key Feature Updates | |--------|--------------|---------------------| | `v1.1` | 2025-04-06 | Added On Premise Estimator Feature | | `v1.0` | 2025-03-26 | Initial release with basic total tokens estimation | --- Website: [https://www.priam.ai](https://www.priam.ai) """ ) st.markdown( """ ### Found a Bug? If you encounter any issues or have feedback, please email to **Ashfaaq@priam.ai** Your input helps us improve the app! """ )