import asyncio

import streamlit as st
import tokonomics

from utils import create_model_hierarchy
from utils_on import analyze_hf_model

st.set_page_config(page_title="LLM Pricing Calculator", layout="wide")
async def load_data():
    """Fetch the available models and their per-token pricing via tokonomics."""
    available_models = await tokonomics.get_available_models()
    hierarchy = create_model_hierarchy(available_models)

    filtered_models = []
    model_pricing = {}
    providers = list(hierarchy.keys())
    # Walk the provider -> family -> version -> region hierarchy and fetch
    # per-token costs for every concrete model id.
    for provider in providers:
        for model_family in hierarchy[provider]:
            for model_version in hierarchy[provider][model_family]:
                for region in hierarchy[provider][model_family][model_version]:
                    model_id = hierarchy[provider][model_family][model_version][region]
                    model_pricing[model_id] = await tokonomics.get_model_costs(model_id)
                    filtered_models.append(model_id)
    return filtered_models, model_pricing, providers
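# A minimal sketch of the nested mapping shape the loop above assumes
# `create_model_hierarchy` returns (illustrative names and values only,
# not the real data):
#
#     {
#         "openai": {                          # provider
#             "gpt-4o": {                      # model family
#                 "2024-08-06": {              # model version
#                     "global": "gpt-4o-2024-08-06",  # region -> model id
#                 },
#             },
#         },
#     }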
def provider_change(provider, selected_type, all_types=("text", "vision", "video", "image")):
    """Filter the cached model list by provider and model type.

    A model is kept if its name contains the provider and either contains the
    selected type or mentions none of the other known types.
    """
    all_models = st.session_state.get("models", [])
    new_models = []
    others = [a_type for a_type in all_types if a_type != selected_type]
    for model_name in all_models:
        if provider in model_name:
            if selected_type in model_name:
                new_models.append(model_name)
            elif any(other in model_name for other in others):
                # The name is marked with a different type, so skip it.
                continue
            else:
                # No type marker in the name; treat it as a match.
                new_models.append(model_name)
    # Fall back to the full list rather than showing an empty selectbox.
    return new_models if new_models else all_models
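# For example, with hypothetical ids ["azure/gpt-4-vision", "azure/gpt-4",
# "openai/gpt-4"] cached in st.session_state["models"], the call
# provider_change("azure", "text") drops "openai/gpt-4" (wrong provider),
# drops "azure/gpt-4-vision" (marked with another type), and keeps
# "azure/gpt-4" (no type marker in its name).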
def estimate_cost(num_alerts, input_size, output_size, model_id):
    """Estimate daily, monthly, and yearly cost of analyzing alerts with a model."""
    pricing = st.session_state.get("pricing", {})
    cost_token = pricing.get(model_id)
    if not cost_token:
        return "NA"
    # Simple heuristic used by this app: characters * 1.3 ~= token count.
    input_tokens = round(input_size * 1.3)
    output_tokens = round(output_size * 1.3)
    # Cost of a single alert, then scaled to the daily alert volume.
    price_per_alert = cost_token.get("input_cost_per_token", 0) * input_tokens + \
        cost_token.get("output_cost_per_token", 0) * output_tokens
    price_total = price_per_alert * num_alerts
    return f"""## Estimated Cost:

Day Price: {price_total:0.2f} USD

Month Price: {price_total * 31:0.2f} USD

Year Price: {price_total * 365:0.2f} USD
"""
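# Worked example with hypothetical prices: at $3e-06 per input token and
# $6e-06 per output token, 100 alerts/day of 1000 input chars and 500 output
# chars give round(1000 * 1.3) = 1300 input and round(500 * 1.3) = 650 output
# tokens per alert, i.e. (1300 * 3e-06 + 650 * 6e-06) * 100 = 0.78 USD/day.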
if "data_loaded" not in st.session_state: |
|
with st.spinner("Loading pricing data..."): |
|
models, pricing, providers = asyncio.run(load_data()) |
|
st.session_state["models"] = models |
|
st.session_state["pricing"] = pricing |
|
st.session_state["providers"] = providers |
|
st.session_state["data_loaded"] = True |
|
|
|
|
|
|
|
|
|
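# Streamlit reruns the whole script on every interaction; the "data_loaded"
# flag keeps the slow pricing download above from repeating on each rerun.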
with st.sidebar:
    st.image(
        "https://cdn.prod.website-files.com/630f558f2a15ca1e88a2f774/631f1436ad7a0605fecc5e15_Logo.svg",
        use_container_width=True,
    )
    st.markdown("Visit: [https://www.priam.ai](https://www.priam.ai)")
    st.divider()
    st.title("LLM Pricing Calculator")
page = st.pills(
    "Head",
    options=["Model Selection", "On Premise Estimator", "About"],
    selection_mode="single",
    default="Model Selection",
    label_visibility="hidden",
)
def format_analysis_report(analysis_result: dict) -> str:
    """Convert the raw analysis_result dict into a human-readable report."""
    if "error" in analysis_result:
        return f"**Error:** {analysis_result['error']}"

    lines = []
    lines.append(f"### Model Analysis Report for `{analysis_result.get('model_id', 'Unknown Model')}`\n")
    lines.append(f"**Parameter Size:** {analysis_result.get('parameter_size', 'N/A')} Billion parameters\n")
    lines.append(f"**Precision:** {analysis_result.get('precision', 'N/A')}\n")

    vram = analysis_result.get("vram_requirements", {})
    lines.append("#### VRAM Requirements:")
    lines.append(f"- Model Size: {vram.get('model_size_gb', 0):.2f} GB")
    lines.append(f"- KV Cache: {vram.get('kv_cache_gb', 0):.2f} GB")
    lines.append(f"- Activations: {vram.get('activations_gb', 0):.2f} GB")
    lines.append(f"- Overhead: {vram.get('overhead_gb', 0):.2f} GB")
    lines.append(f"- **Total VRAM:** {vram.get('total_vram_gb', 0):.2f} GB\n")

    compatible_gpus = analysis_result.get("compatible_gpus", [])
    lines.append("#### Compatible GPUs:")
    if compatible_gpus:
        for gpu in compatible_gpus:
            lines.append(f"- {gpu}")
    else:
        lines.append("- None found")
    lines.append(f"\n**Largest Compatible GPU:** {analysis_result.get('largest_compatible_gpu', 'N/A')}\n")

    return "\n".join(lines)
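# A minimal sketch of the analysis_result keys the formatter above reads.
# The keys match the .get() calls; the values are illustrative only (the
# real dict comes from utils_on.analyze_hf_model):
#
#     {
#         "model_id": "meta-llama/Llama-4-Scout-17B-16E",
#         "parameter_size": 17,
#         "precision": "fp16",
#         "vram_requirements": {"model_size_gb": 34.0, "kv_cache_gb": 2.5,
#                               "activations_gb": 1.8, "overhead_gb": 2.0,
#                               "total_vram_gb": 40.3},
#         "compatible_gpus": ["NVIDIA A100 80GB"],
#         "largest_compatible_gpu": "NVIDIA A100 80GB",
#     }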
if page == "Model Selection": |
|
st.divider() |
|
st.header("LLM Pricing App") |
|
|
|
col_left, col_right = st.columns(2) |
|
with col_left: |
|
selected_provider = st.selectbox( |
|
"Select a provider", |
|
st.session_state["providers"], |
|
index=st.session_state["providers"].index("azure") if "azure" in st.session_state["providers"] else 0 |
|
) |
|
selected_type = st.radio("Select type", options=["text", "image"], index=0) |
|
with col_right: |
|
filtered_models = provider_change(selected_provider, selected_type) |
|
if filtered_models: |
|
default_model = "o1" if "o1" in filtered_models else filtered_models[0] |
|
selected_model = st.selectbox("Select a model", options=filtered_models, index=filtered_models.index(default_model)) |
|
else: |
|
selected_model = None |
|
st.write("No models available") |
|
|
|
|
|
    col1, col2, col3 = st.columns(3)
    with col1:
        num_alerts = st.number_input("Security Alerts Per Day", value=100, min_value=1, step=1,
                                     help="Number of security alerts to analyze daily")
    with col2:
        input_size = st.number_input("Alert Content Size (characters)", value=1000, min_value=1, step=1,
                                     help="Include logs, metadata, and context per alert")
    with col3:
        output_size = st.number_input("Analysis Output Size (characters)", value=500, min_value=1, step=1,
                                      help="Expected length of security analysis and recommendations")
    btn_col1, btn_col2 = st.columns(2)
    with btn_col1:
        if st.button("Estimate"):
            if selected_model:
                st.session_state["result"] = estimate_cost(num_alerts, input_size, output_size, selected_model)
            else:
                st.session_state["result"] = "No model selected."
    with btn_col2:
        if st.button("Refresh Pricing Data"):
            with st.spinner("Refreshing pricing data..."):
                models, pricing, providers = asyncio.run(load_data())
                st.session_state["models"] = models
                st.session_state["pricing"] = pricing
                st.session_state["providers"] = providers
                st.success("Pricing data refreshed!")
    st.divider()
    st.markdown("### Results")
    if "result" in st.session_state:
        st.write(st.session_state["result"])
    else:
        st.write("Use the buttons above to estimate costs.")

    if st.button("Clear"):
        st.session_state.pop("result", None)
elif page == "On Premise Estimator": |
|
st.divider() |
|
st.header("On Premise Estimator") |
|
st.markdown("Enter a Hugging Face model ID to perform an on premise analysis using the provided estimator.") |
|
hf_model_id = st.text_input("Hugging Face Model ID", value="meta-llama/Llama-4-Scout-17B-16E") |
|
|
|
if st.button("Analyze Model"): |
|
with st.spinner("Analyzing model..."): |
|
analysis_result = analyze_hf_model(hf_model_id) |
|
st.session_state["analysis_result"] = analysis_result |
|
|
|
if "analysis_result" in st.session_state: |
|
report = format_analysis_report(st.session_state["analysis_result"]) |
|
st.markdown(report) |
|
|
|
elif page == "About": |
|
st.divider() |
|
st.markdown( |
|
""" |
|
## About This App |
|
|
|
This is based on the tokonomics package. |
|
|
|
- The app downloads the latest pricing from the LiteLLM repository. |
|
- Using simple maths to estimate the total tokens. |
|
- Helps you estimate hardware requirements for running open-source large language models (LLMs) on-premise using only the model ID from Hugging Face. |
|
- Latest Version 0.1 |
|
|
|
--- |
|
|
|
### π Version History |
|
|
|
| Version | Release Date | Key Feature Updates | |
|
|--------|--------------|---------------------| |
|
| `v1.1` | 2025-04-06 | Added On Premise Estimator Feature | |
|
| `v1.0` | 2025-03-26 | Initial release with basic total tokens estimation | |
|
|
|
|
|
--- |
|
|
|
Website: [https://www.priam.ai](https://www.priam.ai) |
|
""" |
|
) |
|
st.markdown( |
|
""" |
|
### Found a Bug? |
|
|
|
If you encounter any issues or have feedback, please email to **[email protected]** |
|
|
|
Your input helps us improve the app! |
|
""" |
|
) |
|
|
|
|