File size: 10,409 Bytes
90c9a37
 
 
9b8af98
6913a64
90c9a37
3548bcc
90c9a37
 
 
 
3548bcc
90c9a37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67137a1
90c9a37
 
 
 
 
 
 
 
9b8af98
 
90c9a37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b8af98
 
67137a1
90c9a37
 
 
 
67137a1
90c9a37
67137a1
 
 
 
 
90c9a37
67137a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3548bcc
 
 
 
 
 
 
 
 
 
67137a1
 
6913a64
67137a1
 
 
 
 
90c9a37
 
 
 
9b8af98
 
 
 
 
90c9a37
 
 
 
9b8af98
67137a1
90c9a37
 
 
67137a1
90c9a37
 
 
67137a1
 
90c9a37
67137a1
 
90c9a37
67137a1
 
 
90c9a37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67137a1
90c9a37
 
 
 
 
 
67137a1
90c9a37
 
6913a64
67137a1
 
6913a64
 
3548bcc
6913a64
 
 
 
67137a1
 
6913a64
67137a1
6913a64
 
67137a1
 
90c9a37
 
 
 
 
 
 
 
67137a1
3548bcc
90c9a37
6913a64
 
 
 
 
 
3548bcc
67137a1
3548bcc
6913a64
 
 
90c9a37
 
 
6913a64
3548bcc
 
 
 
 
 
 
 
6913a64
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
import streamlit as st
import asyncio
import tokonomics
from utils import create_model_hierarchy
from utils_on import analyze_hf_model  # New import for On Premise Estimator functionality

st.set_page_config(page_title="LLM Pricing Calculator", layout="wide")

# --------------------------
# Async Data Loading Function
# --------------------------

async def load_data():
    """Fetch the model catalogue and per-model pricing via ``tokonomics``.

    The available models are grouped with ``create_model_hierarchy`` and the
    resulting provider -> family -> version -> region mapping is walked to
    collect every model ID together with its cost entry.

    Returns:
        A ``(model_ids, pricing, providers)`` tuple: the list of model IDs in
        traversal order, a dict mapping each model ID to the value returned
        by ``tokonomics.get_model_costs``, and the list of provider names.
    """
    available = await tokonomics.get_available_models()
    hierarchy = create_model_hierarchy(available)
    providers = list(hierarchy.keys())

    model_ids = []
    pricing = {}
    for provider in providers:
        families = hierarchy[provider]
        for family in families:
            versions = families[family]
            for version in versions.keys():
                regions = versions[version]
                for region in regions:
                    model_id = regions[region]
                    # Costs are requested one model at a time, in traversal order.
                    pricing[model_id] = await tokonomics.get_model_costs(model_id)
                    model_ids.append(model_id)
    return model_ids, pricing, providers

# --------------------------
# Provider Change Function
# --------------------------
def provider_change(provider, selected_type, all_types=("text", "vision", "video", "image")):
    """Filter the cached model IDs by provider and model type.

    A model ID is kept when it contains ``provider`` and either contains
    ``selected_type`` or contains none of the other entries of ``all_types``
    (IDs without any type marker are accepted for every type).

    Args:
        provider: Substring that must appear in the model ID.
        selected_type: Type marker the user selected (e.g. ``"text"``).
        all_types: Every known type marker. The default is an immutable
            tuple so the value cannot be shared-and-mutated across calls.

    Returns:
        The matching model IDs, or the full cached list from
        ``st.session_state["models"]`` when nothing matches.
    """
    all_models = st.session_state.get("models", [])
    other_types = [a_type for a_type in all_types if a_type != selected_type]
    matches = [
        model_name
        for model_name in all_models
        if provider in model_name
        and (
            selected_type in model_name
            or not any(other in model_name for other in other_types)
        )
    ]
    return matches if matches else all_models

# --------------------------
# Estimate Cost Function
# --------------------------
def estimate_cost(num_alerts, input_size, output_size, model_id):
    """Build a Markdown cost summary for analysing alerts with one model.

    Token counts are estimated as ``round(size * 1.3)`` for both the input
    and the output size. The per-alert cost is multiplied by ``num_alerts``
    to get the daily figure; month and year figures use 31 and 365 days.

    Args:
        num_alerts: Number of alerts processed per day.
        input_size: Size of one alert's input content.
        output_size: Size of one alert's analysis output.
        model_id: Key into the pricing table kept in ``st.session_state``.

    Returns:
        The formatted Markdown report, or ``"NA"`` when the pricing table
        has no (non-empty) entry for ``model_id``.
    """
    rates = st.session_state.get("pricing", {}).get(model_id)
    if not rates:
        return "NA"

    tokens_in = round(input_size * 1.3)
    tokens_out = round(output_size * 1.3)
    per_alert = (
        rates.get("input_cost_per_token", 0) * tokens_in
        + rates.get("output_cost_per_token", 0) * tokens_out
    )
    daily = per_alert * num_alerts

    # The two spaces before each "\n" are Markdown hard line breaks.
    return (
        "## Estimated Cost:\n"
        "\n"
        f"Day Price: {daily:0.2f} USD  \n"
        f"Month Price: {daily * 31:0.2f} USD  \n"
        f"Year Price: {daily * 365:0.2f} USD  \n"
    )

# --------------------------
# Load Data into Session State (only once)
# --------------------------
# Fetch pricing data only while the session has no "data_loaded" flag, so
# later runs of this script reuse the values already stored in the session.
if "data_loaded" not in st.session_state:
    with st.spinner("Loading pricing data..."):
        models, pricing, providers = asyncio.run(load_data())
        st.session_state.models = models
        st.session_state.pricing = pricing
        st.session_state.providers = providers
        st.session_state.data_loaded = True

# --------------------------
# Sidebar
# --------------------------
with st.sidebar:
    # Company logo, website link, then the app title below a divider.
    st.image(
        "https://cdn.prod.website-files.com/630f558f2a15ca1e88a2f774/631f1436ad7a0605fecc5e15_Logo.svg",
        use_container_width=True,
    )
    st.markdown("Visit: [https://www.priam.ai](https://www.priam.ai)")
    st.divider()
    # Already inside the sidebar container, so a plain st.title lands there.
    st.title("LLM Pricing Calculator")

# --------------------------
# Pills Navigation (Using st.pills)
# --------------------------
# st.pills creates a pill-style selection widget.
# In single-selection mode st.pills returns None when the active pill is
# clicked again (deselected). Fall back to the default page in that case so
# one of the page branches below always matches instead of rendering nothing.
page = st.pills(
    "Head",
    options=["Model Selection", "On Premise Estimator", "About"],
    selection_mode="single",
    default="Model Selection",
    label_visibility="hidden",
) or "Model Selection"

# --------------------------
# Helper: Format Analysis Report
# --------------------------
def format_analysis_report(analysis_result: dict) -> str:
    """Render an analysis result dict as a Markdown report.

    Args:
        analysis_result: Result mapping. When it has an ``"error"`` key only
            that message is reported. Otherwise the keys ``model_id``,
            ``parameter_size``, ``precision``, ``vram_requirements``,
            ``compatible_gpus`` and ``largest_compatible_gpu`` are read,
            each with a placeholder default when absent.

    Returns:
        The report as a single newline-joined Markdown string.
    """
    if "error" in analysis_result:
        return f"**Error:** {analysis_result['error']}"

    summary = [
        f"### Model Analysis Report for `{analysis_result.get('model_id', 'Unknown Model')}`\n",
        f"**Parameter Size:** {analysis_result.get('parameter_size', 'N/A')} Billion parameters\n",
        f"**Precision:** {analysis_result.get('precision', 'N/A')}\n",
    ]

    vram = analysis_result.get("vram_requirements", {})
    vram_section = [
        "#### VRAM Requirements:",
        f"- Model Size: {vram.get('model_size_gb', 0):.2f} GB",
        f"- KV Cache: {vram.get('kv_cache_gb', 0):.2f} GB",
        f"- Activations: {vram.get('activations_gb', 0):.2f} GB",
        f"- Overhead: {vram.get('overhead_gb', 0):.2f} GB",
        f"- **Total VRAM:** {vram.get('total_vram_gb', 0):.2f} GB\n",
    ]

    gpus = analysis_result.get("compatible_gpus", [])
    gpu_section = ["#### Compatible GPUs:"]
    # One bullet per GPU, or a single placeholder bullet when the list is empty.
    gpu_section += [f"- {gpu}" for gpu in gpus] if gpus else ["- None found"]

    closing = [
        f"\n**Largest Compatible GPU:** {analysis_result.get('largest_compatible_gpu', 'N/A')}\n"
    ]

    return "\n".join(summary + vram_section + gpu_section + closing)

# --------------------------
# Render Content Based on Selected Pill
# --------------------------
# Render exactly one page, chosen by the pill selected above.
if page == "Model Selection":
    st.divider()
    st.header("LLM Pricing App")
    # --- Row 1: Provider/Type and Model Selection ---
    col_left, col_right = st.columns(2)
    with col_left:
        # Pre-select "azure" when it is among the loaded providers.
        selected_provider = st.selectbox(
            "Select a provider",
            st.session_state["providers"],
            index=st.session_state["providers"].index("azure") if "azure" in st.session_state["providers"] else 0
        )
        selected_type = st.radio("Select type", options=["text", "image"], index=0)
    with col_right:
        filtered_models = provider_change(selected_provider, selected_type)
        if filtered_models:
            # Prefer "o1" as the default model when the filtered list contains it.
            default_model = "o1" if "o1" in filtered_models else filtered_models[0]
            selected_model = st.selectbox("Select a model", options=filtered_models, index=filtered_models.index(default_model))
        else:
            selected_model = None
            st.write("No models available")

    # --- Row 2: Alert Stats ---
    col1, col2, col3 = st.columns(3)
    with col1:
        num_alerts = st.number_input("Security Alerts Per Day", value=100, min_value=1, step=1,
                                     help="Number of security alerts to analyze daily")
    with col2:
        input_size = st.number_input("Alert Content Size (characters)", value=1000, min_value=1, step=1,
                                     help="Include logs, metadata, and context per alert")
    with col3:
        output_size = st.number_input("Analysis Output Size (characters)", value=500, min_value=1, step=1,
                                      help="Expected length of security analysis and recommendations")

    # --- Row 3: Buttons ---
    btn_col1, btn_col2 = st.columns(2)
    with btn_col1:
        if st.button("Estimate"):
            # The result is stored in the session so it survives later reruns.
            if selected_model:
                st.session_state["result"] = estimate_cost(num_alerts, input_size, output_size, selected_model)
            else:
                st.session_state["result"] = "No model selected."
    with btn_col2:
        if st.button("Refresh Pricing Data"):
            with st.spinner("Refreshing pricing data..."):
                models, pricing, providers = asyncio.run(load_data())
                st.session_state["models"] = models
                st.session_state["pricing"] = pricing
                st.session_state["providers"] = providers
                st.success("Pricing data refreshed!")

    st.divider()
    st.markdown("### Results")
    if "result" in st.session_state:
        st.write(st.session_state["result"])
    else:
        st.write("Use the buttons above to estimate costs.")

    # Clear through an on_click callback: the callback runs before the script
    # body of the triggered rerun, so the Results section above is already
    # rendered without the stale result on the same click. Popping the key
    # after the button call (as before) left the old result on screen until
    # the next interaction.
    st.button("Clear", on_click=lambda: st.session_state.pop("result", None))

elif page == "On Premise Estimator":
    st.divider()
    st.header("On Premise Estimator")
    st.markdown("Enter a Hugging Face model ID to perform an on premise analysis using the provided estimator.")
    hf_model_id = st.text_input("Hugging Face Model ID", value="meta-llama/Llama-4-Scout-17B-16E")

    if st.button("Analyze Model"):
        with st.spinner("Analyzing model..."):
            analysis_result = analyze_hf_model(hf_model_id)
        # Keep the last analysis in the session so it stays visible on reruns.
        st.session_state["analysis_result"] = analysis_result

    if "analysis_result" in st.session_state:
        report = format_analysis_report(st.session_state["analysis_result"])
        st.markdown(report)

elif page == "About":
    st.divider()
    st.markdown(
        """
        ## About This App

        This is based on the tokonomics package.

        - The app downloads the latest pricing from the LiteLLM repository.
        - Using simple maths to estimate the total tokens.
        - Helps you estimate hardware requirements for running open-source large language models (LLMs) on-premise using only the model ID from Hugging Face.
        - Latest Version 1.1

        ---

        ### 📌 Version History

        | Version | Release Date | Key Feature Updates |
        |--------|--------------|---------------------|
        | `v1.1` | 2025-04-06  | Added On Premise Estimator Feature |
        | `v1.0` | 2025-03-26  | Initial release with basic total tokens estimation |


        ---

        Website: [https://www.priam.ai](https://www.priam.ai)
        """
    )
    # NOTE(review): the contact address below looks like an e-mail-obfuscation
    # placeholder rather than a real address — confirm and restore the intended one.
    st.markdown(
    """
    ### Found a Bug?

    If you encounter any issues or have feedback, please email to **[email protected]**

    Your input helps us improve the app!
    """
    )