"""llm-token-counter / utils.py

Author: Timothy S. Phan
Splits into different pages to handle different models
(commit 176f432)
"""
import streamlit as st
def set_page_config():
    """Configure the Streamlit page: title, robot icon, centered layout."""
    st.set_page_config(
        page_title="LLM Token Counter",
        page_icon="🤖",
        layout="centered",
    )
def display_about_token_counting():
    """Render a collapsible expander explaining what tokens are and why counting them matters."""
    about_section = st.expander("About Token Counting")
    with about_section:
        st.markdown("""
### What are tokens?
Tokens are chunks of text that language models process. They can be parts of words, whole words,
or even punctuation. Different models tokenize text differently.
### Why count tokens?
- **Cost Management**: Understanding token usage helps manage API costs
- **Model Limitations**: Different models have different token limits
- **Performance Optimization**: Helps optimize prompts for better responses
### Token Counting Tips
- Shorter messages use fewer tokens
- Special formatting, code blocks, and unusual characters may use more tokens
- For Claude models, the system message also counts toward your token usage
- Hugging Face models may tokenize text differently than Anthropic models
""")
def display_footer():
    """Render the app footer: a horizontal rule followed by an attribution line."""
    footer_lines = (
        "---",
        "Created with Streamlit, Anthropic API, and Hugging Face Transformers",
    )
    for line in footer_lines:
        st.markdown(line)
# Constants for model options

# Display name -> Anthropic API model identifier (snapshot-dated IDs).
ANTHROPIC_MODEL_OPTIONS = {
    "Claude 3.7 Sonnet": "claude-3-7-sonnet-20250219",
    "Claude 3.5 Sonnet": "claude-3-5-sonnet-20240620",
    # Fixed: Claude 3.5 Haiku's snapshot date is 2024-10-22. The previous
    # value "claude-3-5-haiku-20240307" reused Claude 3 Haiku's date and is
    # not a valid Anthropic model ID, so API calls with it would fail.
    "Claude 3.5 Haiku": "claude-3-5-haiku-20241022",
    "Claude 3 Haiku": "claude-3-haiku-20240307",
    "Claude 3 Opus": "claude-3-opus-20240229",
}

# Hugging Face model IDs offered for tokenizer selection; the final entry
# lets the user type an arbitrary model ID instead.
HF_MODEL_OPTIONS = [
    "mistralai/Mistral-Small-24B-Instruct-2501",
    "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
    "google/codegemma-7b",
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "microsoft/Phi-4-multimodal-instruct",
    "nvidia/Llama-3.3-70B-Instruct-FP4",
    "Other (specify)",
]