# Timothy S. Phan
# Adds initial Streamlit app to HF Spaces
# Commit: f4147c3
import streamlit as st
import anthropic
import json
import os
from huggingface_hub import login
from transformers import AutoTokenizer
# Page chrome: browser-tab title/icon and wide layout.
# set_page_config must be the first Streamlit call in the script.
st.set_page_config(page_title="LLM Token Counter", page_icon="🤖", layout="wide")
st.title("🎈 LLM Token Counter")
st.markdown(
"This app counts tokens for different language models based on your input text."
)
# Tabs for model provider selection; provider_tab[0] = Anthropic, [1] = Hugging Face
provider_tab = st.tabs(["Anthropic Models", "Hugging Face Models"])
with provider_tab[0]:  # Anthropic Models
    # Count input tokens for Claude models via Anthropic's count_tokens endpoint.
    st.header("Anthropic (Claude) Models")

    # API key input (with warning about security)
    anthropic_key = st.text_input(
        "Enter your Anthropic API Key",
        type="password",
        help="⚠️ Never share your API key. Leave empty to use ANTHROPIC_API_KEY environment variable.",
    )
    # If no key provided, fall back to the environment variable
    if not anthropic_key:
        anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")

    # Model selection: display name -> API model identifier (snapshot date)
    anthropic_model_options = {
        "Claude 3.7 Sonnet": "claude-3-7-sonnet-20250219",
        "Claude 3.5 Sonnet": "claude-3-5-sonnet-20240620",
        # Fixed: the 3.5 Haiku snapshot is 20241022; 20240307 is the
        # Claude 3 Haiku date, so the old ID was rejected by the API.
        "Claude 3.5 Haiku": "claude-3-5-haiku-20241022",
        "Claude 3 Haiku": "claude-3-haiku-20240307",
        "Claude 3 Opus": "claude-3-opus-20240229",
    }
    selected_anthropic_model = st.selectbox(
        "Select Claude Model", list(anthropic_model_options.keys())
    )

    # Optional system message — counts toward input tokens when supplied
    st.subheader("System Message (Optional)")
    system_message = st.text_area(
        "System Message", placeholder="e.g., You are a helpful assistant", height=100
    )

    # User message input
    st.subheader("Message Content")
    anthropic_user_message = st.text_area(
        "Enter your message here",
        placeholder="Hello, Claude! How are you today?",
        height=200,
        key="anthropic_message",
    )

    # Button to count tokens for Anthropic
    if st.button("Count Tokens (Anthropic)"):
        if not anthropic_key:
            st.error(
                "No Anthropic API key found. Please enter a key or set the ANTHROPIC_API_KEY environment variable."
            )
        elif not anthropic_user_message:
            st.warning("Please enter a message to count tokens")
        else:
            try:
                # Initialize client with the resolved API key
                client = anthropic.Anthropic(api_key=anthropic_key)
                # Build the count_tokens request payload
                count_request = {
                    "model": anthropic_model_options[selected_anthropic_model],
                    "messages": [{"role": "user", "content": anthropic_user_message}],
                }
                # Include the system prompt only when non-empty
                if system_message:
                    count_request["system"] = system_message
                # Make the API call to count tokens
                response = client.messages.count_tokens(**count_request)
                # Display results
                st.success(f"Input tokens: {response.input_tokens}")
                # Full JSON response in an expandable section for debugging
                with st.expander("View Full API Response"):
                    st.code(
                        json.dumps(response.model_dump(), indent=2), language="json"
                    )
            except Exception as e:
                # Surface API/network errors in the UI instead of crashing the app
                st.error(f"An error occurred: {str(e)}")
with provider_tab[1]:  # Hugging Face Models
    # Count tokens locally with a Hugging Face tokenizer (no inference API call).
    st.header("Hugging Face Models")

    # HF token input
    hf_token = st.text_input(
        "Enter your Hugging Face Token",
        type="password",
        help="⚠️ Never share your token. Leave empty to use HF_TOKEN environment variable.",
    )
    # If no token provided, fall back to the environment variable
    if not hf_token:
        hf_token = os.environ.get("HF_TOKEN", "")

    # Login status persisted across Streamlit reruns
    if "hf_logged_in" not in st.session_state:
        st.session_state.hf_logged_in = False

    # Explicit login button (needed only once per session)
    if not st.session_state.hf_logged_in and st.button("Login to Hugging Face"):
        if not hf_token:
            st.error(
                "No Hugging Face token found. Please enter a token or set the HF_TOKEN environment variable."
            )
        else:
            try:
                login(token=hf_token)
                st.session_state.hf_logged_in = True
                st.success("Successfully logged in to Hugging Face")
            except Exception as e:
                st.error(f"Login failed: {str(e)}")

    if st.session_state.hf_logged_in or hf_token:
        # Predefined popular models
        hf_model_options = [
            "mistralai/Mistral-Small-24B-Instruct-2501",
            "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
            "google/codegemma-7b",
            "Qwen/Qwen2.5-Coder-32B-Instruct",
            "microsoft/Phi-4-multimodal-instruct",
            "nvidia/Llama-3.3-70B-Instruct-FP4",
            "Other (specify)",
        ]
        selected_hf_model = st.selectbox("Select Hugging Face Model", hf_model_options)

        # Free-form model id for anything not in the list
        if selected_hf_model == "Other (specify)":
            custom_hf_model = st.text_input(
                "Enter model name (e.g., organization/model-name)"
            )
            selected_hf_model = (
                custom_hf_model if custom_hf_model else "gpt2"
            )  # Default to gpt2 if empty

        # User message input for HF
        hf_user_message = st.text_area(
            "Enter your message here",
            placeholder="Hello, world!",
            height=200,
            key="hf_message",
        )

        # Button to count tokens for HF
        if st.button("Count Tokens (Hugging Face)"):
            if not hf_user_message:
                st.warning("Please enter a message to count tokens")
            else:
                try:
                    with st.spinner(f"Loading tokenizer for {selected_hf_model}..."):
                        # Fixed: pass the token explicitly so gated/private
                        # models work even when the user skipped the Login
                        # button (this UI branch accepts a bare token).
                        tokenizer = AutoTokenizer.from_pretrained(
                            selected_hf_model, token=hf_token or None
                        )
                    # Two views of the same text: surface tokens vs. encoded ids.
                    # NOTE: encode() may add special tokens (BOS/EOS), so the
                    # two counts can legitimately differ.
                    tokens = tokenizer.tokenize(hf_user_message)
                    token_ids = tokenizer.encode(hf_user_message)
                    # Display results
                    st.success(f"Token count: {len(tokens)}")
                    st.success(f"Token IDs count: {len(token_ids)}")
                    # Show the actual tokens side by side with their ids
                    with st.expander("View Token Details"):
                        col1, col2 = st.columns(2)
                        with col1:
                            st.subheader("Tokens")
                            st.json([f"{i}: {token}" for i, token in enumerate(tokens)])
                        with col2:
                            st.subheader("Token IDs")
                            st.json(
                                [
                                    f"{i}: {token_id}"
                                    for i, token_id in enumerate(token_ids)
                                ]
                            )
                except Exception as e:
                    # Covers bad model ids, network failures, and auth errors
                    st.error(f"An error occurred: {str(e)}")
# Additional information — static help text shared by both tabs.
# (Markdown content stays at column 0: indenting it four spaces would make
# Streamlit render it as a preformatted code block.)
with st.expander("About Token Counting"):
    st.markdown("""
### What are tokens?
Tokens are chunks of text that language models process. They can be parts of words, whole words,
or even punctuation. Different models tokenize text differently.
### Why count tokens?
- **Cost Management**: Understanding token usage helps manage API costs
- **Model Limitations**: Different models have different token limits
- **Performance Optimization**: Helps optimize prompts for better responses
### Token Counting Tips
- Shorter messages use fewer tokens
- Special formatting, code blocks, and unusual characters may use more tokens
- For Claude models, the system message also counts toward your token usage
- Hugging Face models may tokenize text differently than Anthropic models
""")

# Footer
st.markdown("---")
st.markdown("Created with Streamlit, Anthropic API, and Hugging Face Transformers")