"""llm-token-counter / utils.py

Author: Timothy S. Phan
Splits into different pages to handle different models
(commit 176f432)
"""
import streamlit as st
def set_page_config():
    """Configure the Streamlit page: title, robot icon, centered layout."""
    st.set_page_config(
        page_title="LLM Token Counter",
        page_icon="🤖",
        layout="centered",
    )
def display_about_token_counting():
    """Render a collapsible expander explaining what tokens are and why counting them matters."""
    about_section = st.expander("About Token Counting")
    with about_section:
        st.markdown("""
### What are tokens?
Tokens are chunks of text that language models process. They can be parts of words, whole words,
or even punctuation. Different models tokenize text differently.
### Why count tokens?
- **Cost Management**: Understanding token usage helps manage API costs
- **Model Limitations**: Different models have different token limits
- **Performance Optimization**: Helps optimize prompts for better responses
### Token Counting Tips
- Shorter messages use fewer tokens
- Special formatting, code blocks, and unusual characters may use more tokens
- For Claude models, the system message also counts toward your token usage
- Hugging Face models may tokenize text differently than Anthropic models
""")
def display_footer():
    """Render the app footer: a horizontal rule followed by an attribution line."""
    footer_lines = (
        "---",
        "Created with Streamlit, Anthropic API, and Hugging Face Transformers",
    )
    for line in footer_lines:
        st.markdown(line)
# Constants for model options

# Display name -> Anthropic API model identifier (snapshot-dated IDs).
ANTHROPIC_MODEL_OPTIONS = {
    "Claude 3.7 Sonnet": "claude-3-7-sonnet-20250219",
    "Claude 3.5 Sonnet": "claude-3-5-sonnet-20240620",
    # Fixed: Claude 3.5 Haiku's snapshot date is 2024-10-22. The previous
    # value "claude-3-5-haiku-20240307" reused Claude 3 Haiku's date and is
    # not a valid Anthropic model ID, so API calls with it would fail.
    "Claude 3.5 Haiku": "claude-3-5-haiku-20241022",
    "Claude 3 Haiku": "claude-3-haiku-20240307",
    "Claude 3 Opus": "claude-3-opus-20240229",
}

# Hugging Face model IDs offered for tokenizer selection; the final entry
# lets the user type an arbitrary model ID instead.
HF_MODEL_OPTIONS = [
    "mistralai/Mistral-Small-24B-Instruct-2501",
    "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
    "google/codegemma-7b",
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "microsoft/Phi-4-multimodal-instruct",
    "nvidia/Llama-3.3-70B-Instruct-FP4",
    "Other (specify)",
]