Spaces:
Running
Running
Timothy S. Phan
committed on
Commit
·
176f432
1
Parent(s):
f4147c3
Splits into different pages to handle different models
Browse files- .streamlit/config.toml +2 -0
- app.py +21 -211
- config.py +0 -0
- pages/__init__.py +0 -0
- pages/anthropic_models.py +80 -0
- pages/huggingface_models.py +107 -0
- utils.py +58 -0
.streamlit/config.toml
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
[client]
|
2 |
+
showSidebarNavigation = false
|
app.py
CHANGED
@@ -1,220 +1,30 @@
|
|
1 |
import streamlit as st
|
2 |
-
import
|
3 |
-
import
|
4 |
-
import os
|
5 |
-
from huggingface_hub import login
|
6 |
-
from transformers import AutoTokenizer
|
7 |
|
8 |
-
|
|
|
|
|
|
|
|
|
9 |
|
10 |
-
st.title("🎈 LLM Token Counter")
|
11 |
-
st.markdown(
|
12 |
-
"This app counts tokens for different language models based on your input text."
|
13 |
-
)
|
14 |
|
15 |
-
# Tabs for model provider selection
|
16 |
-
provider_tab = st.tabs(["Anthropic Models", "Hugging Face Models"])
|
17 |
|
18 |
-
with provider_tab[0]: # Anthropic Models
|
19 |
-
|
20 |
|
21 |
-
#
|
22 |
-
|
23 |
-
"Enter your Anthropic API Key",
|
24 |
-
type="password",
|
25 |
-
help="⚠️ Never share your API key. Leave empty to use ANTHROPIC_API_KEY environment variable.",
|
26 |
-
)
|
27 |
|
28 |
-
#
|
29 |
-
|
30 |
-
anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")
|
31 |
|
32 |
-
#
|
33 |
-
|
34 |
-
"Claude 3.7 Sonnet": "claude-3-7-sonnet-20250219",
|
35 |
-
"Claude 3.5 Sonnet": "claude-3-5-sonnet-20240620",
|
36 |
-
"Claude 3.5 Haiku": "claude-3-5-haiku-20240307",
|
37 |
-
"Claude 3 Haiku": "claude-3-haiku-20240307",
|
38 |
-
"Claude 3 Opus": "claude-3-opus-20240229",
|
39 |
-
}
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
)
|
44 |
-
|
45 |
-
# System message (optional)
|
46 |
-
st.subheader("System Message (Optional)")
|
47 |
-
system_message = st.text_area(
|
48 |
-
"System Message", placeholder="e.g., You are a helpful assistant", height=100
|
49 |
-
)
|
50 |
-
|
51 |
-
# User message input
|
52 |
-
st.subheader("Message Content")
|
53 |
-
anthropic_user_message = st.text_area(
|
54 |
-
"Enter your message here",
|
55 |
-
placeholder="Hello, Claude! How are you today?",
|
56 |
-
height=200,
|
57 |
-
key="anthropic_message",
|
58 |
-
)
|
59 |
-
|
60 |
-
# Button to count tokens for Anthropic
|
61 |
-
if st.button("Count Tokens (Anthropic)"):
|
62 |
-
if not anthropic_key:
|
63 |
-
st.error(
|
64 |
-
"No Anthropic API key found. Please enter a key or set the ANTHROPIC_API_KEY environment variable."
|
65 |
-
)
|
66 |
-
elif not anthropic_user_message:
|
67 |
-
st.warning("Please enter a message to count tokens")
|
68 |
-
else:
|
69 |
-
try:
|
70 |
-
# Initialize client with API key
|
71 |
-
client = anthropic.Anthropic(api_key=anthropic_key)
|
72 |
-
|
73 |
-
# Create the request
|
74 |
-
count_request = {
|
75 |
-
"model": anthropic_model_options[selected_anthropic_model],
|
76 |
-
"messages": [{"role": "user", "content": anthropic_user_message}],
|
77 |
-
}
|
78 |
-
|
79 |
-
# Add system message if provided
|
80 |
-
if system_message:
|
81 |
-
count_request["system"] = system_message
|
82 |
-
|
83 |
-
# Make the API call to count tokens
|
84 |
-
response = client.messages.count_tokens(**count_request)
|
85 |
-
|
86 |
-
# Display results
|
87 |
-
st.success(f"Input tokens: {response.input_tokens}")
|
88 |
-
|
89 |
-
# Display the full JSON response in an expandable section
|
90 |
-
with st.expander("View Full API Response"):
|
91 |
-
st.code(
|
92 |
-
json.dumps(response.model_dump(), indent=2), language="json"
|
93 |
-
)
|
94 |
-
|
95 |
-
except Exception as e:
|
96 |
-
st.error(f"An error occurred: {str(e)}")
|
97 |
-
|
98 |
-
with provider_tab[1]: # Hugging Face Models
|
99 |
-
st.header("Hugging Face Models")
|
100 |
-
|
101 |
-
# HF Token input
|
102 |
-
hf_token = st.text_input(
|
103 |
-
"Enter your Hugging Face Token",
|
104 |
-
type="password",
|
105 |
-
help="⚠️ Never share your token. Leave empty to use HF_TOKEN environment variable.",
|
106 |
-
)
|
107 |
-
|
108 |
-
# If no token provided, try to get from environment
|
109 |
-
if not hf_token:
|
110 |
-
hf_token = os.environ.get("HF_TOKEN", "")
|
111 |
-
|
112 |
-
# Login status tracker
|
113 |
-
if "hf_logged_in" not in st.session_state:
|
114 |
-
st.session_state.hf_logged_in = False
|
115 |
-
|
116 |
-
# Login button
|
117 |
-
if not st.session_state.hf_logged_in and st.button("Login to Hugging Face"):
|
118 |
-
if not hf_token:
|
119 |
-
st.error(
|
120 |
-
"No Hugging Face token found. Please enter a token or set the HF_TOKEN environment variable."
|
121 |
-
)
|
122 |
-
else:
|
123 |
-
try:
|
124 |
-
login(token=hf_token)
|
125 |
-
st.session_state.hf_logged_in = True
|
126 |
-
st.success("Successfully logged in to Hugging Face")
|
127 |
-
except Exception as e:
|
128 |
-
st.error(f"Login failed: {str(e)}")
|
129 |
-
|
130 |
-
if st.session_state.hf_logged_in or hf_token:
|
131 |
-
# Predefined popular models
|
132 |
-
hf_model_options = [
|
133 |
-
"mistralai/Mistral-Small-24B-Instruct-2501",
|
134 |
-
"mistralai/Mistral-Small-3.1-24B-Instruct-2503",
|
135 |
-
"google/codegemma-7b",
|
136 |
-
"Qwen/Qwen2.5-Coder-32B-Instruct",
|
137 |
-
"microsoft/Phi-4-multimodal-instruct",
|
138 |
-
"nvidia/Llama-3.3-70B-Instruct-FP4",
|
139 |
-
"Other (specify)",
|
140 |
-
]
|
141 |
-
|
142 |
-
selected_hf_model = st.selectbox("Select Hugging Face Model", hf_model_options)
|
143 |
-
|
144 |
-
# Custom model input
|
145 |
-
if selected_hf_model == "Other (specify)":
|
146 |
-
custom_hf_model = st.text_input(
|
147 |
-
"Enter model name (e.g., organization/model-name)"
|
148 |
-
)
|
149 |
-
selected_hf_model = (
|
150 |
-
custom_hf_model if custom_hf_model else "gpt2"
|
151 |
-
) # Default to gpt2 if empty
|
152 |
-
|
153 |
-
# User message input for HF
|
154 |
-
hf_user_message = st.text_area(
|
155 |
-
"Enter your message here",
|
156 |
-
placeholder="Hello, world!",
|
157 |
-
height=200,
|
158 |
-
key="hf_message",
|
159 |
-
)
|
160 |
-
|
161 |
-
# Button to count tokens for HF
|
162 |
-
if st.button("Count Tokens (Hugging Face)"):
|
163 |
-
if not hf_user_message:
|
164 |
-
st.warning("Please enter a message to count tokens")
|
165 |
-
else:
|
166 |
-
try:
|
167 |
-
with st.spinner(f"Loading tokenizer for {selected_hf_model}..."):
|
168 |
-
tokenizer = AutoTokenizer.from_pretrained(selected_hf_model)
|
169 |
-
|
170 |
-
# Count tokens in different ways
|
171 |
-
tokens = tokenizer.tokenize(hf_user_message)
|
172 |
-
token_ids = tokenizer.encode(hf_user_message)
|
173 |
-
|
174 |
-
# Display results
|
175 |
-
st.success(f"Token count: {len(tokens)}")
|
176 |
-
st.success(f"Token IDs count: {len(token_ids)}")
|
177 |
-
|
178 |
-
# Show the actual tokens
|
179 |
-
with st.expander("View Token Details"):
|
180 |
-
col1, col2 = st.columns(2)
|
181 |
-
with col1:
|
182 |
-
st.subheader("Tokens")
|
183 |
-
st.json([f"{i}: {token}" for i, token in enumerate(tokens)])
|
184 |
-
with col2:
|
185 |
-
st.subheader("Token IDs")
|
186 |
-
st.json(
|
187 |
-
[
|
188 |
-
f"{i}: {token_id}"
|
189 |
-
for i, token_id in enumerate(token_ids)
|
190 |
-
]
|
191 |
-
)
|
192 |
-
|
193 |
-
except Exception as e:
|
194 |
-
st.error(f"An error occurred: {str(e)}")
|
195 |
-
|
196 |
-
# Additional information
|
197 |
-
with st.expander("About Token Counting"):
|
198 |
-
st.markdown("""
|
199 |
-
### What are tokens?
|
200 |
-
|
201 |
-
Tokens are chunks of text that language models process. They can be parts of words, whole words,
|
202 |
-
or even punctuation. Different models tokenize text differently.
|
203 |
-
|
204 |
-
### Why count tokens?
|
205 |
-
|
206 |
-
- **Cost Management**: Understanding token usage helps manage API costs
|
207 |
-
- **Model Limitations**: Different models have different token limits
|
208 |
-
- **Performance Optimization**: Helps optimize prompts for better responses
|
209 |
-
|
210 |
-
### Token Counting Tips
|
211 |
-
|
212 |
-
- Shorter messages use fewer tokens
|
213 |
-
- Special formatting, code blocks, and unusual characters may use more tokens
|
214 |
-
- For Claude models, the system message also counts toward your token usage
|
215 |
-
- Hugging Face models may tokenize text differently than Anthropic models
|
216 |
-
""")
|
217 |
-
|
218 |
-
# Footer
|
219 |
-
st.markdown("---")
|
220 |
-
st.markdown("Created with Streamlit, Anthropic API, and Hugging Face Transformers")
|
|
|
1 |
import streamlit as st
|
2 |
+
from pages import anthropic_models, huggingface_models
|
3 |
+
from utils import set_page_config, display_about_token_counting, display_footer
|
|
|
|
|
|
|
4 |
|
5 |
+
def main():
    """
    Build the single-page token counter UI.

    Applies the page configuration, renders the title and intro text, lays
    out one tab per model provider, then appends the shared "about" expander
    and the footer below the tabs.
    """
    set_page_config()

    st.title("🎈 LLM Token Counter")
    st.markdown("This app counts tokens for different language models based on your input text.")

    # One tab per provider; each provider module renders its own widgets.
    anthropic_tab, hugging_face_tab = st.tabs(["Anthropic Models", "Hugging Face Models"])

    with anthropic_tab:
        anthropic_models.display()

    with hugging_face_tab:
        huggingface_models.display()

    # Content shared by both providers, shown underneath the tabs.
    display_about_token_counting()
    display_footer()


if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
config.py
ADDED
File without changes
|
pages/__init__.py
ADDED
File without changes
|
pages/anthropic_models.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import anthropic
|
3 |
+
import json
|
4 |
+
import os
|
5 |
+
from utils import ANTHROPIC_MODEL_OPTIONS
|
6 |
+
|
7 |
+
def display():
    """
    Render the Anthropic tab.

    Collects an API key (form field first, ANTHROPIC_API_KEY environment
    variable as fallback), a model choice, an optional system message and a
    user message. When the count button is pressed, calls the Anthropic
    token-counting endpoint and shows the input token count plus the raw
    response.
    """
    st.header("Anthropic (Claude) Models")

    # A key typed into the form wins; otherwise fall back to the environment.
    anthropic_key = st.text_input(
        "Enter your Anthropic API Key",
        type="password",
        help="⚠️ Never share your API key. Leave empty to use ANTHROPIC_API_KEY environment variable.",
    ) or os.environ.get("ANTHROPIC_API_KEY", "")

    model_labels = list(ANTHROPIC_MODEL_OPTIONS.keys())
    selected_anthropic_model = st.selectbox("Select Claude Model", model_labels)

    st.subheader("System Message (Optional)")
    system_message = st.text_area(
        "System Message", placeholder="e.g., You are a helpful assistant", height=100
    )

    st.subheader("Message Content")
    anthropic_user_message = st.text_area(
        "Enter your message here",
        placeholder="Hello, Claude! How are you today?",
        height=200,
        key="anthropic_message",
    )

    # Nothing more to do until the user asks for a count.
    if not st.button("Count Tokens (Anthropic)"):
        return

    if not anthropic_key:
        st.error(
            "No Anthropic API key found. Please enter a key or set the ANTHROPIC_API_KEY environment variable."
        )
        return

    if not anthropic_user_message:
        st.warning("Please enter a message to count tokens")
        return

    try:
        api_client = anthropic.Anthropic(api_key=anthropic_key)

        # The system prompt is only included when the user supplied one.
        optional_fields = {"system": system_message} if system_message else {}
        count_request = {
            "model": ANTHROPIC_MODEL_OPTIONS[selected_anthropic_model],
            "messages": [{"role": "user", "content": anthropic_user_message}],
            **optional_fields,
        }

        response = api_client.messages.count_tokens(**count_request)

        st.success(f"Input tokens: {response.input_tokens}")

        # Raw response, collapsed by default.
        with st.expander("View Full API Response"):
            pretty_response = json.dumps(response.model_dump(), indent=2)
            st.code(pretty_response, language="json")

    except Exception as e:
        # UI boundary: surface any failure to the user instead of crashing the page.
        st.error(f"An error occurred: {str(e)}")
|
pages/huggingface_models.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from huggingface_hub import login
|
3 |
+
from transformers import AutoTokenizer
|
4 |
+
import os
|
5 |
+
from utils import HF_MODEL_OPTIONS
|
6 |
+
|
7 |
+
def display():
    """
    Display the Hugging Face models tab.

    Renders the token input, login/logout controls, the model picker and the
    message box. When the count button is pressed, loads the tokenizer for
    the selected model and shows the number of token strings, the number of
    encoded token IDs, and both listings in an expander.

    The access token comes from the form field, then the HF_TOKEN environment
    variable, then the token stored in session state at login. It is passed
    explicitly to the tokenizer download so a token that was typed but not
    used via "Login" is still honoured.
    """
    st.header("Hugging Face Models")

    # HF Token input
    hf_token = st.text_input(
        "Enter your Hugging Face Token",
        type="password",
        help="⚠️ Never share your token. Leave empty to use HF_TOKEN environment variable.",
    )

    # If no token provided, try to get from environment
    if not hf_token:
        hf_token = os.environ.get("HF_TOKEN", "")

    # Login status tracker
    if "hf_logged_in" not in st.session_state:
        st.session_state.hf_logged_in = False

    if not st.session_state.hf_logged_in:
        if st.button("Login to Hugging Face"):
            if not hf_token:
                st.error(
                    "No Hugging Face token found. Please enter a token or set the HF_TOKEN environment variable."
                )
            else:
                try:
                    # NOTE(review): huggingface_hub.login() presumably persists the
                    # token for the whole server process/user, not just this
                    # browser session - confirm this is acceptable on a shared Space.
                    login(token=hf_token)
                    st.session_state.hf_logged_in = True
                    st.session_state.hf_token = hf_token  # Store the token in session state
                    st.success("Successfully logged in to Hugging Face")
                except Exception as e:
                    st.error(f"Login failed: {str(e)}")
    else:
        st.success("Logged in to Hugging Face")

        # Logout button
        if st.button("Logout"):
            st.session_state.hf_logged_in = False
            st.session_state.hf_token = ""  # Clear the token from session state
            st.success("Successfully logged out from Hugging Face")
            # st.experimental_rerun() no longer exists in current Streamlit
            # releases; prefer st.rerun() and only fall back to the old name
            # on versions that predate it.
            rerun = getattr(st, "rerun", None) or st.experimental_rerun
            rerun()  # Rerun the script to refresh the UI

    if st.session_state.hf_logged_in or hf_token:
        # Token actually used for downloads: form/env value first, then the
        # one remembered at login. None lets the library use its own default.
        auth_token = hf_token or st.session_state.get("hf_token") or None

        # Predefined popular models
        selected_hf_model = st.selectbox("Select Hugging Face Model", HF_MODEL_OPTIONS)

        # Custom model input
        if selected_hf_model == "Other (specify)":
            custom_hf_model = st.text_input(
                "Enter model name (e.g., organization/model-name)"
            )
            selected_hf_model = (
                custom_hf_model if custom_hf_model else "gpt2"
            )  # Default to gpt2 if empty

        # User message input for HF
        hf_user_message = st.text_area(
            "Enter your message here",
            placeholder="Hello, world!",
            height=200,
            key="hf_message",
        )

        # Button to count tokens for HF
        if st.button("Count Tokens (Hugging Face)"):
            if not hf_user_message:
                st.warning("Please enter a message to count tokens")
            else:
                try:
                    with st.spinner(f"Loading tokenizer for {selected_hf_model}..."):
                        # Pass the token explicitly so gated/private repos work
                        # even when the user never pressed "Login".
                        tokenizer = AutoTokenizer.from_pretrained(
                            selected_hf_model, token=auth_token
                        )

                    # Count tokens in different ways: tokenize() yields the token
                    # strings, encode() yields the IDs the tokenizer produces.
                    tokens = tokenizer.tokenize(hf_user_message)
                    token_ids = tokenizer.encode(hf_user_message)

                    # Display results
                    st.success(f"Token count: {len(tokens)}")
                    st.success(f"Token IDs count: {len(token_ids)}")

                    # Show the actual tokens
                    with st.expander("View Token Details"):
                        col1, col2 = st.columns(2)
                        with col1:
                            st.subheader("Tokens")
                            st.json([f"{i}: {token}" for i, token in enumerate(tokens)])
                        with col2:
                            st.subheader("Token IDs")
                            st.json(
                                [
                                    f"{i}: {token_id}"
                                    for i, token_id in enumerate(token_ids)
                                ]
                            )

                except Exception as e:
                    # UI boundary: report the failure (bad model name, gated
                    # repo, network error, ...) instead of crashing the page.
                    st.error(f"An error occurred: {str(e)}")
|
utils.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
def set_page_config():
    """
    Apply the app-wide Streamlit page settings: browser tab title, icon and
    a centered layout.
    """
    page_settings = {
        "page_title": "LLM Token Counter",
        "page_icon": "🤖",
        "layout": "centered",
    }
    st.set_page_config(**page_settings)
|
8 |
+
|
9 |
+
def display_about_token_counting():
    """
    Render a collapsible "About Token Counting" section with background on
    what tokens are, why counting them matters, and a few practical tips.
    """
    # Markdown body kept flush-left so the text is exactly what gets rendered.
    about_markdown = """
### What are tokens?

Tokens are chunks of text that language models process. They can be parts of words, whole words,
or even punctuation. Different models tokenize text differently.

### Why count tokens?

- **Cost Management**: Understanding token usage helps manage API costs
- **Model Limitations**: Different models have different token limits
- **Performance Optimization**: Helps optimize prompts for better responses

### Token Counting Tips

- Shorter messages use fewer tokens
- Special formatting, code blocks, and unusual characters may use more tokens
- For Claude models, the system message also counts toward your token usage
- Hugging Face models may tokenize text differently than Anthropic models
"""
    with st.expander("About Token Counting"):
        st.markdown(about_markdown)
|
33 |
+
|
34 |
+
def display_footer():
    """
    Render the page footer: a horizontal rule followed by the credits line.
    """
    footer_parts = (
        "---",
        "Created with Streamlit, Anthropic API, and Hugging Face Transformers",
    )
    for part in footer_parts:
        st.markdown(part)
|
40 |
+
|
41 |
+
# Constants for model options

# Maps the label shown in the Claude model picker to the Anthropic API model ID.
ANTHROPIC_MODEL_OPTIONS = {
    "Claude 3.7 Sonnet": "claude-3-7-sonnet-20250219",
    "Claude 3.5 Sonnet": "claude-3-5-sonnet-20240620",
    # Claude 3.5 Haiku's snapshot is dated 20241022; the 20240307 suffix
    # belongs to Claude 3 Haiku and does not exist for the 3.5 model.
    "Claude 3.5 Haiku": "claude-3-5-haiku-20241022",
    "Claude 3 Haiku": "claude-3-haiku-20240307",
    "Claude 3 Opus": "claude-3-opus-20240229",
}

# Hugging Face repo IDs offered in the model picker. The final entry is a
# sentinel that switches the UI to a free-text model name field.
HF_MODEL_OPTIONS = [
    "mistralai/Mistral-Small-24B-Instruct-2501",
    "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
    "google/codegemma-7b",
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "microsoft/Phi-4-multimodal-instruct",
    "nvidia/Llama-3.3-70B-Instruct-FP4",
    "Other (specify)",
]
|