Spaces:
Running
Running
Timothy S. Phan
committed on
Commit
·
f4147c3
1
Parent(s):
7b0e2d6
adds initial streamlit app to hf spaces
Browse files- app.py +220 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# App dependencies: Streamlit for the UI, the Anthropic SDK for Claude
# token counting, and huggingface_hub/transformers for open-model tokenizers.
import streamlit as st
import anthropic
import json
import os
from huggingface_hub import login
from transformers import AutoTokenizer

# Page-wide configuration; must be the first Streamlit call in the script.
st.set_page_config(page_title="LLM Token Counter", page_icon="🤖", layout="wide")

st.title("🎈 LLM Token Counter")
st.markdown(
    "This app counts tokens for different language models based on your input text."
)

# Tabs for model provider selection
# provider_tab[0] = Anthropic (API-based counting), provider_tab[1] = Hugging Face (local tokenizer).
provider_tab = st.tabs(["Anthropic Models", "Hugging Face Models"])
with provider_tab[0]:  # Anthropic Models
    # Counts tokens server-side via the Anthropic Messages count_tokens API;
    # requires a valid API key but makes no billable model call.
    st.header("Anthropic (Claude) Models")

    # API key input (with warning about security)
    anthropic_key = st.text_input(
        "Enter your Anthropic API Key",
        type="password",
        help="⚠️ Never share your API key. Leave empty to use ANTHROPIC_API_KEY environment variable.",
    )

    # If no key provided, try to get from environment
    if not anthropic_key:
        anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")

    # Model selection for Anthropic: display name -> dated model id snapshot.
    anthropic_model_options = {
        "Claude 3.7 Sonnet": "claude-3-7-sonnet-20250219",
        "Claude 3.5 Sonnet": "claude-3-5-sonnet-20240620",
        # FIX: the previous id "claude-3-5-haiku-20240307" reused Claude 3
        # Haiku's snapshot date and is not a valid model id, so counting
        # tokens with this selection would fail with a not-found error.
        # Claude 3.5 Haiku's snapshot is 2024-10-22.
        "Claude 3.5 Haiku": "claude-3-5-haiku-20241022",
        "Claude 3 Haiku": "claude-3-haiku-20240307",
        "Claude 3 Opus": "claude-3-opus-20240229",
    }

    selected_anthropic_model = st.selectbox(
        "Select Claude Model", list(anthropic_model_options.keys())
    )

    # System message (optional) — counts toward input tokens when provided.
    st.subheader("System Message (Optional)")
    system_message = st.text_area(
        "System Message", placeholder="e.g., You are a helpful assistant", height=100
    )

    # User message input
    st.subheader("Message Content")
    anthropic_user_message = st.text_area(
        "Enter your message here",
        placeholder="Hello, Claude! How are you today?",
        height=200,
        key="anthropic_message",
    )

    # Button to count tokens for Anthropic
    if st.button("Count Tokens (Anthropic)"):
        if not anthropic_key:
            st.error(
                "No Anthropic API key found. Please enter a key or set the ANTHROPIC_API_KEY environment variable."
            )
        elif not anthropic_user_message:
            st.warning("Please enter a message to count tokens")
        else:
            try:
                # Initialize client with API key
                client = anthropic.Anthropic(api_key=anthropic_key)

                # Create the request
                count_request = {
                    "model": anthropic_model_options[selected_anthropic_model],
                    "messages": [{"role": "user", "content": anthropic_user_message}],
                }

                # Add system message if provided (top-level field, not a message)
                if system_message:
                    count_request["system"] = system_message

                # Make the API call to count tokens
                response = client.messages.count_tokens(**count_request)

                # Display results
                st.success(f"Input tokens: {response.input_tokens}")

                # Display the full JSON response in an expandable section
                with st.expander("View Full API Response"):
                    st.code(
                        json.dumps(response.model_dump(), indent=2), language="json"
                    )

            # Broad catch is deliberate at this UI boundary: surface any
            # SDK/network/auth failure to the user instead of crashing the app.
            except Exception as e:
                st.error(f"An error occurred: {str(e)}")
with provider_tab[1]:  # Hugging Face Models
    # Counts tokens locally by downloading the selected model's tokenizer;
    # no inference is performed.
    st.header("Hugging Face Models")

    # HF Token input
    hf_token = st.text_input(
        "Enter your Hugging Face Token",
        type="password",
        help="⚠️ Never share your token. Leave empty to use HF_TOKEN environment variable.",
    )

    # If no token provided, try to get from environment
    if not hf_token:
        hf_token = os.environ.get("HF_TOKEN", "")

    # Login status tracker (persists across Streamlit reruns)
    if "hf_logged_in" not in st.session_state:
        st.session_state.hf_logged_in = False

    # Login button
    if not st.session_state.hf_logged_in and st.button("Login to Hugging Face"):
        if not hf_token:
            st.error(
                "No Hugging Face token found. Please enter a token or set the HF_TOKEN environment variable."
            )
        else:
            try:
                login(token=hf_token)
                st.session_state.hf_logged_in = True
                st.success("Successfully logged in to Hugging Face")
            except Exception as e:
                st.error(f"Login failed: {str(e)}")

    if st.session_state.hf_logged_in or hf_token:
        # Predefined popular models
        hf_model_options = [
            "mistralai/Mistral-Small-24B-Instruct-2501",
            "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
            "google/codegemma-7b",
            "Qwen/Qwen2.5-Coder-32B-Instruct",
            "microsoft/Phi-4-multimodal-instruct",
            "nvidia/Llama-3.3-70B-Instruct-FP4",
            "Other (specify)",
        ]

        selected_hf_model = st.selectbox("Select Hugging Face Model", hf_model_options)

        # Custom model input
        if selected_hf_model == "Other (specify)":
            custom_hf_model = st.text_input(
                "Enter model name (e.g., organization/model-name)"
            )
            selected_hf_model = (
                custom_hf_model if custom_hf_model else "gpt2"
            )  # Default to gpt2 if empty

        # User message input for HF
        hf_user_message = st.text_area(
            "Enter your message here",
            placeholder="Hello, world!",
            height=200,
            key="hf_message",
        )

        # Button to count tokens for HF
        if st.button("Count Tokens (Hugging Face)"):
            if not hf_user_message:
                st.warning("Please enter a message to count tokens")
            else:
                try:
                    with st.spinner(f"Loading tokenizer for {selected_hf_model}..."):
                        # FIX: forward the user's token so gated/private
                        # repositories load even when the explicit Login button
                        # was never pressed. `token=None` preserves the old
                        # anonymous behavior when no token is available.
                        tokenizer = AutoTokenizer.from_pretrained(
                            selected_hf_model, token=hf_token or None
                        )

                    # Count tokens in two ways:
                    # tokenize() -> surface token strings (no special tokens);
                    # encode()   -> token ids, which may include BOS/EOS specials,
                    # hence the two counts can legitimately differ.
                    tokens = tokenizer.tokenize(hf_user_message)
                    token_ids = tokenizer.encode(hf_user_message)

                    # Display results
                    st.success(f"Token count: {len(tokens)}")
                    st.success(f"Token IDs count: {len(token_ids)}")

                    # Show the actual tokens
                    with st.expander("View Token Details"):
                        col1, col2 = st.columns(2)
                        with col1:
                            st.subheader("Tokens")
                            st.json([f"{i}: {token}" for i, token in enumerate(tokens)])
                        with col2:
                            st.subheader("Token IDs")
                            st.json(
                                [
                                    f"{i}: {token_id}"
                                    for i, token_id in enumerate(token_ids)
                                ]
                            )

                # Broad catch is deliberate: tokenizer download/parse can fail
                # in many ways (network, auth, bad repo name); show the error.
                except Exception as e:
                    st.error(f"An error occurred: {str(e)}")
# Additional information
# Static educational text shown below both tabs, plus a page footer.
with st.expander("About Token Counting"):
    st.markdown("""
    ### What are tokens?

    Tokens are chunks of text that language models process. They can be parts of words, whole words,
    or even punctuation. Different models tokenize text differently.

    ### Why count tokens?

    - **Cost Management**: Understanding token usage helps manage API costs
    - **Model Limitations**: Different models have different token limits
    - **Performance Optimization**: Helps optimize prompts for better responses

    ### Token Counting Tips

    - Shorter messages use fewer tokens
    - Special formatting, code blocks, and unusual characters may use more tokens
    - For Claude models, the system message also counts toward your token usage
    - Hugging Face models may tokenize text differently than Anthropic models
    """)

# Footer
st.markdown("---")
st.markdown("Created with Streamlit, Anthropic API, and Hugging Face Transformers")
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
huggingface_hub
|
3 |
+
transformers
|
4 |
+
anthropic
|