Timothy S. Phan committed on
Commit
f4147c3
·
1 Parent(s): 7b0e2d6

adds initial streamlit app to hf spaces

Browse files
Files changed (2) hide show
  1. app.py +220 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import anthropic
3
+ import json
4
+ import os
5
+ from huggingface_hub import login
6
+ from transformers import AutoTokenizer
7
+
8
# Browser-tab metadata (title, icon) and a full-width page layout.
st.set_page_config(
    page_title="LLM Token Counter",
    page_icon="🤖",
    layout="wide",
)

# Visible heading followed by a one-line description of what the app does.
st.title("🎈 LLM Token Counter")
app_description = (
    "This app counts tokens for different language models based on your input text."
)
st.markdown(app_description)

# One tab per model provider: index 0 is Anthropic, index 1 is Hugging Face.
provider_tab_labels = ["Anthropic Models", "Hugging Face Models"]
provider_tab = st.tabs(provider_tab_labels)
17
+
18
with provider_tab[0]:  # Anthropic Models
    st.header("Anthropic (Claude) Models")

    # API key input. The field is masked; a value typed here takes precedence
    # over the ANTHROPIC_API_KEY environment variable.
    anthropic_key = st.text_input(
        "Enter your Anthropic API Key",
        type="password",
        help="⚠️ Never share your API key. Leave empty to use ANTHROPIC_API_KEY environment variable.",
    )

    # If no key was typed, fall back to the environment ("" when unset).
    if not anthropic_key:
        anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")

    # Display name -> API model identifier sent in the count_tokens request.
    # Claude 3.5 Haiku is published as "claude-3-5-haiku-20241022"; the
    # "20240307" date stamp belongs to Claude 3 Haiku only, so reusing it for
    # the 3.5 entry made every request for that option fail.
    anthropic_model_options = {
        "Claude 3.7 Sonnet": "claude-3-7-sonnet-20250219",
        "Claude 3.5 Sonnet": "claude-3-5-sonnet-20240620",
        "Claude 3.5 Haiku": "claude-3-5-haiku-20241022",
        "Claude 3 Haiku": "claude-3-haiku-20240307",
        "Claude 3 Opus": "claude-3-opus-20240229",
    }

    selected_anthropic_model = st.selectbox(
        "Select Claude Model", list(anthropic_model_options.keys())
    )

    # System message (optional); only sent when non-empty.
    st.subheader("System Message (Optional)")
    system_message = st.text_area(
        "System Message", placeholder="e.g., You are a helpful assistant", height=100
    )

    # User message input. An explicit key keeps this widget distinct from the
    # identically-labelled text area in the Hugging Face tab.
    st.subheader("Message Content")
    anthropic_user_message = st.text_area(
        "Enter your message here",
        placeholder="Hello, Claude! How are you today?",
        height=200,
        key="anthropic_message",
    )

    # Count tokens on demand; validate the inputs before calling the API.
    if st.button("Count Tokens (Anthropic)"):
        if not anthropic_key:
            st.error(
                "No Anthropic API key found. Please enter a key or set the ANTHROPIC_API_KEY environment variable."
            )
        elif not anthropic_user_message:
            st.warning("Please enter a message to count tokens")
        else:
            try:
                # Initialize client with the resolved API key.
                client = anthropic.Anthropic(api_key=anthropic_key)

                # Build the request: the selected model plus a single user turn.
                count_request = {
                    "model": anthropic_model_options[selected_anthropic_model],
                    "messages": [{"role": "user", "content": anthropic_user_message}],
                }

                # Add the system message only if one was provided.
                if system_message:
                    count_request["system"] = system_message

                # Make the API call to count tokens.
                response = client.messages.count_tokens(**count_request)

                # Headline result.
                st.success(f"Input tokens: {response.input_tokens}")

                # Full response, serialized to JSON, in a collapsible section.
                with st.expander("View Full API Response"):
                    st.code(
                        json.dumps(response.model_dump(), indent=2), language="json"
                    )

            except Exception as e:
                # UI boundary: surface any client/API failure to the user
                # instead of crashing the script run.
                st.error(f"An error occurred: {str(e)}")
97
+
98
with provider_tab[1]:  # Hugging Face Models
    st.header("Hugging Face Models")

    # HF token input. The field is masked; a value typed here takes precedence
    # over the HF_TOKEN environment variable.
    hf_token = st.text_input(
        "Enter your Hugging Face Token",
        type="password",
        help="⚠️ Never share your token. Leave empty to use HF_TOKEN environment variable.",
    )

    # If no token was typed, fall back to the environment ("" when unset).
    if not hf_token:
        hf_token = os.environ.get("HF_TOKEN", "")

    # Login status lives in session_state so it survives Streamlit reruns.
    if "hf_logged_in" not in st.session_state:
        st.session_state.hf_logged_in = False

    # Offer the login button only while not yet logged in.
    if not st.session_state.hf_logged_in and st.button("Login to Hugging Face"):
        if not hf_token:
            st.error(
                "No Hugging Face token found. Please enter a token or set the HF_TOKEN environment variable."
            )
        else:
            try:
                login(token=hf_token)
                st.session_state.hf_logged_in = True
                st.success("Successfully logged in to Hugging Face")
            except Exception as e:
                st.error(f"Login failed: {str(e)}")

    # The tokenizer UI is available after an explicit login OR whenever a
    # token is present, even if the login button was never pressed.
    if st.session_state.hf_logged_in or hf_token:
        # Predefined popular models; the last entry switches to free-form input.
        hf_model_options = [
            "mistralai/Mistral-Small-24B-Instruct-2501",
            "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
            "google/codegemma-7b",
            "Qwen/Qwen2.5-Coder-32B-Instruct",
            "microsoft/Phi-4-multimodal-instruct",
            "nvidia/Llama-3.3-70B-Instruct-FP4",
            "Other (specify)",
        ]

        selected_hf_model = st.selectbox("Select Hugging Face Model", hf_model_options)

        # Custom model input
        if selected_hf_model == "Other (specify)":
            custom_hf_model = st.text_input(
                "Enter model name (e.g., organization/model-name)"
            )
            selected_hf_model = (
                custom_hf_model if custom_hf_model else "gpt2"
            )  # Default to gpt2 if empty

        # User message input for HF. An explicit key keeps this widget distinct
        # from the identically-labelled text area in the Anthropic tab.
        hf_user_message = st.text_area(
            "Enter your message here",
            placeholder="Hello, world!",
            height=200,
            key="hf_message",
        )

        # Count tokens on demand.
        if st.button("Count Tokens (Hugging Face)"):
            if not hf_user_message:
                st.warning("Please enter a message to count tokens")
            else:
                try:
                    with st.spinner(f"Loading tokenizer for {selected_hf_model}..."):
                        # Pass the token explicitly: this branch is reachable
                        # without login() having been called, and gated repos
                        # cannot be downloaded anonymously. An empty string
                        # becomes None so any cached login is used instead.
                        tokenizer = AutoTokenizer.from_pretrained(
                            selected_hf_model, token=hf_token or None
                        )

                    # Two views of the same text: tokenize() returns the token
                    # strings, while encode() returns ids and, by default, also
                    # adds the tokenizer's special tokens, so the two counts
                    # may differ.
                    tokens = tokenizer.tokenize(hf_user_message)
                    token_ids = tokenizer.encode(hf_user_message)

                    # Display results
                    st.success(f"Token count: {len(tokens)}")
                    st.success(f"Token IDs count: {len(token_ids)}")

                    # Show the actual tokens and ids side by side, each
                    # labelled with its position.
                    with st.expander("View Token Details"):
                        col1, col2 = st.columns(2)
                        with col1:
                            st.subheader("Tokens")
                            st.json([f"{i}: {token}" for i, token in enumerate(tokens)])
                        with col2:
                            st.subheader("Token IDs")
                            st.json(
                                [
                                    f"{i}: {token_id}"
                                    for i, token_id in enumerate(token_ids)
                                ]
                            )

                except Exception as e:
                    # UI boundary: surface download/tokenizer failures to the
                    # user instead of crashing the script run.
                    st.error(f"An error occurred: {str(e)}")
195
+
196
# Background notes on tokens, shown in a collapsible section below the tabs.
about_tokens_md = """
    ### What are tokens?

    Tokens are chunks of text that language models process. They can be parts of words, whole words,
    or even punctuation. Different models tokenize text differently.

    ### Why count tokens?

    - **Cost Management**: Understanding token usage helps manage API costs
    - **Model Limitations**: Different models have different token limits
    - **Performance Optimization**: Helps optimize prompts for better responses

    ### Token Counting Tips

    - Shorter messages use fewer tokens
    - Special formatting, code blocks, and unusual characters may use more tokens
    - For Claude models, the system message also counts toward your token usage
    - Hugging Face models may tokenize text differently than Anthropic models
    """
with st.expander("About Token Counting"):
    st.markdown(about_tokens_md)

# Footer: a horizontal rule, then the credits line.
for footer_line in (
    "---",
    "Created with Streamlit, Anthropic API, and Hugging Face Transformers",
):
    st.markdown(footer_line)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit
2
+ huggingface_hub
3
+ transformers
4
+ anthropic