Timothy S. Phan committed on
Commit
176f432
·
1 Parent(s): f4147c3

Splits into different pages to handle different models

Browse files
.streamlit/config.toml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [client]
2
+ showSidebarNavigation = false
app.py CHANGED
@@ -1,220 +1,30 @@
1
  import streamlit as st
2
- import anthropic
3
- import json
4
- import os
5
- from huggingface_hub import login
6
- from transformers import AutoTokenizer
7
 
8
- st.set_page_config(page_title="LLM Token Counter", page_icon="🤖", layout="wide")
 
 
 
 
9
 
10
- st.title("🎈 LLM Token Counter")
11
- st.markdown(
12
- "This app counts tokens for different language models based on your input text."
13
- )
14
 
15
- # Tabs for model provider selection
16
- provider_tab = st.tabs(["Anthropic Models", "Hugging Face Models"])
17
 
18
- with provider_tab[0]: # Anthropic Models
19
- st.header("Anthropic (Claude) Models")
20
 
21
- # API key input (with warning about security)
22
- anthropic_key = st.text_input(
23
- "Enter your Anthropic API Key",
24
- type="password",
25
- help="⚠️ Never share your API key. Leave empty to use ANTHROPIC_API_KEY environment variable.",
26
- )
27
 
28
- # If no key provided, try to get from environment
29
- if not anthropic_key:
30
- anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")
31
 
32
- # Model selection for Anthropic
33
- anthropic_model_options = {
34
- "Claude 3.7 Sonnet": "claude-3-7-sonnet-20250219",
35
- "Claude 3.5 Sonnet": "claude-3-5-sonnet-20240620",
36
- "Claude 3.5 Haiku": "claude-3-5-haiku-20240307",
37
- "Claude 3 Haiku": "claude-3-haiku-20240307",
38
- "Claude 3 Opus": "claude-3-opus-20240229",
39
- }
40
 
41
- selected_anthropic_model = st.selectbox(
42
- "Select Claude Model", list(anthropic_model_options.keys())
43
- )
44
-
45
- # System message (optional)
46
- st.subheader("System Message (Optional)")
47
- system_message = st.text_area(
48
- "System Message", placeholder="e.g., You are a helpful assistant", height=100
49
- )
50
-
51
- # User message input
52
- st.subheader("Message Content")
53
- anthropic_user_message = st.text_area(
54
- "Enter your message here",
55
- placeholder="Hello, Claude! How are you today?",
56
- height=200,
57
- key="anthropic_message",
58
- )
59
-
60
- # Button to count tokens for Anthropic
61
- if st.button("Count Tokens (Anthropic)"):
62
- if not anthropic_key:
63
- st.error(
64
- "No Anthropic API key found. Please enter a key or set the ANTHROPIC_API_KEY environment variable."
65
- )
66
- elif not anthropic_user_message:
67
- st.warning("Please enter a message to count tokens")
68
- else:
69
- try:
70
- # Initialize client with API key
71
- client = anthropic.Anthropic(api_key=anthropic_key)
72
-
73
- # Create the request
74
- count_request = {
75
- "model": anthropic_model_options[selected_anthropic_model],
76
- "messages": [{"role": "user", "content": anthropic_user_message}],
77
- }
78
-
79
- # Add system message if provided
80
- if system_message:
81
- count_request["system"] = system_message
82
-
83
- # Make the API call to count tokens
84
- response = client.messages.count_tokens(**count_request)
85
-
86
- # Display results
87
- st.success(f"Input tokens: {response.input_tokens}")
88
-
89
- # Display the full JSON response in an expandable section
90
- with st.expander("View Full API Response"):
91
- st.code(
92
- json.dumps(response.model_dump(), indent=2), language="json"
93
- )
94
-
95
- except Exception as e:
96
- st.error(f"An error occurred: {str(e)}")
97
-
98
- with provider_tab[1]: # Hugging Face Models
99
- st.header("Hugging Face Models")
100
-
101
- # HF Token input
102
- hf_token = st.text_input(
103
- "Enter your Hugging Face Token",
104
- type="password",
105
- help="⚠️ Never share your token. Leave empty to use HF_TOKEN environment variable.",
106
- )
107
-
108
- # If no token provided, try to get from environment
109
- if not hf_token:
110
- hf_token = os.environ.get("HF_TOKEN", "")
111
-
112
- # Login status tracker
113
- if "hf_logged_in" not in st.session_state:
114
- st.session_state.hf_logged_in = False
115
-
116
- # Login button
117
- if not st.session_state.hf_logged_in and st.button("Login to Hugging Face"):
118
- if not hf_token:
119
- st.error(
120
- "No Hugging Face token found. Please enter a token or set the HF_TOKEN environment variable."
121
- )
122
- else:
123
- try:
124
- login(token=hf_token)
125
- st.session_state.hf_logged_in = True
126
- st.success("Successfully logged in to Hugging Face")
127
- except Exception as e:
128
- st.error(f"Login failed: {str(e)}")
129
-
130
- if st.session_state.hf_logged_in or hf_token:
131
- # Predefined popular models
132
- hf_model_options = [
133
- "mistralai/Mistral-Small-24B-Instruct-2501",
134
- "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
135
- "google/codegemma-7b",
136
- "Qwen/Qwen2.5-Coder-32B-Instruct",
137
- "microsoft/Phi-4-multimodal-instruct",
138
- "nvidia/Llama-3.3-70B-Instruct-FP4",
139
- "Other (specify)",
140
- ]
141
-
142
- selected_hf_model = st.selectbox("Select Hugging Face Model", hf_model_options)
143
-
144
- # Custom model input
145
- if selected_hf_model == "Other (specify)":
146
- custom_hf_model = st.text_input(
147
- "Enter model name (e.g., organization/model-name)"
148
- )
149
- selected_hf_model = (
150
- custom_hf_model if custom_hf_model else "gpt2"
151
- ) # Default to gpt2 if empty
152
-
153
- # User message input for HF
154
- hf_user_message = st.text_area(
155
- "Enter your message here",
156
- placeholder="Hello, world!",
157
- height=200,
158
- key="hf_message",
159
- )
160
-
161
- # Button to count tokens for HF
162
- if st.button("Count Tokens (Hugging Face)"):
163
- if not hf_user_message:
164
- st.warning("Please enter a message to count tokens")
165
- else:
166
- try:
167
- with st.spinner(f"Loading tokenizer for {selected_hf_model}..."):
168
- tokenizer = AutoTokenizer.from_pretrained(selected_hf_model)
169
-
170
- # Count tokens in different ways
171
- tokens = tokenizer.tokenize(hf_user_message)
172
- token_ids = tokenizer.encode(hf_user_message)
173
-
174
- # Display results
175
- st.success(f"Token count: {len(tokens)}")
176
- st.success(f"Token IDs count: {len(token_ids)}")
177
-
178
- # Show the actual tokens
179
- with st.expander("View Token Details"):
180
- col1, col2 = st.columns(2)
181
- with col1:
182
- st.subheader("Tokens")
183
- st.json([f"{i}: {token}" for i, token in enumerate(tokens)])
184
- with col2:
185
- st.subheader("Token IDs")
186
- st.json(
187
- [
188
- f"{i}: {token_id}"
189
- for i, token_id in enumerate(token_ids)
190
- ]
191
- )
192
-
193
- except Exception as e:
194
- st.error(f"An error occurred: {str(e)}")
195
-
196
- # Additional information
197
- with st.expander("About Token Counting"):
198
- st.markdown("""
199
- ### What are tokens?
200
-
201
- Tokens are chunks of text that language models process. They can be parts of words, whole words,
202
- or even punctuation. Different models tokenize text differently.
203
-
204
- ### Why count tokens?
205
-
206
- - **Cost Management**: Understanding token usage helps manage API costs
207
- - **Model Limitations**: Different models have different token limits
208
- - **Performance Optimization**: Helps optimize prompts for better responses
209
-
210
- ### Token Counting Tips
211
-
212
- - Shorter messages use fewer tokens
213
- - Special formatting, code blocks, and unusual characters may use more tokens
214
- - For Claude models, the system message also counts toward your token usage
215
- - Hugging Face models may tokenize text differently than Anthropic models
216
- """)
217
-
218
- # Footer
219
- st.markdown("---")
220
- st.markdown("Created with Streamlit, Anthropic API, and Hugging Face Transformers")
 
1
  import streamlit as st
2
+ from pages import anthropic_models, huggingface_models
3
+ from utils import set_page_config, display_about_token_counting, display_footer
 
 
 
4
 
5
def main():
    """
    Main entry point for the Streamlit application.

    Configures the page, renders the title and intro, lays out one tab
    per model provider, then appends the shared help section and footer.
    """
    set_page_config()

    st.title("🎈 LLM Token Counter")
    st.markdown(
        "This app counts tokens for different language models based on your input text."
    )

    # One tab per supported model provider.
    anthropic_tab, hf_tab = st.tabs(["Anthropic Models", "Hugging Face Models"])

    with anthropic_tab:
        anthropic_models.display()

    with hf_tab:
        huggingface_models.display()

    # Shared explanatory section and footer, common to both providers.
    display_about_token_counting()
    display_footer()


if __name__ == "__main__":
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.py ADDED
File without changes
pages/__init__.py ADDED
File without changes
pages/anthropic_models.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import anthropic
3
+ import json
4
+ import os
5
+ from utils import ANTHROPIC_MODEL_OPTIONS
6
+
7
def display():
    """
    Render the Anthropic (Claude) tab.

    Collects an API key (falling back to ANTHROPIC_API_KEY), a model
    choice, an optional system prompt, and a user message, then calls
    the Anthropic ``messages.count_tokens`` endpoint and shows the result.
    """
    st.header("Anthropic (Claude) Models")

    # API key input (with warning about security); environment fallback below.
    api_key = st.text_input(
        "Enter your Anthropic API Key",
        type="password",
        help="⚠️ Never share your API key. Leave empty to use ANTHROPIC_API_KEY environment variable.",
    )
    api_key = api_key or os.environ.get("ANTHROPIC_API_KEY", "")

    # Model picker driven by the shared constants in utils.
    model_label = st.selectbox(
        "Select Claude Model", list(ANTHROPIC_MODEL_OPTIONS.keys())
    )

    # Optional system prompt; included in the request only when non-empty.
    st.subheader("System Message (Optional)")
    system_message = st.text_area(
        "System Message", placeholder="e.g., You are a helpful assistant", height=100
    )

    # The message whose tokens will be counted.
    st.subheader("Message Content")
    user_message = st.text_area(
        "Enter your message here",
        placeholder="Hello, Claude! How are you today?",
        height=200,
        key="anthropic_message",
    )

    # Nothing further to do until the user asks for a count.
    if not st.button("Count Tokens (Anthropic)"):
        return

    if not api_key:
        st.error(
            "No Anthropic API key found. Please enter a key or set the ANTHROPIC_API_KEY environment variable."
        )
        return

    if not user_message:
        st.warning("Please enter a message to count tokens")
        return

    try:
        client = anthropic.Anthropic(api_key=api_key)

        # Assemble the token-count request.
        request = {
            "model": ANTHROPIC_MODEL_OPTIONS[model_label],
            "messages": [{"role": "user", "content": user_message}],
        }
        if system_message:
            request["system"] = system_message

        response = client.messages.count_tokens(**request)

        st.success(f"Input tokens: {response.input_tokens}")

        # Full API payload for the curious, tucked into an expander.
        with st.expander("View Full API Response"):
            st.code(json.dumps(response.model_dump(), indent=2), language="json")
    except Exception as e:
        st.error(f"An error occurred: {str(e)}")
pages/huggingface_models.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from huggingface_hub import login
3
+ from transformers import AutoTokenizer
4
+ import os
5
+ from utils import HF_MODEL_OPTIONS
6
+
7
def display():
    """
    Render the Hugging Face tab.

    Handles token entry (falling back to HF_TOKEN), hub login/logout via
    ``huggingface_hub.login``, model selection, and tokenizer-based token
    counting with ``transformers.AutoTokenizer``.
    """
    st.header("Hugging Face Models")

    # HF Token input; environment fallback below.
    hf_token = st.text_input(
        "Enter your Hugging Face Token",
        type="password",
        help="⚠️ Never share your token. Leave empty to use HF_TOKEN environment variable.",
    )
    if not hf_token:
        hf_token = os.environ.get("HF_TOKEN", "")

    # Login status tracker, persisted across Streamlit reruns.
    if "hf_logged_in" not in st.session_state:
        st.session_state.hf_logged_in = False

    if not st.session_state.hf_logged_in:
        # Login button
        if st.button("Login to Hugging Face"):
            if not hf_token:
                st.error(
                    "No Hugging Face token found. Please enter a token or set the HF_TOKEN environment variable."
                )
            else:
                try:
                    login(token=hf_token)
                    st.session_state.hf_logged_in = True
                    st.session_state.hf_token = hf_token  # Store the token in session state
                    st.success("Successfully logged in to Hugging Face")
                except Exception as e:
                    st.error(f"Login failed: {str(e)}")
    else:
        st.success("Logged in to Hugging Face")

        # Logout button
        if st.button("Logout"):
            st.session_state.hf_logged_in = False
            st.session_state.hf_token = ""  # Clear the token from session state
            st.success("Successfully logged out from Hugging Face")
            # Fix: st.experimental_rerun() was deprecated and removed in
            # Streamlit >= 1.30; use st.rerun(), keeping the old call as a
            # fallback for older Streamlit installs.
            if hasattr(st, "rerun"):
                st.rerun()
            else:
                st.experimental_rerun()

    if st.session_state.hf_logged_in or hf_token:
        # Predefined popular models from the shared constants in utils.
        selected_hf_model = st.selectbox("Select Hugging Face Model", HF_MODEL_OPTIONS)

        # Custom model input for arbitrary hub model ids.
        if selected_hf_model == "Other (specify)":
            custom_hf_model = st.text_input(
                "Enter model name (e.g., organization/model-name)"
            )
            selected_hf_model = (
                custom_hf_model if custom_hf_model else "gpt2"
            )  # Default to gpt2 if empty

        # User message input for HF
        hf_user_message = st.text_area(
            "Enter your message here",
            placeholder="Hello, world!",
            height=200,
            key="hf_message",
        )

        # Button to count tokens for HF
        if st.button("Count Tokens (Hugging Face)"):
            if not hf_user_message:
                st.warning("Please enter a message to count tokens")
            else:
                try:
                    with st.spinner(f"Loading tokenizer for {selected_hf_model}..."):
                        tokenizer = AutoTokenizer.from_pretrained(selected_hf_model)

                    # Count tokens in different ways: sub-word strings vs ids
                    # (encode may add special tokens, so counts can differ).
                    tokens = tokenizer.tokenize(hf_user_message)
                    token_ids = tokenizer.encode(hf_user_message)

                    # Display results
                    st.success(f"Token count: {len(tokens)}")
                    st.success(f"Token IDs count: {len(token_ids)}")

                    # Show the actual tokens side by side with their ids.
                    with st.expander("View Token Details"):
                        col1, col2 = st.columns(2)
                        with col1:
                            st.subheader("Tokens")
                            st.json([f"{i}: {token}" for i, token in enumerate(tokens)])
                        with col2:
                            st.subheader("Token IDs")
                            st.json(
                                [
                                    f"{i}: {token_id}"
                                    for i, token_id in enumerate(token_ids)
                                ]
                            )

                except Exception as e:
                    st.error(f"An error occurred: {str(e)}")
utils.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
def set_page_config():
    """Configure the Streamlit page: title, robot icon, centered layout."""
    st.set_page_config(
        page_title="LLM Token Counter", page_icon="🤖", layout="centered"
    )
8
+
9
def display_about_token_counting():
    """Show an expandable primer on what tokens are and why counting them matters."""
    about_text = """
    ### What are tokens?

    Tokens are chunks of text that language models process. They can be parts of words, whole words,
    or even punctuation. Different models tokenize text differently.

    ### Why count tokens?

    - **Cost Management**: Understanding token usage helps manage API costs
    - **Model Limitations**: Different models have different token limits
    - **Performance Optimization**: Helps optimize prompts for better responses

    ### Token Counting Tips

    - Shorter messages use fewer tokens
    - Special formatting, code blocks, and unusual characters may use more tokens
    - For Claude models, the system message also counts toward your token usage
    - Hugging Face models may tokenize text differently than Anthropic models
    """
    with st.expander("About Token Counting"):
        st.markdown(about_text)
33
+
34
def display_footer():
    """Render the app footer: a horizontal rule followed by an attribution line."""
    # A markdown "---" renders as a horizontal divider.
    st.markdown("---")
    st.markdown("Created with Streamlit, Anthropic API, and Hugging Face Transformers")
40
+
41
# Constants for model options, shared by the provider pages.

# Display label -> Anthropic API model id.
ANTHROPIC_MODEL_OPTIONS = {
    "Claude 3.7 Sonnet": "claude-3-7-sonnet-20250219",
    "Claude 3.5 Sonnet": "claude-3-5-sonnet-20240620",
    # Fix: the 3.5 Haiku id previously reused the Claude 3 Haiku snapshot
    # date (20240307); the actual claude-3-5-haiku release is 20241022.
    "Claude 3.5 Haiku": "claude-3-5-haiku-20241022",
    "Claude 3 Haiku": "claude-3-haiku-20240307",
    "Claude 3 Opus": "claude-3-opus-20240229",
}

# Popular Hugging Face Hub model ids; the final sentinel entry lets the
# user type an arbitrary model id instead.
HF_MODEL_OPTIONS = [
    "mistralai/Mistral-Small-24B-Instruct-2501",
    "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
    "google/codegemma-7b",
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "microsoft/Phi-4-multimodal-instruct",
    "nvidia/Llama-3.3-70B-Instruct-FP4",
    "Other (specify)",
]