Timothy S. Phan committed on
Commit
176f432
·
1 Parent(s): f4147c3

Splits into different pages to handle different models

Browse files
.streamlit/config.toml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [client]
2
+ showSidebarNavigation = false
app.py CHANGED
@@ -1,220 +1,30 @@
1
  import streamlit as st
2
- import anthropic
3
- import json
4
- import os
5
- from huggingface_hub import login
6
- from transformers import AutoTokenizer
7
 
8
- st.set_page_config(page_title="LLM Token Counter", page_icon="🤖", layout="wide")
 
 
 
 
9
 
10
- st.title("🎈 LLM Token Counter")
11
- st.markdown(
12
- "This app counts tokens for different language models based on your input text."
13
- )
14
 
15
- # Tabs for model provider selection
16
- provider_tab = st.tabs(["Anthropic Models", "Hugging Face Models"])
17
 
18
- with provider_tab[0]: # Anthropic Models
19
- st.header("Anthropic (Claude) Models")
20
 
21
- # API key input (with warning about security)
22
- anthropic_key = st.text_input(
23
- "Enter your Anthropic API Key",
24
- type="password",
25
- help="⚠️ Never share your API key. Leave empty to use ANTHROPIC_API_KEY environment variable.",
26
- )
27
 
28
- # If no key provided, try to get from environment
29
- if not anthropic_key:
30
- anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")
31
 
32
- # Model selection for Anthropic
33
- anthropic_model_options = {
34
- "Claude 3.7 Sonnet": "claude-3-7-sonnet-20250219",
35
- "Claude 3.5 Sonnet": "claude-3-5-sonnet-20240620",
36
- "Claude 3.5 Haiku": "claude-3-5-haiku-20240307",
37
- "Claude 3 Haiku": "claude-3-haiku-20240307",
38
- "Claude 3 Opus": "claude-3-opus-20240229",
39
- }
40
 
41
- selected_anthropic_model = st.selectbox(
42
- "Select Claude Model", list(anthropic_model_options.keys())
43
- )
44
-
45
- # System message (optional)
46
- st.subheader("System Message (Optional)")
47
- system_message = st.text_area(
48
- "System Message", placeholder="e.g., You are a helpful assistant", height=100
49
- )
50
-
51
- # User message input
52
- st.subheader("Message Content")
53
- anthropic_user_message = st.text_area(
54
- "Enter your message here",
55
- placeholder="Hello, Claude! How are you today?",
56
- height=200,
57
- key="anthropic_message",
58
- )
59
-
60
- # Button to count tokens for Anthropic
61
- if st.button("Count Tokens (Anthropic)"):
62
- if not anthropic_key:
63
- st.error(
64
- "No Anthropic API key found. Please enter a key or set the ANTHROPIC_API_KEY environment variable."
65
- )
66
- elif not anthropic_user_message:
67
- st.warning("Please enter a message to count tokens")
68
- else:
69
- try:
70
- # Initialize client with API key
71
- client = anthropic.Anthropic(api_key=anthropic_key)
72
-
73
- # Create the request
74
- count_request = {
75
- "model": anthropic_model_options[selected_anthropic_model],
76
- "messages": [{"role": "user", "content": anthropic_user_message}],
77
- }
78
-
79
- # Add system message if provided
80
- if system_message:
81
- count_request["system"] = system_message
82
-
83
- # Make the API call to count tokens
84
- response = client.messages.count_tokens(**count_request)
85
-
86
- # Display results
87
- st.success(f"Input tokens: {response.input_tokens}")
88
-
89
- # Display the full JSON response in an expandable section
90
- with st.expander("View Full API Response"):
91
- st.code(
92
- json.dumps(response.model_dump(), indent=2), language="json"
93
- )
94
-
95
- except Exception as e:
96
- st.error(f"An error occurred: {str(e)}")
97
-
98
- with provider_tab[1]: # Hugging Face Models
99
- st.header("Hugging Face Models")
100
-
101
- # HF Token input
102
- hf_token = st.text_input(
103
- "Enter your Hugging Face Token",
104
- type="password",
105
- help="⚠️ Never share your token. Leave empty to use HF_TOKEN environment variable.",
106
- )
107
-
108
- # If no token provided, try to get from environment
109
- if not hf_token:
110
- hf_token = os.environ.get("HF_TOKEN", "")
111
-
112
- # Login status tracker
113
- if "hf_logged_in" not in st.session_state:
114
- st.session_state.hf_logged_in = False
115
-
116
- # Login button
117
- if not st.session_state.hf_logged_in and st.button("Login to Hugging Face"):
118
- if not hf_token:
119
- st.error(
120
- "No Hugging Face token found. Please enter a token or set the HF_TOKEN environment variable."
121
- )
122
- else:
123
- try:
124
- login(token=hf_token)
125
- st.session_state.hf_logged_in = True
126
- st.success("Successfully logged in to Hugging Face")
127
- except Exception as e:
128
- st.error(f"Login failed: {str(e)}")
129
-
130
- if st.session_state.hf_logged_in or hf_token:
131
- # Predefined popular models
132
- hf_model_options = [
133
- "mistralai/Mistral-Small-24B-Instruct-2501",
134
- "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
135
- "google/codegemma-7b",
136
- "Qwen/Qwen2.5-Coder-32B-Instruct",
137
- "microsoft/Phi-4-multimodal-instruct",
138
- "nvidia/Llama-3.3-70B-Instruct-FP4",
139
- "Other (specify)",
140
- ]
141
-
142
- selected_hf_model = st.selectbox("Select Hugging Face Model", hf_model_options)
143
-
144
- # Custom model input
145
- if selected_hf_model == "Other (specify)":
146
- custom_hf_model = st.text_input(
147
- "Enter model name (e.g., organization/model-name)"
148
- )
149
- selected_hf_model = (
150
- custom_hf_model if custom_hf_model else "gpt2"
151
- ) # Default to gpt2 if empty
152
-
153
- # User message input for HF
154
- hf_user_message = st.text_area(
155
- "Enter your message here",
156
- placeholder="Hello, world!",
157
- height=200,
158
- key="hf_message",
159
- )
160
-
161
- # Button to count tokens for HF
162
- if st.button("Count Tokens (Hugging Face)"):
163
- if not hf_user_message:
164
- st.warning("Please enter a message to count tokens")
165
- else:
166
- try:
167
- with st.spinner(f"Loading tokenizer for {selected_hf_model}..."):
168
- tokenizer = AutoTokenizer.from_pretrained(selected_hf_model)
169
-
170
- # Count tokens in different ways
171
- tokens = tokenizer.tokenize(hf_user_message)
172
- token_ids = tokenizer.encode(hf_user_message)
173
-
174
- # Display results
175
- st.success(f"Token count: {len(tokens)}")
176
- st.success(f"Token IDs count: {len(token_ids)}")
177
-
178
- # Show the actual tokens
179
- with st.expander("View Token Details"):
180
- col1, col2 = st.columns(2)
181
- with col1:
182
- st.subheader("Tokens")
183
- st.json([f"{i}: {token}" for i, token in enumerate(tokens)])
184
- with col2:
185
- st.subheader("Token IDs")
186
- st.json(
187
- [
188
- f"{i}: {token_id}"
189
- for i, token_id in enumerate(token_ids)
190
- ]
191
- )
192
-
193
- except Exception as e:
194
- st.error(f"An error occurred: {str(e)}")
195
-
196
- # Additional information
197
- with st.expander("About Token Counting"):
198
- st.markdown("""
199
- ### What are tokens?
200
-
201
- Tokens are chunks of text that language models process. They can be parts of words, whole words,
202
- or even punctuation. Different models tokenize text differently.
203
-
204
- ### Why count tokens?
205
-
206
- - **Cost Management**: Understanding token usage helps manage API costs
207
- - **Model Limitations**: Different models have different token limits
208
- - **Performance Optimization**: Helps optimize prompts for better responses
209
-
210
- ### Token Counting Tips
211
-
212
- - Shorter messages use fewer tokens
213
- - Special formatting, code blocks, and unusual characters may use more tokens
214
- - For Claude models, the system message also counts toward your token usage
215
- - Hugging Face models may tokenize text differently than Anthropic models
216
- """)
217
-
218
- # Footer
219
- st.markdown("---")
220
- st.markdown("Created with Streamlit, Anthropic API, and Hugging Face Transformers")
 
1
  import streamlit as st
2
+ from pages import anthropic_models, huggingface_models
3
+ from utils import set_page_config, display_about_token_counting, display_footer
 
 
 
4
 
5
def main():
    """
    Main entry point for the Streamlit application.

    Configures the page, renders the title and intro, lays out one tab
    per model provider, then appends the shared help section and footer.
    """
    set_page_config()

    st.title("🎈 LLM Token Counter")
    st.markdown(
        "This app counts tokens for different language models based on your input text."
    )

    # One tab per supported model provider.
    anthropic_tab, hf_tab = st.tabs(["Anthropic Models", "Hugging Face Models"])

    with anthropic_tab:
        anthropic_models.display()

    with hf_tab:
        huggingface_models.display()

    # Shared explanatory section and footer, common to both providers.
    display_about_token_counting()
    display_footer()


if __name__ == "__main__":
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.py ADDED
File without changes
pages/__init__.py ADDED
File without changes
pages/anthropic_models.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import anthropic
3
+ import json
4
+ import os
5
+ from utils import ANTHROPIC_MODEL_OPTIONS
6
+
7
def display():
    """
    Render the Anthropic (Claude) tab.

    Collects an API key (falling back to ANTHROPIC_API_KEY), a model
    choice, an optional system prompt, and a user message, then calls
    the Anthropic ``messages.count_tokens`` endpoint and shows the result.
    """
    st.header("Anthropic (Claude) Models")

    # API key input (with warning about security); environment fallback below.
    api_key = st.text_input(
        "Enter your Anthropic API Key",
        type="password",
        help="⚠️ Never share your API key. Leave empty to use ANTHROPIC_API_KEY environment variable.",
    )
    api_key = api_key or os.environ.get("ANTHROPIC_API_KEY", "")

    # Model picker driven by the shared constants in utils.
    model_label = st.selectbox(
        "Select Claude Model", list(ANTHROPIC_MODEL_OPTIONS.keys())
    )

    # Optional system prompt; included in the request only when non-empty.
    st.subheader("System Message (Optional)")
    system_message = st.text_area(
        "System Message", placeholder="e.g., You are a helpful assistant", height=100
    )

    # The message whose tokens will be counted.
    st.subheader("Message Content")
    user_message = st.text_area(
        "Enter your message here",
        placeholder="Hello, Claude! How are you today?",
        height=200,
        key="anthropic_message",
    )

    # Nothing further to do until the user asks for a count.
    if not st.button("Count Tokens (Anthropic)"):
        return

    if not api_key:
        st.error(
            "No Anthropic API key found. Please enter a key or set the ANTHROPIC_API_KEY environment variable."
        )
        return

    if not user_message:
        st.warning("Please enter a message to count tokens")
        return

    try:
        client = anthropic.Anthropic(api_key=api_key)

        # Assemble the token-count request.
        request = {
            "model": ANTHROPIC_MODEL_OPTIONS[model_label],
            "messages": [{"role": "user", "content": user_message}],
        }
        if system_message:
            request["system"] = system_message

        response = client.messages.count_tokens(**request)

        st.success(f"Input tokens: {response.input_tokens}")

        # Full API payload for the curious, tucked into an expander.
        with st.expander("View Full API Response"):
            st.code(json.dumps(response.model_dump(), indent=2), language="json")
    except Exception as e:
        st.error(f"An error occurred: {str(e)}")
pages/huggingface_models.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from huggingface_hub import login
3
+ from transformers import AutoTokenizer
4
+ import os
5
+ from utils import HF_MODEL_OPTIONS
6
+
7
def display():
    """
    Render the Hugging Face tab.

    Handles token entry (falling back to HF_TOKEN), hub login/logout via
    ``huggingface_hub.login``, model selection, and tokenizer-based token
    counting with ``transformers.AutoTokenizer``.
    """
    st.header("Hugging Face Models")

    # HF Token input; environment fallback below.
    hf_token = st.text_input(
        "Enter your Hugging Face Token",
        type="password",
        help="⚠️ Never share your token. Leave empty to use HF_TOKEN environment variable.",
    )
    if not hf_token:
        hf_token = os.environ.get("HF_TOKEN", "")

    # Login status tracker, persisted across Streamlit reruns.
    if "hf_logged_in" not in st.session_state:
        st.session_state.hf_logged_in = False

    if not st.session_state.hf_logged_in:
        # Login button
        if st.button("Login to Hugging Face"):
            if not hf_token:
                st.error(
                    "No Hugging Face token found. Please enter a token or set the HF_TOKEN environment variable."
                )
            else:
                try:
                    login(token=hf_token)
                    st.session_state.hf_logged_in = True
                    st.session_state.hf_token = hf_token  # Store the token in session state
                    st.success("Successfully logged in to Hugging Face")
                except Exception as e:
                    st.error(f"Login failed: {str(e)}")
    else:
        st.success("Logged in to Hugging Face")

        # Logout button
        if st.button("Logout"):
            st.session_state.hf_logged_in = False
            st.session_state.hf_token = ""  # Clear the token from session state
            st.success("Successfully logged out from Hugging Face")
            # Fix: st.experimental_rerun() was deprecated and removed in
            # Streamlit >= 1.30; use st.rerun(), keeping the old call as a
            # fallback for older Streamlit installs.
            if hasattr(st, "rerun"):
                st.rerun()
            else:
                st.experimental_rerun()

    if st.session_state.hf_logged_in or hf_token:
        # Predefined popular models from the shared constants in utils.
        selected_hf_model = st.selectbox("Select Hugging Face Model", HF_MODEL_OPTIONS)

        # Custom model input for arbitrary hub model ids.
        if selected_hf_model == "Other (specify)":
            custom_hf_model = st.text_input(
                "Enter model name (e.g., organization/model-name)"
            )
            selected_hf_model = (
                custom_hf_model if custom_hf_model else "gpt2"
            )  # Default to gpt2 if empty

        # User message input for HF
        hf_user_message = st.text_area(
            "Enter your message here",
            placeholder="Hello, world!",
            height=200,
            key="hf_message",
        )

        # Button to count tokens for HF
        if st.button("Count Tokens (Hugging Face)"):
            if not hf_user_message:
                st.warning("Please enter a message to count tokens")
            else:
                try:
                    with st.spinner(f"Loading tokenizer for {selected_hf_model}..."):
                        tokenizer = AutoTokenizer.from_pretrained(selected_hf_model)

                    # Count tokens in different ways: sub-word strings vs ids
                    # (encode may add special tokens, so counts can differ).
                    tokens = tokenizer.tokenize(hf_user_message)
                    token_ids = tokenizer.encode(hf_user_message)

                    # Display results
                    st.success(f"Token count: {len(tokens)}")
                    st.success(f"Token IDs count: {len(token_ids)}")

                    # Show the actual tokens side by side with their ids.
                    with st.expander("View Token Details"):
                        col1, col2 = st.columns(2)
                        with col1:
                            st.subheader("Tokens")
                            st.json([f"{i}: {token}" for i, token in enumerate(tokens)])
                        with col2:
                            st.subheader("Token IDs")
                            st.json(
                                [
                                    f"{i}: {token_id}"
                                    for i, token_id in enumerate(token_ids)
                                ]
                            )

                except Exception as e:
                    st.error(f"An error occurred: {str(e)}")
utils.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
def set_page_config():
    """Configure the Streamlit page: title, robot icon, centered layout."""
    st.set_page_config(
        page_title="LLM Token Counter", page_icon="🤖", layout="centered"
    )
8
+
9
def display_about_token_counting():
    """Show an expandable primer on what tokens are and why counting them matters."""
    about_text = """
    ### What are tokens?

    Tokens are chunks of text that language models process. They can be parts of words, whole words,
    or even punctuation. Different models tokenize text differently.

    ### Why count tokens?

    - **Cost Management**: Understanding token usage helps manage API costs
    - **Model Limitations**: Different models have different token limits
    - **Performance Optimization**: Helps optimize prompts for better responses

    ### Token Counting Tips

    - Shorter messages use fewer tokens
    - Special formatting, code blocks, and unusual characters may use more tokens
    - For Claude models, the system message also counts toward your token usage
    - Hugging Face models may tokenize text differently than Anthropic models
    """
    with st.expander("About Token Counting"):
        st.markdown(about_text)
33
+
34
def display_footer():
    """Render the app footer: a horizontal rule followed by an attribution line."""
    # A markdown "---" renders as a horizontal divider.
    st.markdown("---")
    st.markdown("Created with Streamlit, Anthropic API, and Hugging Face Transformers")
40
+
41
# Constants for model options, shared by the provider pages.

# Display label -> Anthropic API model id.
ANTHROPIC_MODEL_OPTIONS = {
    "Claude 3.7 Sonnet": "claude-3-7-sonnet-20250219",
    "Claude 3.5 Sonnet": "claude-3-5-sonnet-20240620",
    # Fix: the 3.5 Haiku id previously reused the Claude 3 Haiku snapshot
    # date (20240307); the actual claude-3-5-haiku release is 20241022.
    "Claude 3.5 Haiku": "claude-3-5-haiku-20241022",
    "Claude 3 Haiku": "claude-3-haiku-20240307",
    "Claude 3 Opus": "claude-3-opus-20240229",
}

# Popular Hugging Face Hub model ids; the final sentinel entry lets the
# user type an arbitrary model id instead.
HF_MODEL_OPTIONS = [
    "mistralai/Mistral-Small-24B-Instruct-2501",
    "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
    "google/codegemma-7b",
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "microsoft/Phi-4-multimodal-instruct",
    "nvidia/Llama-3.3-70B-Instruct-FP4",
    "Other (specify)",
]