Spaces:
Running
Running
Timothy S. Phan
committed on
Commit
·
f4147c3
1
Parent(s):
7b0e2d6
adds initial streamlit app to hf spaces
Browse files- app.py +220 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# App dependencies: Streamlit for the UI, the Anthropic SDK for Claude
# token counting, and huggingface_hub/transformers for open-model tokenizers.
import streamlit as st
import anthropic
import json
import os
from huggingface_hub import login
from transformers import AutoTokenizer

# Page-wide configuration; must be the first Streamlit call in the script.
st.set_page_config(page_title="LLM Token Counter", page_icon="🤖", layout="wide")

st.title("🎈 LLM Token Counter")
st.markdown(
    "This app counts tokens for different language models based on your input text."
)

# Tabs for model provider selection
# provider_tab[0] = Anthropic (API-based counting), provider_tab[1] = Hugging Face (local tokenizer).
provider_tab = st.tabs(["Anthropic Models", "Hugging Face Models"])
with provider_tab[0]:  # Anthropic Models
    # Counts tokens server-side via the Anthropic Messages count_tokens API;
    # requires a valid API key but makes no billable model call.
    st.header("Anthropic (Claude) Models")

    # API key input (with warning about security)
    anthropic_key = st.text_input(
        "Enter your Anthropic API Key",
        type="password",
        help="⚠️ Never share your API key. Leave empty to use ANTHROPIC_API_KEY environment variable.",
    )

    # If no key provided, try to get from environment
    if not anthropic_key:
        anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")

    # Model selection for Anthropic: display name -> dated model id snapshot.
    anthropic_model_options = {
        "Claude 3.7 Sonnet": "claude-3-7-sonnet-20250219",
        "Claude 3.5 Sonnet": "claude-3-5-sonnet-20240620",
        # FIX: the previous id "claude-3-5-haiku-20240307" reused Claude 3
        # Haiku's snapshot date and is not a valid model id, so counting
        # tokens with this selection would fail with a not-found error.
        # Claude 3.5 Haiku's snapshot is 2024-10-22.
        "Claude 3.5 Haiku": "claude-3-5-haiku-20241022",
        "Claude 3 Haiku": "claude-3-haiku-20240307",
        "Claude 3 Opus": "claude-3-opus-20240229",
    }

    selected_anthropic_model = st.selectbox(
        "Select Claude Model", list(anthropic_model_options.keys())
    )

    # System message (optional) — counts toward input tokens when provided.
    st.subheader("System Message (Optional)")
    system_message = st.text_area(
        "System Message", placeholder="e.g., You are a helpful assistant", height=100
    )

    # User message input
    st.subheader("Message Content")
    anthropic_user_message = st.text_area(
        "Enter your message here",
        placeholder="Hello, Claude! How are you today?",
        height=200,
        key="anthropic_message",
    )

    # Button to count tokens for Anthropic
    if st.button("Count Tokens (Anthropic)"):
        if not anthropic_key:
            st.error(
                "No Anthropic API key found. Please enter a key or set the ANTHROPIC_API_KEY environment variable."
            )
        elif not anthropic_user_message:
            st.warning("Please enter a message to count tokens")
        else:
            try:
                # Initialize client with API key
                client = anthropic.Anthropic(api_key=anthropic_key)

                # Create the request
                count_request = {
                    "model": anthropic_model_options[selected_anthropic_model],
                    "messages": [{"role": "user", "content": anthropic_user_message}],
                }

                # Add system message if provided (top-level field, not a message)
                if system_message:
                    count_request["system"] = system_message

                # Make the API call to count tokens
                response = client.messages.count_tokens(**count_request)

                # Display results
                st.success(f"Input tokens: {response.input_tokens}")

                # Display the full JSON response in an expandable section
                with st.expander("View Full API Response"):
                    st.code(
                        json.dumps(response.model_dump(), indent=2), language="json"
                    )

            # Broad catch is deliberate at this UI boundary: surface any
            # SDK/network/auth failure to the user instead of crashing the app.
            except Exception as e:
                st.error(f"An error occurred: {str(e)}")
with provider_tab[1]:  # Hugging Face Models
    # Counts tokens locally by downloading the selected model's tokenizer;
    # no inference is performed.
    st.header("Hugging Face Models")

    # HF Token input
    hf_token = st.text_input(
        "Enter your Hugging Face Token",
        type="password",
        help="⚠️ Never share your token. Leave empty to use HF_TOKEN environment variable.",
    )

    # If no token provided, try to get from environment
    if not hf_token:
        hf_token = os.environ.get("HF_TOKEN", "")

    # Login status tracker (persists across Streamlit reruns)
    if "hf_logged_in" not in st.session_state:
        st.session_state.hf_logged_in = False

    # Login button
    if not st.session_state.hf_logged_in and st.button("Login to Hugging Face"):
        if not hf_token:
            st.error(
                "No Hugging Face token found. Please enter a token or set the HF_TOKEN environment variable."
            )
        else:
            try:
                login(token=hf_token)
                st.session_state.hf_logged_in = True
                st.success("Successfully logged in to Hugging Face")
            except Exception as e:
                st.error(f"Login failed: {str(e)}")

    if st.session_state.hf_logged_in or hf_token:
        # Predefined popular models
        hf_model_options = [
            "mistralai/Mistral-Small-24B-Instruct-2501",
            "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
            "google/codegemma-7b",
            "Qwen/Qwen2.5-Coder-32B-Instruct",
            "microsoft/Phi-4-multimodal-instruct",
            "nvidia/Llama-3.3-70B-Instruct-FP4",
            "Other (specify)",
        ]

        selected_hf_model = st.selectbox("Select Hugging Face Model", hf_model_options)

        # Custom model input
        if selected_hf_model == "Other (specify)":
            custom_hf_model = st.text_input(
                "Enter model name (e.g., organization/model-name)"
            )
            selected_hf_model = (
                custom_hf_model if custom_hf_model else "gpt2"
            )  # Default to gpt2 if empty

        # User message input for HF
        hf_user_message = st.text_area(
            "Enter your message here",
            placeholder="Hello, world!",
            height=200,
            key="hf_message",
        )

        # Button to count tokens for HF
        if st.button("Count Tokens (Hugging Face)"):
            if not hf_user_message:
                st.warning("Please enter a message to count tokens")
            else:
                try:
                    with st.spinner(f"Loading tokenizer for {selected_hf_model}..."):
                        # FIX: forward the user's token so gated/private
                        # repositories load even when the explicit Login button
                        # was never pressed. `token=None` preserves the old
                        # anonymous behavior when no token is available.
                        tokenizer = AutoTokenizer.from_pretrained(
                            selected_hf_model, token=hf_token or None
                        )

                    # Count tokens in two ways:
                    # tokenize() -> surface token strings (no special tokens);
                    # encode()   -> token ids, which may include BOS/EOS specials,
                    # hence the two counts can legitimately differ.
                    tokens = tokenizer.tokenize(hf_user_message)
                    token_ids = tokenizer.encode(hf_user_message)

                    # Display results
                    st.success(f"Token count: {len(tokens)}")
                    st.success(f"Token IDs count: {len(token_ids)}")

                    # Show the actual tokens
                    with st.expander("View Token Details"):
                        col1, col2 = st.columns(2)
                        with col1:
                            st.subheader("Tokens")
                            st.json([f"{i}: {token}" for i, token in enumerate(tokens)])
                        with col2:
                            st.subheader("Token IDs")
                            st.json(
                                [
                                    f"{i}: {token_id}"
                                    for i, token_id in enumerate(token_ids)
                                ]
                            )

                # Broad catch is deliberate: tokenizer download/parse can fail
                # in many ways (network, auth, bad repo name); show the error.
                except Exception as e:
                    st.error(f"An error occurred: {str(e)}")
# Additional information
# Static educational text shown below both tabs, plus a page footer.
with st.expander("About Token Counting"):
    st.markdown("""
    ### What are tokens?

    Tokens are chunks of text that language models process. They can be parts of words, whole words,
    or even punctuation. Different models tokenize text differently.

    ### Why count tokens?

    - **Cost Management**: Understanding token usage helps manage API costs
    - **Model Limitations**: Different models have different token limits
    - **Performance Optimization**: Helps optimize prompts for better responses

    ### Token Counting Tips

    - Shorter messages use fewer tokens
    - Special formatting, code blocks, and unusual characters may use more tokens
    - For Claude models, the system message also counts toward your token usage
    - Hugging Face models may tokenize text differently than Anthropic models
    """)

# Footer
st.markdown("---")
st.markdown("Created with Streamlit, Anthropic API, and Hugging Face Transformers")
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
huggingface_hub
|
3 |
+
transformers
|
4 |
+
anthropic
|