import os

import streamlit as st
from huggingface_hub import login
from transformers import AutoTokenizer

from utils import HF_MODEL_OPTIONS


def _rerun() -> None:
    """Trigger a script rerun on both old and new Streamlit versions.

    ``st.experimental_rerun`` was deprecated and removed in Streamlit >= 1.27
    in favor of ``st.rerun``; prefer the new name when available.
    """
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()


def _resolve_token(entered_token: str) -> str:
    """Return the token to use: the user-entered one, else HF_TOKEN env var, else ''."""
    if entered_token:
        return entered_token
    return os.environ.get("HF_TOKEN", "")


def _render_auth_controls(hf_token: str) -> None:
    """Render the login/logout buttons and maintain login state in session state."""
    # Login status tracker — initialised once per session.
    if "hf_logged_in" not in st.session_state:
        st.session_state.hf_logged_in = False

    if not st.session_state.hf_logged_in:
        if st.button("Login to Hugging Face"):
            if not hf_token:
                st.error(
                    "No Hugging Face token found. Please enter a token or set the HF_TOKEN environment variable."
                )
            else:
                try:
                    login(token=hf_token)
                    st.session_state.hf_logged_in = True
                    # Store the token in session state so later reruns can reuse it.
                    st.session_state.hf_token = hf_token
                    st.success("Successfully logged in to Hugging Face")
                except Exception as e:
                    # Broad catch is intentional at this UI boundary: surface
                    # any hub/network failure to the user instead of crashing.
                    st.error(f"Login failed: {e}")
    else:
        st.success("Logged in to Hugging Face")
        if st.button("Logout"):
            st.session_state.hf_logged_in = False
            st.session_state.hf_token = ""  # Clear the token from session state
            st.success("Successfully logged out from Hugging Face")
            _rerun()  # Refresh the UI so the login controls reappear


def _render_token_counter() -> None:
    """Render model selection, message input, and the token-count results panel."""
    # Predefined popular models, with a free-text escape hatch.
    selected_hf_model = st.selectbox("Select Hugging Face Model", HF_MODEL_OPTIONS)
    if selected_hf_model == "Other (specify)":
        custom_hf_model = st.text_input(
            "Enter model name (e.g., organization/model-name)"
        )
        # Default to gpt2 if the custom field is left empty.
        selected_hf_model = custom_hf_model if custom_hf_model else "gpt2"

    hf_user_message = st.text_area(
        "Enter your message here",
        placeholder="Hello, world!",
        height=200,
        key="hf_message",
    )

    if st.button("Count Tokens (Hugging Face)"):
        if not hf_user_message:
            st.warning("Please enter a message to count tokens")
            return
        try:
            with st.spinner(f"Loading tokenizer for {selected_hf_model}..."):
                tokenizer = AutoTokenizer.from_pretrained(selected_hf_model)

            # Count tokens two ways: string tokens, and encoded ids (the id
            # count can differ because of special tokens added by encode()).
            tokens = tokenizer.tokenize(hf_user_message)
            token_ids = tokenizer.encode(hf_user_message)

            st.success(f"Token count: {len(tokens)}")
            st.success(f"Token IDs count: {len(token_ids)}")

            with st.expander("View Token Details"):
                col1, col2 = st.columns(2)
                with col1:
                    st.subheader("Tokens")
                    st.json([f"{i}: {token}" for i, token in enumerate(tokens)])
                with col2:
                    st.subheader("Token IDs")
                    st.json(
                        [
                            f"{i}: {token_id}"
                            for i, token_id in enumerate(token_ids)
                        ]
                    )
        except Exception as e:
            # Tokenizer downloads can fail for many reasons (bad model name,
            # gated repo, network); show the error rather than crashing the app.
            st.error(f"An error occurred: {e}")


def display():
    """
    Display the Hugging Face models tab.

    Renders the token/login controls and, once a token is available or the
    user is logged in, the model-selection and token-counting UI.
    """
    st.header("Hugging Face Models")

    # HF Token input (masked); an empty field falls back to the HF_TOKEN env var.
    hf_token = st.text_input(
        "Enter your Hugging Face Token",
        type="password",
        help="⚠️ Never share your token. Leave empty to use HF_TOKEN environment variable.",
    )
    hf_token = _resolve_token(hf_token)

    _render_auth_controls(hf_token)

    # The counter only makes sense once we have some credential available
    # (public models still work with just a token present).
    if st.session_state.hf_logged_in or hf_token:
        _render_token_counter()