File size: 4,098 Bytes
176f432
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import streamlit as st
from huggingface_hub import login
from transformers import AutoTokenizer
import os
from utils import HF_MODEL_OPTIONS

def _rerun():
    """Restart the current Streamlit script run.

    ``st.experimental_rerun`` was deprecated in favour of ``st.rerun`` and is
    absent from newer Streamlit releases, so prefer the new name and only
    fall back to the old one when ``st.rerun`` does not exist.
    """
    rerun = getattr(st, "rerun", None)
    if rerun is None:
        rerun = st.experimental_rerun
    rerun()


def _render_login_controls(hf_token):
    """Render the login/logout button and keep the session state in sync.

    Args:
        hf_token: Token typed by the user or read from ``HF_TOKEN``; may be
            an empty string when neither source provided one.
    """
    if not st.session_state.hf_logged_in:
        if not st.button("Login to Hugging Face"):
            return
        if not hf_token:
            st.error(
                "No Hugging Face token found. Please enter a token or set the HF_TOKEN environment variable."
            )
            return
        try:
            login(token=hf_token)
        except Exception as e:
            # UI boundary: surface any login failure to the user instead of
            # crashing the page.
            st.error(f"Login failed: {e}")
        else:
            st.session_state.hf_logged_in = True
            st.session_state.hf_token = hf_token  # Store the token in session state
            st.success("Successfully logged in to Hugging Face")
        return

    st.success("Logged in to Hugging Face")

    if st.button("Logout"):
        st.session_state.hf_logged_in = False
        st.session_state.hf_token = ""  # Clear the token from session state
        try:
            # login() hands the token to huggingface_hub, which keeps it
            # beyond this session; drop it there too, otherwise "Logout"
            # only hides the UI while the credential stays usable.
            from huggingface_hub import logout

            logout()
        except Exception as e:
            # Skip the rerun so this warning stays on screen; the page
            # refreshes on the next interaction.
            st.warning(f"Could not remove the stored Hugging Face token: {e}")
            return
        st.success("Successfully logged out from Hugging Face")
        _rerun()  # Rerun the script to refresh the UI


def _render_token_counter():
    """Render the model picker and message box, and count tokens on demand."""
    # Predefined popular models
    selected_hf_model = st.selectbox("Select Hugging Face Model", HF_MODEL_OPTIONS)

    # Custom model input
    if selected_hf_model == "Other (specify)":
        custom_hf_model = st.text_input(
            "Enter model name (e.g., organization/model-name)"
        )
        # Ignore stray whitespace from copy/paste; default to gpt2 if empty.
        selected_hf_model = custom_hf_model.strip() or "gpt2"

    # User message input for HF
    hf_user_message = st.text_area(
        "Enter your message here",
        placeholder="Hello, world!",
        height=200,
        key="hf_message",
    )

    # Button to count tokens for HF
    if not st.button("Count Tokens (Hugging Face)"):
        return
    if not hf_user_message:
        st.warning("Please enter a message to count tokens")
        return

    try:
        with st.spinner(f"Loading tokenizer for {selected_hf_model}..."):
            tokenizer = AutoTokenizer.from_pretrained(selected_hf_model)

        # tokenize() yields the token strings for the text alone, while
        # encode() yields ids and by default also adds the model's special
        # tokens, so the two counts can legitimately differ.
        tokens = tokenizer.tokenize(hf_user_message)
        token_ids = tokenizer.encode(hf_user_message)

        # Display results
        st.success(f"Token count: {len(tokens)}")
        st.success(f"Token IDs count: {len(token_ids)}")

        # Show the actual tokens
        with st.expander("View Token Details"):
            col1, col2 = st.columns(2)
            with col1:
                st.subheader("Tokens")
                st.json([f"{i}: {token}" for i, token in enumerate(tokens)])
            with col2:
                st.subheader("Token IDs")
                st.json(
                    [f"{i}: {token_id}" for i, token_id in enumerate(token_ids)]
                )
    except Exception as e:
        # UI boundary: unknown model names, network errors and tokenizer
        # failures are all reported to the user in the page.
        st.error(f"An error occurred: {e}")


def display():
    """
    Display the Hugging Face models tab.

    Renders a token input, login/logout controls and, once a token is
    available (typed, from the HF_TOKEN environment variable, or via a
    logged-in session), a model picker with a token counter.
    """
    st.header("Hugging Face Models")

    # HF Token input; strip so a pasted token with a trailing space or
    # newline is not rejected.
    hf_token = st.text_input(
        "Enter your Hugging Face Token",
        type="password",
        help="⚠️ Never share your token. Leave empty to use HF_TOKEN environment variable.",
    ).strip()

    # If no token provided, try to get from environment
    if not hf_token:
        hf_token = os.environ.get("HF_TOKEN", "")

    # Login status tracker
    if "hf_logged_in" not in st.session_state:
        st.session_state.hf_logged_in = False

    _render_login_controls(hf_token)

    if st.session_state.hf_logged_in or hf_token:
        _render_token_counter()