File size: 6,941 Bytes
8250356
400e980
8250356
 
c62a8cf
8250356
 
 
 
 
 
 
c083776
 
 
 
8250356
 
 
 
 
 
 
 
 
 
c083776
8250356
 
400e980
 
 
 
8250356
 
 
c083776
 
 
 
 
 
 
 
 
 
8250356
 
400e980
c083776
8250356
 
c083776
8250356
 
c62a8cf
 
 
 
 
 
 
400e980
c62a8cf
8250356
400e980
b1e1362
400e980
 
 
 
 
 
 
 
 
 
b1e1362
400e980
 
8250356
c62a8cf
8250356
c083776
 
400e980
 
 
 
 
 
 
 
 
 
c083776
 
 
 
 
 
 
 
 
 
8250356
 
400e980
c083776
8250356
 
c083776
8250356
 
 
 
 
 
 
 
 
 
400e980
 
 
 
8250356
 
e38a225
8250356
 
 
 
 
 
 
e38a225
8250356
 
 
 
400e980
8250356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400e980
 
8250356
 
c083776
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import gradio as gr
import json
import pandas as pd
import tiktoken
import anthropic

def _count_anthropic_tokens(client, model, text):
    """Return the Anthropic token count for *text* via the beta count-tokens API.

    Sends the text as a single user message; the response object exposes the
    count directly as ``input_tokens``. Raises whatever the SDK raises
    (network errors, invalid model, missing API key) — callers handle it.
    """
    response = client.beta.messages.count_tokens(
        betas=["token-counting-2024-11-01"],
        model=model,
        messages=[{
            "role": "user",
            "content": text,
        }],
    )
    return response.input_tokens


def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthropic_model):
    """Count tokens in an uploaded CSV and return a Markdown summary.

    Parameters
    ----------
    file : str | None
        Path to the uploaded CSV file (Gradio ``type="filepath"``), or None.
    calculate_openai : bool
        If True, count tokens with tiktoken for ``openai_model``.
    openai_model : str
        OpenAI model name used to pick the tiktoken encoding.
    calculate_anthropic : bool
        If True, count tokens via the Anthropic beta count-tokens endpoint.
    anthropic_model : str
        Anthropic model name passed to the API.

    Returns
    -------
    str
        Markdown with the total token count and a per-column breakdown for
        each selected provider, or a human-readable error/usage message.
    """
    if file is None:
        return "Please upload a CSV file."

    try:
        df = pd.read_csv(file)
    except Exception as e:
        return f"Error reading CSV file: {e}"

    output = ""

    if calculate_openai:
        # Pick the encoding for the chosen model; fall back to cl100k_base
        # for model names tiktoken doesn't know.
        try:
            openai_encoding = tiktoken.encoding_for_model(openai_model)
        except KeyError:
            openai_encoding = tiktoken.get_encoding("cl100k_base")

        # Total: tokenize the whole CSV serialization in one pass.
        try:
            total_tokens_openai = len(openai_encoding.encode(df.to_csv(index=False)))
        except Exception as e:
            return f"Error counting tokens with OpenAI model: {e}"

        # Per column: header plus all values joined by newlines, one encode
        # call per column (much cheaper than encoding cell by cell).
        token_counts_openai = {}
        for col in df.columns:
            try:
                tokens_openai = openai_encoding.encode('\n'.join([col] + list(df[col].astype(str).values)))
            except Exception as e:
                return f"Error counting tokens with OpenAI model: {e}"
            token_counts_openai[col] = len(tokens_openai)

        output += f"\n**Total OpenAI Tokens ({openai_model}): {total_tokens_openai}**\n"
        output += f"\n**OpenAI Token Counts per Column ({openai_model}):**\n\n"
        for col, count in token_counts_openai.items():
            output += f"- {col}: {count} tokens\n"

    if calculate_anthropic:
        # The SDK reads ANTHROPIC_API_KEY from the environment.
        client = anthropic.Anthropic()

        # Total: one API call over the whole CSV serialization.
        try:
            total_tokens_anthropic = _count_anthropic_tokens(client, anthropic_model, df.to_csv(index=False))
        except Exception as e:
            return f"Error counting tokens with Anthropic model: {e}"

        # Per column: one API call per column (header + values).
        token_counts_anthropic = {}
        for col in df.columns:
            try:
                token_counts_anthropic[col] = _count_anthropic_tokens(
                    client, anthropic_model, '\n'.join([col] + list(df[col].astype(str).values))
                )
            except Exception as e:
                return f"Error counting tokens with Anthropic model: {e}"

        output += f"\n**Total Anthropic Tokens ({anthropic_model}): {total_tokens_anthropic}**\n"
        output += f"\n**Anthropic Token Counts per Column ({anthropic_model}):**\n"
        for col, count in token_counts_anthropic.items():
            output += f"- {col}: {count} tokens\n"

    if not calculate_openai and not calculate_anthropic:
        output = "Please select at least one model to calculate tokens."

    return output

def main():
    """Assemble the Gradio UI for the token counter and launch it."""
    with gr.Blocks() as demo:
        gr.Markdown("# Token Counter")
        gr.Markdown("Upload a CSV file to see token counts per column and total tokens.")
        gr.Markdown("""
                    For OpenAI models Python package `tiktoken` is used.
                    For Anthropic models beta version of [Token counting](https://docs.anthropic.com/en/docs/build-with-claude/token-counting) is used.
                    """)

        with gr.Row():
            csv_upload = gr.File(label="Upload CSV File", type="filepath")

        with gr.Row():
            openai_enabled = gr.Checkbox(label="Calculate tokens for OpenAI models")
            anthropic_enabled = gr.Checkbox(label="Calculate tokens for Anthropic models")

        # Model dropdowns start hidden; each becomes visible only when its
        # provider's checkbox is ticked.
        with gr.Row():
            openai_choice = gr.Dropdown(
                choices=['gpt-4o', 'gpt-4o-mini', 'gpt-4'],
                label="Select OpenAI Model",
                visible=False,
            )
            anthropic_choice = gr.Dropdown(
                choices=['claude-3-5-sonnet-latest', 'claude-3-5-haiku-latest', 'claude-3-opus-latest', 'claude-3-haiku-20240307'],
                label="Select Anthropic Model",
                visible=False,
            )

        def _toggle_visibility(checked):
            # Mirror the checkbox state onto the paired dropdown.
            return gr.update(visible=checked)

        openai_enabled.change(fn=_toggle_visibility, inputs=openai_enabled, outputs=openai_choice)
        anthropic_enabled.change(fn=_toggle_visibility, inputs=anthropic_enabled, outputs=anthropic_choice)

        run_button = gr.Button("Calculate Tokens")
        result_md = gr.Markdown()

        run_button.click(
            fn=process_csv,
            inputs=[csv_upload, openai_enabled, openai_choice, anthropic_enabled, anthropic_choice],
            outputs=result_md,
        )

    demo.launch()

if __name__ == "__main__":
    main()