File size: 4,735 Bytes
8250356
 
 
c62a8cf
 
8250356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e38a225
8250356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c62a8cf
 
 
 
 
 
 
 
 
8250356
 
 
c62a8cf
8250356
 
 
c62a8cf
8250356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e38a225
8250356
 
 
 
 
 
 
e38a225
8250356
 
 
 
e38a225
8250356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32f5ca5
8250356
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import gradio as gr
import pandas as pd
import tiktoken
import anthropic
#import os

def _count_tokens_by_column(df, count_tokens):
    """Return ``{column_name: token_total}`` for every column of *df*.

    Each cell is converted to ``str`` before counting, so non-text values
    (numbers, missing values) are counted by their string form.
    *count_tokens* is called once per cell and must return an int.
    """
    return {
        col: sum(count_tokens(cell) for cell in df[col].astype(str))
        for col in df.columns
    }


def _format_section(provider, model, counts):
    """Render one provider's per-column counts and grand total as Markdown."""
    lines = [f"**{provider} Token Counts per Column ({model}):**\n"]
    lines.extend(f"- {col}: {count} tokens\n" for col, count in counts.items())
    lines.append(
        f"\n**Total {provider} Tokens ({model}): {sum(counts.values())}**\n"
    )
    return "".join(lines)


def _openai_encoding(model):
    """Return the tiktoken encoding for *model*, or ``cl100k_base`` as fallback.

    The fallback is used both when tiktoken does not know the model name and
    when no model was selected at all (*model* is ``None`` or empty), which
    would otherwise fail inside ``encoding_for_model``.
    """
    if model:
        try:
            return tiktoken.encoding_for_model(model)
        except KeyError:
            # Unknown model name: deliberately fall through to the default.
            pass
    # See https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
    return tiktoken.get_encoding("cl100k_base")


def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthropic_model):
    """Count tokens per column of an uploaded CSV and return a Markdown report.

    Args:
        file: The uploaded CSV, either a filesystem path (``str``) or an
            object exposing the path as ``.name``. ``None`` means nothing
            was uploaded.
        calculate_openai: Whether to include an OpenAI (tiktoken) section.
        openai_model: OpenAI model name used to pick the tiktoken encoding.
        calculate_anthropic: Whether to include an Anthropic section.
        anthropic_model: Anthropic model name; used only as a label in the
            report, it does not influence the counting call.

    Returns:
        A Markdown string with per-column and total token counts for each
        selected provider, or a short instruction message when no file was
        uploaded or no provider was selected.
    """
    if file is None:
        return "Please upload a CSV file."

    # Validate the selection before doing any work on the file.
    if not calculate_openai and not calculate_anthropic:
        return "Please select at least one model to calculate tokens."

    # Accept both a plain path string and a file-like wrapper with ``.name``.
    csv_path = getattr(file, "name", file)
    df = pd.read_csv(csv_path)

    sections = []

    if calculate_openai:
        encoding = _openai_encoding(openai_model)

        def count_openai(cell):
            # disallowed_special=() makes text such as "<|endoftext|>" in a
            # cell count as ordinary text instead of raising ValueError.
            return len(encoding.encode(cell, disallowed_special=()))

        openai_counts = _count_tokens_by_column(df, count_openai)
        # The extra newline separates this section from a following one.
        sections.append(_format_section("OpenAI", openai_model, openai_counts) + "\n")

    if calculate_anthropic:
        # No api_key is passed; the SDK is expected to pick up
        # ANTHROPIC_API_KEY from the environment.
        client = anthropic.Anthropic()
        # NOTE(review): ``client.count_tokens`` is a legacy SDK helper --
        # confirm the installed ``anthropic`` version still provides it.
        anthropic_counts = _count_tokens_by_column(df, client.count_tokens)
        sections.append(_format_section("Anthropic", anthropic_model, anthropic_counts))

    return "".join(sections)

def main():
    """Assemble the Gradio token-counter page and start serving it.

    Layout, top to bottom: title and instructions, the CSV upload field, one
    checkbox per provider, the (initially hidden) model dropdowns, the submit
    button, and a Markdown area that receives the result of ``process_csv``.
    """

    def toggle_visibility(checked):
        # A model dropdown is shown only while its provider checkbox is ticked.
        return gr.update(visible=checked)

    with gr.Blocks() as demo:
        gr.Markdown("# Token Counter")
        gr.Markdown("Upload a CSV file to see token counts per column and total tokens.")

        with gr.Row():
            csv_upload = gr.File(label="Upload CSV File", type="filepath")

        with gr.Row():
            openai_toggle = gr.Checkbox(label="Calculate tokens for OpenAI models")
            anthropic_toggle = gr.Checkbox(label="Calculate tokens for Anthropic models")

        with gr.Row():
            openai_choice = gr.Dropdown(
                label="Select OpenAI Model",
                choices=['gpt-4o', 'gpt-4o-mini', 'gpt-4'],
                visible=False,
            )
            anthropic_choice = gr.Dropdown(
                label="Select Anthropic Model",
                choices=['claude-3-5-sonnet-latest', 'claude-3-5-haiku-latest', 'claude-3-opus-latest'],
                visible=False,
            )

        # Wire each checkbox to the visibility of its own dropdown.
        openai_toggle.change(
            fn=toggle_visibility, inputs=openai_toggle, outputs=openai_choice
        )
        anthropic_toggle.change(
            fn=toggle_visibility, inputs=anthropic_toggle, outputs=anthropic_choice
        )

        run_button = gr.Button("Calculate Tokens")
        report = gr.Markdown()

        # Argument order must match the signature of process_csv.
        run_button.click(
            fn=process_csv,
            inputs=[
                csv_upload,
                openai_toggle,
                openai_choice,
                anthropic_toggle,
                anthropic_choice,
            ],
            outputs=report,
        )

    demo.launch(share=True)

# Build and launch the UI only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()