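# Gradio app: upload a CSV file and get per-column and total token counts,
# computed with tiktoken for OpenAI models and the Anthropic client for
# Claude models.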
import gradio as gr
import pandas as pd
import tiktoken
import anthropic

def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthropic_model):
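    """Return a Markdown report of token counts for the uploaded CSV.

    Counts are reported per column and for the whole file, using tiktoken
    for the selected OpenAI model and/or the Anthropic client for Claude
    models, depending on which checkboxes are ticked.
    """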
    # Check if file is uploaded
    if file is None:
        return "Please upload a CSV file."
    
    # Read the CSV file
    try:
        df = pd.read_csv(file)
    except Exception as e:
        return f"Error reading CSV file: {e}"
    
    # Initialize output string
    output = ""
    
    if calculate_openai:
        # Get the OpenAI tokenizer for the selected model
        try:
            openai_encoding = tiktoken.encoding_for_model(openai_model)
        except KeyError:
            # Default encoding if model is not found
            openai_encoding = tiktoken.get_encoding("cl100k_base")
        
        token_counts_openai = {}
        total_tokens_openai = len(openai_encoding.encode(df.to_csv(index=False)))
        
        # Token count per column: header plus all cell values, newline-joined
        for col in df.columns:
            try:
                tokens_openai = openai_encoding.encode('\n'.join([col] + list(df[col].astype(str).values)))
            except Exception as e:
                return f"Error counting tokens with OpenAI model: {e}"
            token_counts_openai[col] = len(tokens_openai)
        
        # Prepare OpenAI output
        output += f"**Total OpenAI Tokens ({openai_model}): {total_tokens_openai}**\n"
        output += f"\n**OpenAI Token Counts per Column ({openai_model}):**\n\n"
        for col, count in token_counts_openai.items():
            output += f"- {col}: {count} tokens\n"
        output += "\n"
        
    
    if calculate_anthropic:
        # Initialize the Anthropic client; it reads ANTHROPIC_API_KEY from the
        # environment, so no key needs to be passed explicitly.
        try:
            client = anthropic.Anthropic()
        except Exception as e:
            return f"Error initializing Anthropic client: {e}"
        
        token_counts_anthropic = {}
        try:
            total_tokens_anthropic = client.count_tokens(df.to_csv(index=False))
        except Exception as e:
            return f"Error counting tokens with Anthropic model ({anthropic.__version__}): {e}"        
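        # NOTE: client.count_tokens() is a legacy helper that takes raw text only
        # (anthropic_model is used just to label the output below) and has been
        # deprecated and removed in newer releases of the anthropic SDK. If it is
        # unavailable in the installed version, a sketch of the newer Messages
        # token-counting endpoint (assuming anthropic >= 0.39, where
        # client.messages.count_tokens is available) would be:
        #
        #     resp = client.messages.count_tokens(
        #         model=anthropic_model,
        #         messages=[{"role": "user", "content": df.to_csv(index=False)}],
        #     )
        #     total_tokens_anthropic = resp.input_tokens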
        
        # Token count per column: header plus all cell values, newline-joined
        for col in df.columns:
            try:
                tokens_anthropic = client.count_tokens('\n'.join([col] + list(df[col].astype(str).values)))
            except Exception as e:
                return f"Error counting tokens with Anthropic model: {e}"
            token_counts_anthropic[col] = tokens_anthropic
        
        # Prepare Anthropic output
        output += f"**Total Anthropic Tokens ({anthropic_model}): {total_tokens_anthropic}**\n"
        output += f"\n**Anthropic Token Counts per Column ({anthropic_model}):**\n\n"
        for col, count in token_counts_anthropic.items():
            output += f"- {col}: {count} tokens\n"
        
    
    if not calculate_openai and not calculate_anthropic:
        output = "Please select at least one model to calculate tokens."
    
    return output

def main():
    with gr.Blocks() as demo:
        gr.Markdown("# Token Counter")
        gr.Markdown("Upload a CSV file to see token counts per column and total tokens.")
        
        with gr.Row():
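            # With type="filepath", Gradio passes the uploaded file to the
            # handler as a path string, which pandas can read directly.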
            file_input = gr.File(label="Upload CSV File", type="filepath")
        
        with gr.Row():
            calculate_openai = gr.Checkbox(label="Calculate tokens for OpenAI models")
            calculate_anthropic = gr.Checkbox(label="Calculate tokens for Anthropic models")
        
        with gr.Row():
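            # Both model dropdowns start hidden; the checkbox change handlers
            # below toggle their visibility.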
            openai_model = gr.Dropdown(
                choices=['gpt-4o', 'gpt-4o-mini', 'gpt-4'],
                value='gpt-4o',
                label="Select OpenAI Model",
                visible=False
            )
            anthropic_model = gr.Dropdown(
                choices=['claude-3-5-sonnet-latest', 'claude-3-5-haiku-latest', 'claude-3-opus-latest'],
                value='claude-3-5-sonnet-latest',
                label="Select Anthropic Model",
                visible=False
            )
        
        # A single handler toggles a dropdown's visibility to match its checkbox
        def update_visibility(selected):
            return gr.update(visible=selected)
        
        calculate_openai.change(fn=update_visibility, inputs=calculate_openai, outputs=openai_model)
        calculate_anthropic.change(fn=update_visibility, inputs=calculate_anthropic, outputs=anthropic_model)
        
        submit_button = gr.Button("Calculate Tokens")
        output = gr.Markdown()
        
        inputs = [file_input, calculate_openai, openai_model, calculate_anthropic, anthropic_model]
        submit_button.click(fn=process_csv, inputs=inputs, outputs=output)
        
    demo.launch(share=True)

if __name__ == "__main__":
    main()