Aivis committed on
Commit
8250356
·
verified ·
1 Parent(s): 8eea335

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -0
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import tiktoken
4
+ from anthropic import tokenizer
5
+
6
def _count_tokens_per_column(df, encode):
    """Return ``{column: token_count}`` for every column of *df*.

    Each cell is converted with ``str`` and handed to *encode*; the length of
    whatever *encode* returns is taken as that cell's token count.
    """
    return {
        col: sum(len(encode(cell)) for cell in df[col].astype(str))
        for col in df.columns
    }


def _format_section(provider, model, counts):
    """Render one provider's per-column counts and total as Markdown text."""
    lines = [f"**{provider} Token Counts per Column ({model}):**\n"]
    lines.extend(f"- {col}: {count} tokens\n" for col, count in counts.items())
    lines.append(
        f"\n**Total {provider} Tokens ({model}): {sum(counts.values())}**\n"
    )
    return "".join(lines)


def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthropic_model):
    """Count tokens per column of an uploaded CSV file.

    Args:
        file: The upload to read. Either a filesystem path (``str``) or an
            object exposing the path as ``.name``; ``None`` means nothing was
            uploaded.
        calculate_openai: Whether to report counts using a tiktoken encoding.
        openai_model: Model name used to select the tiktoken encoding. When
            empty or unknown to tiktoken, ``cl100k_base`` is used instead.
        calculate_anthropic: Whether to report counts using the Anthropic
            tokenizer.
        anthropic_model: Model name shown in the Anthropic section heading.
            It does not influence tokenization (the same tokenizer is assumed
            across models).

    Returns:
        A Markdown string with per-column and total token counts for each
        selected provider, or a short message describing why nothing could
        be computed.
    """
    if file is None:
        return "Please upload a CSV file."
    if not calculate_openai and not calculate_anthropic:
        return "Please select at least one model to calculate tokens."

    # Accept both a plain path and a file-like wrapper that carries the
    # path in ``.name``.
    path = file if isinstance(file, str) else file.name

    try:
        # Read every cell as the literal text found in the file: with type
        # inference, empty cells become NaN (counted as the text "nan") and
        # integers in a column with gaps are rewritten as "1.0".
        df = pd.read_csv(path, dtype=str, keep_default_na=False)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        return f"Could not read the CSV file: {exc}"

    sections = []

    if calculate_openai:
        openai_encoding = None
        if openai_model:
            try:
                openai_encoding = tiktoken.encoding_for_model(openai_model)
            except KeyError:
                # Model is not known to tiktoken; use the default below.
                pass
        if openai_encoding is None:
            # Default encoding when no model is selected or it is not found.
            openai_encoding = tiktoken.get_encoding("cl100k_base")

        def encode_openai(text):
            # Cells are data, not prompts: text that looks like a special
            # token (e.g. "<|endoftext|>") is encoded as ordinary text
            # instead of raising ValueError.
            return openai_encoding.encode(text, disallowed_special=())

        openai_counts = _count_tokens_per_column(df, encode_openai)
        # The extra newline separates this section from the next one.
        sections.append(_format_section("OpenAI", openai_model, openai_counts) + "\n")

    if calculate_anthropic:
        # NOTE(review): relies on ``tokenizer.encode`` from the file-level
        # ``from anthropic import tokenizer`` import; whether that attribute
        # exists depends on the installed anthropic SDK version -- confirm.
        anthropic_counts = _count_tokens_per_column(df, tokenizer.encode)
        sections.append(_format_section("Anthropic", anthropic_model, anthropic_counts))

    return "".join(sections)
66
+
67
def main():
    """Assemble the Token Counter Gradio interface and start serving it."""
    with gr.Blocks() as demo:
        # Page heading and a one-line usage hint.
        gr.Markdown("# Token Counter")
        gr.Markdown("Upload a CSV file to see token counts per column and total tokens.")

        with gr.Row():
            csv_upload = gr.File(label="Upload CSV File", type="file")

        # One checkbox per provider.
        with gr.Row():
            openai_toggle = gr.Checkbox(label="Calculate tokens for OpenAI models")
            anthropic_toggle = gr.Checkbox(label="Calculate tokens for Anthropic models")

        # Model pickers start hidden; each is revealed by its checkbox.
        with gr.Row():
            openai_picker = gr.Dropdown(
                choices=['gpt-4', 'gpt-3.5-turbo', 'text-davinci-003'],
                label="Select OpenAI Model",
                visible=False,
            )
            anthropic_picker = gr.Dropdown(
                choices=['claude-v1', 'claude-v1.3', 'claude-instant-v1'],
                label="Select Anthropic Model",
                visible=False,
            )

        def update_openai_visibility(checked):
            return gr.update(visible=checked)

        def update_anthropic_visibility(checked):
            return gr.update(visible=checked)

        openai_toggle.change(
            fn=update_openai_visibility,
            inputs=openai_toggle,
            outputs=openai_picker,
        )
        anthropic_toggle.change(
            fn=update_anthropic_visibility,
            inputs=anthropic_toggle,
            outputs=anthropic_picker,
        )

        run_button = gr.Button("Calculate Tokens")
        result_view = gr.Markdown()

        # The input order must match the positional parameters of process_csv.
        run_button.click(
            fn=process_csv,
            inputs=[
                csv_upload,
                openai_toggle,
                openai_picker,
                anthropic_toggle,
                anthropic_picker,
            ],
            outputs=result_view,
        )

    demo.launch()
107
+
108
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()