Spaces:
Sleeping
Sleeping
File size: 4,735 Bytes
8250356 c62a8cf 8250356 e38a225 8250356 c62a8cf 8250356 c62a8cf 8250356 c62a8cf 8250356 e38a225 8250356 e38a225 8250356 e38a225 8250356 32f5ca5 8250356 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
import gradio as gr
import pandas as pd
import tiktoken
import anthropic
#import os
# Encoding used when the OpenAI model is missing or unknown to tiktoken.
# See https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
# ("o200k_base" is the alternative used by the newest models).
_DEFAULT_OPENAI_ENCODING = "cl100k_base"


def _resolve_openai_encoding(model):
    """Return the tiktoken encoding for *model*, or the default one.

    The default is used both when no model was chosen (``None`` / empty
    string) and when tiktoken does not recognise the model name.
    """
    if not model:
        return tiktoken.get_encoding(_DEFAULT_OPENAI_ENCODING)
    try:
        return tiktoken.encoding_for_model(model)
    except KeyError:
        return tiktoken.get_encoding(_DEFAULT_OPENAI_ENCODING)


def _count_tokens_by_column(df, count_tokens):
    """Return a ``{column: token count}`` mapping for every column of *df*.

    *count_tokens* is a callable taking one cell's text and returning the
    number of tokens in it. Cells go through ``str`` first so the helper
    also works on frames holding non-string values.
    """
    return {
        col: sum(count_tokens(str(cell)) for cell in df[col])
        for col in df.columns
    }


def _format_token_report(provider, model, counts):
    """Render one provider's per-column counts and grand total as Markdown."""
    parts = [f"**{provider} Token Counts per Column ({model}):**\n"]
    parts.extend(f"- {col}: {count} tokens\n" for col, count in counts.items())
    parts.append(f"\n**Total {provider} Tokens ({model}): {sum(counts.values())}**\n")
    return "".join(parts)


def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthropic_model):
    """Count tokens per column of an uploaded CSV and return a Markdown report.

    Args:
        file: The uploaded CSV, either as a path string or as an object whose
            ``name`` attribute holds the path. ``None`` means nothing was
            uploaded.
        calculate_openai: Whether to produce counts with the tiktoken encoding.
        openai_model: OpenAI model name used to pick the tiktoken encoding;
            falls back to ``cl100k_base`` when missing or unknown.
        calculate_anthropic: Whether to produce counts with the Anthropic client.
        anthropic_model: Anthropic model name; only used to label the report.

    Returns:
        A Markdown string with per-column and total token counts for each
        selected provider, or a short prompt when no file was uploaded or no
        provider was selected.
    """
    if file is None:
        return "Please upload a CSV file."
    # Checked before parsing so an unselected form never pays for reading the file.
    if not calculate_openai and not calculate_anthropic:
        return "Please select at least one model to calculate tokens."

    # Accept a plain path as well as a file-like object exposing ``.name``.
    csv_path = getattr(file, "name", file)
    # Read every cell as its literal text: no numeric re-formatting, and empty
    # cells stay "" instead of becoming the string "nan".
    df = pd.read_csv(csv_path, dtype=str, keep_default_na=False)

    sections = []

    if calculate_openai:
        encoding = _resolve_openai_encoding(openai_model)
        # encode_ordinary treats special-token text (e.g. "<|endoftext|>")
        # as plain text rather than raising on it.
        openai_counts = _count_tokens_by_column(
            df, lambda text: len(encoding.encode_ordinary(text))
        )
        # The extra newline separates this section from a following one.
        sections.append(_format_token_report("OpenAI", openai_model, openai_counts) + "\n")

    if calculate_anthropic:
        # Called without arguments; the commented-out original read
        # ANTHROPIC_API_KEY from the environment and passed it explicitly.
        client = anthropic.Anthropic()
        # NOTE(review): `count_tokens` is called directly on the client here;
        # confirm the installed anthropic SDK version still provides it.
        anthropic_counts = _count_tokens_by_column(df, client.count_tokens)
        sections.append(_format_token_report("Anthropic", anthropic_model, anthropic_counts))

    return "".join(sections)
def main():
    """Assemble the Token Counter Gradio UI, wire its events, and launch it."""

    # Each callback shows its model dropdown only while the matching
    # checkbox is ticked.
    def update_openai_visibility(selected):
        return gr.update(visible=selected)

    def update_anthropic_visibility(selected):
        return gr.update(visible=selected)

    with gr.Blocks() as demo:
        # Page heading and short usage hint.
        gr.Markdown("# Token Counter")
        gr.Markdown("Upload a CSV file to see token counts per column and total tokens.")

        with gr.Row():
            csv_upload = gr.File(label="Upload CSV File", type="filepath")

        # Provider switches.
        with gr.Row():
            openai_toggle = gr.Checkbox(label="Calculate tokens for OpenAI models")
            anthropic_toggle = gr.Checkbox(label="Calculate tokens for Anthropic models")

        # Model pickers start hidden and are revealed by the switches above.
        with gr.Row():
            openai_picker = gr.Dropdown(
                choices=['gpt-4o', 'gpt-4o-mini', 'gpt-4'],
                label="Select OpenAI Model",
                visible=False,
            )
            anthropic_picker = gr.Dropdown(
                choices=['claude-3-5-sonnet-latest', 'claude-3-5-haiku-latest', 'claude-3-opus-latest'],
                label="Select Anthropic Model",
                visible=False,
            )

        openai_toggle.change(
            fn=update_openai_visibility,
            inputs=openai_toggle,
            outputs=openai_picker,
        )
        anthropic_toggle.change(
            fn=update_anthropic_visibility,
            inputs=anthropic_toggle,
            outputs=anthropic_picker,
        )

        run_button = gr.Button("Calculate Tokens")
        report = gr.Markdown()

        # The input order must match the parameter order of process_csv.
        run_button.click(
            fn=process_csv,
            inputs=[
                csv_upload,
                openai_toggle,
                openai_picker,
                anthropic_toggle,
                anthropic_picker,
            ],
            outputs=report,
        )

    demo.launch(share=True)
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()
|