Spaces:
Running
Running
File size: 5,801 Bytes
8250356 c62a8cf 8250356 c083776 8250356 c083776 8250356 c083776 8250356 c083776 8250356 c083776 8250356 c083776 8250356 c62a8cf c41e1fe c62a8cf 8250356 b1e1362 e6df325 8250356 c62a8cf 8250356 c083776 8250356 c083776 8250356 c083776 8250356 e38a225 8250356 e38a225 8250356 e38a225 8250356 32f5ca5 8250356 c083776 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
import gradio as gr
import pandas as pd
import tiktoken
import anthropic
#import os
def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthropic_model):
    """Count tokens in an uploaded CSV for the selected OpenAI/Anthropic models.

    Args:
        file: Filesystem path of the uploaded CSV (gr.File, type="filepath"), or None.
        calculate_openai: Whether to compute OpenAI token counts.
        openai_model: OpenAI model name used to select a tiktoken encoding.
        calculate_anthropic: Whether to compute Anthropic token counts.
        anthropic_model: Anthropic model name passed to the count-tokens API.

    Returns:
        A Markdown string with total tokens and per-column counts, or a
        human-readable error message.
    """
    if file is None:
        return "Please upload a CSV file."

    try:
        df = pd.read_csv(file)
    except Exception as e:
        return f"Error reading CSV file: {e}"

    output = ""

    if calculate_openai:
        # Fall back to cl100k_base when tiktoken doesn't recognize the model name.
        try:
            openai_encoding = tiktoken.encoding_for_model(openai_model)
        except KeyError:
            openai_encoding = tiktoken.get_encoding("cl100k_base")

        # Total is computed over the full CSV text (header + rows).
        total_tokens_openai = len(openai_encoding.encode(df.to_csv(index=False)))

        token_counts_openai = {}
        for col in df.columns:
            # Per-column count: header name plus every cell as text, one per line.
            try:
                col_text = '\n'.join([col] + list(df[col].astype(str).values))
                tokens_openai = openai_encoding.encode(col_text)
            except Exception as e:
                return f"Error counting tokens with OpenAI model: {e}"
            token_counts_openai[col] = len(tokens_openai)

        output += f"**Total OpenAI Tokens ({openai_model}): {total_tokens_openai}**\n"
        output += f"\n**OpenAI Token Counts per Column ({openai_model}):**\n\n"
        for col, count in token_counts_openai.items():
            output += f"- {col}: {count} tokens\n"

    if calculate_anthropic:
        # Anthropic() reads ANTHROPIC_API_KEY from the environment.
        try:
            client = anthropic.Anthropic()
        except Exception as e:
            return f"Error initializing Anthropic client: {e}"

        def _anthropic_count(text):
            # Client.count_tokens() (local tokenizer) was deprecated and removed
            # in newer anthropic SDKs; the supported path is the
            # messages.count_tokens endpoint, which also takes the model name.
            if hasattr(client, "count_tokens"):
                return client.count_tokens(text)
            response = client.messages.count_tokens(
                model=anthropic_model,
                messages=[{"role": "user", "content": text}],
            )
            return response.input_tokens

        try:
            total_tokens_anthropic = _anthropic_count(df.to_csv(index=False))
        except Exception as e:
            return f"Error counting tokens with Anthropic model ({anthropic.__version__}): {e}"

        token_counts_anthropic = {}
        for col in df.columns:
            try:
                token_counts_anthropic[col] = _anthropic_count(
                    '\n'.join([col] + list(df[col].astype(str).values))
                )
            except Exception as e:
                return f"Error counting tokens with Anthropic model: {e}"

        output += f"**Total Anthropic Tokens ({anthropic_model}): {total_tokens_anthropic}**\n"
        # Trailing blank line so the Markdown list renders (matches the OpenAI section).
        output += f"\n**Anthropic Token Counts per Column ({anthropic_model}):**\n\n"
        for col, count in token_counts_anthropic.items():
            output += f"- {col}: {count} tokens\n"

    if not calculate_openai and not calculate_anthropic:
        output = "Please select at least one model to calculate tokens."

    return output
def main():
    """Build and launch the Gradio token-counter UI.

    Wires the file upload, model checkboxes/dropdowns, and submit button to
    process_csv, then launches the app with a public share link.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Token Counter")
        gr.Markdown("Upload a CSV file to see token counts per column and total tokens.")
        with gr.Row():
            file_input = gr.File(label="Upload CSV File", type="filepath")
        with gr.Row():
            calculate_openai = gr.Checkbox(label="Calculate tokens for OpenAI models")
            calculate_anthropic = gr.Checkbox(label="Calculate tokens for Anthropic models")
        with gr.Row():
            openai_model = gr.Dropdown(
                choices=['gpt-4o', 'gpt-4o-mini', 'gpt-4'],
                label="Select OpenAI Model",
                visible=False,
            )
            anthropic_model = gr.Dropdown(
                choices=['claude-3-5-sonnet-latest', 'claude-3-5-haiku-latest', 'claude-3-opus-latest'],
                label="Select Anthropic Model",
                visible=False,
            )

        def _toggle_visibility(selected):
            # Shared callback: show a model dropdown only while its checkbox is ticked.
            return gr.update(visible=selected)

        calculate_openai.change(fn=_toggle_visibility, inputs=calculate_openai, outputs=openai_model)
        calculate_anthropic.change(fn=_toggle_visibility, inputs=calculate_anthropic, outputs=anthropic_model)

        submit_button = gr.Button("Calculate Tokens")
        output = gr.Markdown()
        inputs = [file_input, calculate_openai, openai_model, calculate_anthropic, anthropic_model]
        submit_button.click(fn=process_csv, inputs=inputs, outputs=output)

    demo.launch(share=True)
# Launch the app only when run as a script, not when imported as a module.
if __name__ == "__main__":
    main()