Spaces:
Sleeping
Sleeping
File size: 6,941 Bytes
8250356 400e980 8250356 c62a8cf 8250356 c083776 8250356 c083776 8250356 400e980 8250356 c083776 8250356 400e980 c083776 8250356 c083776 8250356 c62a8cf 400e980 c62a8cf 8250356 400e980 b1e1362 400e980 b1e1362 400e980 8250356 c62a8cf 8250356 c083776 400e980 c083776 8250356 400e980 c083776 8250356 c083776 8250356 400e980 8250356 e38a225 8250356 e38a225 8250356 400e980 8250356 400e980 8250356 c083776 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
import gradio as gr
import json
import pandas as pd
import tiktoken
import anthropic
def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthropic_model):
    """Count tokens in an uploaded CSV, in total and per column.

    Args:
        file: Filesystem path of the uploaded CSV (``gr.File`` with
            ``type="filepath"``), or ``None`` when nothing was uploaded.
        calculate_openai: Whether to count tokens locally with ``tiktoken``.
        openai_model: OpenAI model name used to select the tiktoken encoding.
        calculate_anthropic: Whether to count tokens via the Anthropic
            beta token-counting API (requires ANTHROPIC_API_KEY in the env).
        anthropic_model: Anthropic model name passed to the API.

    Returns:
        A Markdown string with the totals and per-column counts, or a
        human-readable error/usage message.
    """
    if file is None:
        return "Please upload a CSV file."

    try:
        df = pd.read_csv(file)
    except Exception as e:
        return f"Error reading CSV file: {e}"

    if not calculate_openai and not calculate_anthropic:
        return "Please select at least one model to calculate tokens."

    output = ""

    if calculate_openai:
        try:
            encoding = tiktoken.encoding_for_model(openai_model)
        except KeyError:
            # Unknown model name: fall back to the cl100k_base encoding
            # used by current GPT-4-class models.
            encoding = tiktoken.get_encoding("cl100k_base")

        try:
            total_tokens_openai = len(encoding.encode(df.to_csv(index=False)))
            # Per column: header plus every cell rendered as text, one per line.
            token_counts_openai = {
                col: len(encoding.encode('\n'.join([col] + list(df[col].astype(str).values))))
                for col in df.columns
            }
        except Exception as e:
            return f"Error counting tokens with OpenAI model: {e}"

        output += f"\n**Total OpenAI Tokens ({openai_model}): {total_tokens_openai}**\n"
        output += f"\n**OpenAI Token Counts per Column ({openai_model}):**\n\n"
        for col, count in token_counts_openai.items():
            output += f"- {col}: {count} tokens\n"

    if calculate_anthropic:
        # Reads ANTHROPIC_API_KEY from the environment by default.
        client = anthropic.Anthropic()

        def _count(text):
            # Server-side count via the beta token-counting endpoint.
            response = client.beta.messages.count_tokens(
                betas=["token-counting-2024-11-01"],
                model=anthropic_model,
                messages=[{
                    "role": "user",
                    "content": text,
                }],
            )
            return json.loads(response.json())['input_tokens']

        try:
            total_tokens_anthropic = _count(df.to_csv(index=False))
            token_counts_anthropic = {
                col: _count('\n'.join([col] + list(df[col].astype(str).values)))
                for col in df.columns
            }
        except Exception as e:
            return f"Error counting tokens with Anthropic model: {e}"

        output += f"\n**Total Anthropic Tokens ({anthropic_model}): {total_tokens_anthropic}**\n"
        output += f"\n**Anthropic Token Counts per Column ({anthropic_model}):**\n"
        for col, count in token_counts_anthropic.items():
            output += f"- {col}: {count} tokens\n"

    return output
def main():
    """Assemble the Gradio token-counter UI and launch it."""
    with gr.Blocks() as demo:
        gr.Markdown("# Token Counter")
        gr.Markdown("Upload a CSV file to see token counts per column and total tokens.")
        gr.Markdown("""
For OpenAI models Python package `tiktoken` is used.
For Anthropic models beta version of [Token counting](https://docs.anthropic.com/en/docs/build-with-claude/token-counting) is used.
""")

        with gr.Row():
            file_input = gr.File(label="Upload CSV File", type="filepath")
        with gr.Row():
            calculate_openai = gr.Checkbox(label="Calculate tokens for OpenAI models")
            calculate_anthropic = gr.Checkbox(label="Calculate tokens for Anthropic models")
        with gr.Row():
            openai_model = gr.Dropdown(
                choices=['gpt-4o', 'gpt-4o-mini', 'gpt-4'],
                label="Select OpenAI Model",
                visible=False,
            )
            anthropic_model = gr.Dropdown(
                choices=['claude-3-5-sonnet-latest', 'claude-3-5-haiku-latest', 'claude-3-opus-latest', 'claude-3-haiku-20240307'],
                label="Select Anthropic Model",
                visible=False,
            )

        def _toggle_visibility(checked):
            # Reveal a model dropdown only while its checkbox is ticked.
            return gr.update(visible=checked)

        calculate_openai.change(fn=_toggle_visibility, inputs=calculate_openai, outputs=openai_model)
        calculate_anthropic.change(fn=_toggle_visibility, inputs=calculate_anthropic, outputs=anthropic_model)

        submit_button = gr.Button("Calculate Tokens")
        output = gr.Markdown()
        submit_button.click(
            fn=process_csv,
            inputs=[file_input, calculate_openai, openai_model, calculate_anthropic, anthropic_model],
            outputs=output,
        )

    demo.launch()


if __name__ == "__main__":
    main()