Spaces:

Aivis
/

Token_counter

Running

App Files Files Community

Aivis committed on Nov 7, 2024

Commit

8250356

verified ·

1 Parent(s): 8eea335

Create app.py

Browse files

Files changed (1) hide show

app.py +109 -0

app.py ADDED Viewed

	@@ -0,0 +1,109 @@

+import gradio as gr
+import pandas as pd
+import tiktoken
+from anthropic import tokenizer
+def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthropic_model):
+    # Check if file is uploaded
+    if file is None:
+        return "Please upload a CSV file."
+    # Read the CSV file
+    df = pd.read_csv(file.name)
+    # Initialize output string
+    output = ""
+    if calculate_openai:
+        # Get the OpenAI tokenizer for the selected model
+        try:
+            openai_encoding = tiktoken.encoding_for_model(openai_model)
+        except KeyError:
+            # Default encoding if model is not found
+            openai_encoding = tiktoken.get_encoding("cl100k_base")
+        token_counts_openai = {}
+        total_tokens_openai = 0
+        # Iterate over columns
+        for col in df.columns:
+            tokens_col_openai = 0
+            for cell in df[col].astype(str):
+                tokens_openai = openai_encoding.encode(cell)
+                tokens_col_openai += len(tokens_openai)
+            token_counts_openai[col] = tokens_col_openai
+            total_tokens_openai += tokens_col_openai
+        # Prepare OpenAI output
+        output += f"**OpenAI Token Counts per Column ({openai_model}):**\n"
+        for col, count in token_counts_openai.items():
+            output += f"- {col}: {count} tokens\n"
+        output += f"\n**Total OpenAI Tokens ({openai_model}): {total_tokens_openai}**\n\n"
+    if calculate_anthropic:
+        # For Anthropic tokenizer (assuming same tokenizer across models)
+        token_counts_anthropic = {}
+        total_tokens_anthropic = 0
+        for col in df.columns:
+            tokens_col_anthropic = 0
+            for cell in df[col].astype(str):
+                tokens_anthropic = len(tokenizer.encode(cell))
+                tokens_col_anthropic += tokens_anthropic
+            token_counts_anthropic[col] = tokens_col_anthropic
+            total_tokens_anthropic += tokens_col_anthropic
+        # Prepare Anthropic output
+        output += f"**Anthropic Token Counts per Column ({anthropic_model}):**\n"
+        for col, count in token_counts_anthropic.items():
+            output += f"- {col}: {count} tokens\n"
+        output += f"\n**Total Anthropic Tokens ({anthropic_model}): {total_tokens_anthropic}**\n"
+    if not calculate_openai and not calculate_anthropic:
+        output = "Please select at least one model to calculate tokens."
+    return output
+def main():
+    with gr.Blocks() as demo:
+        gr.Markdown("# Token Counter")
+        gr.Markdown("Upload a CSV file to see token counts per column and total tokens.")
+        with gr.Row():
+            file_input = gr.File(label="Upload CSV File", type="file")
+        with gr.Row():
+            calculate_openai = gr.Checkbox(label="Calculate tokens for OpenAI models")
+            calculate_anthropic = gr.Checkbox(label="Calculate tokens for Anthropic models")
+        with gr.Row():
+            openai_model = gr.Dropdown(
+                choices=['gpt-4', 'gpt-3.5-turbo', 'text-davinci-003'],
+                label="Select OpenAI Model",
+                visible=False
+            )
+            anthropic_model = gr.Dropdown(
+                choices=['claude-v1', 'claude-v1.3', 'claude-instant-v1'],
+                label="Select Anthropic Model",
+                visible=False
+            )
+        def update_openai_visibility(selected):
+            return gr.update(visible=selected)
+        def update_anthropic_visibility(selected):
+            return gr.update(visible=selected)
+        calculate_openai.change(fn=update_openai_visibility, inputs=calculate_openai, outputs=openai_model)
+        calculate_anthropic.change(fn=update_anthropic_visibility, inputs=calculate_anthropic, outputs=anthropic_model)
+        submit_button = gr.Button("Calculate Tokens")
+        output = gr.Markdown()
+        inputs = [file_input, calculate_openai, openai_model, calculate_anthropic, anthropic_model]
+        submit_button.click(fn=process_csv, inputs=inputs, outputs=output)
+    demo.launch()
+# Build and launch the app only when this file is run as a script, not on import.
+if __name__ == "__main__":
+    main()