# chat/app/utils/token_counter.py
# ariansyahdedy's picture
# Add memory
# 7b2511b
# token_counter.py
import os
import tiktoken
# Module-level tokenizer used by count_tokens() below.
# Choose the encoding to match your model, e.g. 'cl100k_base' for OpenAI models.
encoding = tiktoken.get_encoding("cl100k_base")
def count_tokens(text):
    """Return the number of tokens in *text* under the module-level ``encoding``.

    Args:
        text: The string to tokenize. ``None`` or an empty string counts as
            zero tokens.

    Returns:
        The length of the token list produced by ``encoding.encode``.
    """
    if not text:
        # Nothing to tokenize; also avoids handing None to the encoder.
        return 0
    # disallowed_special=() makes tiktoken treat special-token look-alikes
    # (e.g. "<|endoftext|>") as ordinary text. With the default setting,
    # encode() raises ValueError on such input, which would break counting
    # for arbitrary text.
    return len(encoding.encode(text, disallowed_special=()))
class TokenCounter:
    """Track per-document token counts together with their running total.

    ``doc_tokens`` maps each document id to its token count and
    ``total_tokens`` holds the sum of those counts; the methods below keep
    the two in step.
    """

    def __init__(self):
        """Start with no tracked documents and a total of zero."""
        self.total_tokens = 0
        self.doc_tokens = {}

    def add_document(self, doc_id, text):
        """Count the tokens in *text* and record them under *doc_id*.

        If *doc_id* is already tracked, its previous count is replaced
        rather than added on top of the total.

        Args:
            doc_id: Key under which the count is stored in ``doc_tokens``.
            text: Document text passed to ``count_tokens``.
        """
        num_tokens = count_tokens(text)
        # Subtract any earlier count for this id before overwriting it;
        # otherwise re-adding a document would inflate the total.
        self.total_tokens += num_tokens - self.doc_tokens.get(doc_id, 0)
        self.doc_tokens[doc_id] = num_tokens

    def remove_document(self, doc_id):
        """Stop tracking *doc_id* and subtract its tokens from the total.

        Ids that are not tracked are ignored.
        """
        if doc_id in self.doc_tokens:
            self.total_tokens -= self.doc_tokens.pop(doc_id)

    def get_total_tokens(self):
        """Return the combined token count of all tracked documents."""
        return self.total_tokens
class SimpleTokenCounter:
    """Rough token counter: one token per whitespace-separated word."""

    def count_tokens(self, text: str) -> int:
        """Return how many whitespace-delimited pieces *text* contains."""
        # split() with no arguments collapses runs of whitespace and drops
        # leading/trailing blanks, so empty or blank text yields zero.
        return sum(1 for _word in text.split())
class TikTokenCounter:
    """Token counter backed by the tiktoken encoding for a given model name."""

    def __init__(self, model_name: str = "gpt-4"):
        """Load the tiktoken encoding for *model_name*.

        Args:
            model_name: Model name passed to ``tiktoken.encoding_for_model``.
                Names tiktoken cannot map to an encoding fall back to
                ``cl100k_base``.
        """
        # Imported here so tiktoken is only required once a counter is built.
        import tiktoken

        try:
            self.encoding = tiktoken.encoding_for_model(model_name)
        except KeyError:
            # encoding_for_model raises KeyError for model names it does not
            # know. Fall back to a fixed encoding so counting still works;
            # counts for such models are then an approximation.
            self.encoding = tiktoken.get_encoding("cl100k_base")

    def count_tokens(self, text: str) -> int:
        """Return the number of tokens ``self.encoding`` produces for *text*."""
        # disallowed_special=() encodes special-token look-alikes such as
        # "<|endoftext|>" as ordinary text; the default would raise
        # ValueError when they appear in the input.
        return len(self.encoding.encode(text, disallowed_special=()))