"""Callback Handler that prints to std out.""" | |
import threading | |
from typing import Any, Dict, List | |
from langchain_core.callbacks import BaseCallbackHandler | |
from langchain_core.messages import AIMessage | |
from langchain_core.outputs import ChatGeneration, LLMResult | |
MODEL_COST_PER_1K_TOKENS = {
    # GPT-4o input
    "gpt-4o": 0.005,
    "gpt-4o-2024-05-13": 0.005,
    # GPT-4o output
    "gpt-4o-completion": 0.015,
    "gpt-4o-2024-05-13-completion": 0.015,
    # GPT-4 input
    "gpt-4": 0.03,
    "gpt-4-0314": 0.03,
    "gpt-4-0613": 0.03,
    "gpt-4-32k": 0.06,
    "gpt-4-32k-0314": 0.06,
    "gpt-4-32k-0613": 0.06,
    "gpt-4-vision-preview": 0.01,
    "gpt-4-1106-preview": 0.01,
    "gpt-4-0125-preview": 0.01,
    "gpt-4-turbo-preview": 0.01,
    "gpt-4-turbo": 0.01,
    "gpt-4-turbo-2024-04-09": 0.01,
    # GPT-4 output
    "gpt-4-completion": 0.06,
    "gpt-4-0314-completion": 0.06,
    "gpt-4-0613-completion": 0.06,
    "gpt-4-32k-completion": 0.12,
    "gpt-4-32k-0314-completion": 0.12,
    "gpt-4-32k-0613-completion": 0.12,
    "gpt-4-vision-preview-completion": 0.03,
    "gpt-4-1106-preview-completion": 0.03,
    "gpt-4-0125-preview-completion": 0.03,
    "gpt-4-turbo-preview-completion": 0.03,
    "gpt-4-turbo-completion": 0.03,
    "gpt-4-turbo-2024-04-09-completion": 0.03,
    # GPT-3.5 input
    # gpt-3.5-turbo points at gpt-3.5-turbo-0613 until Feb 16, 2024.
    # Switches to gpt-3.5-turbo-0125 after.
    "gpt-3.5-turbo": 0.0015,
    "gpt-3.5-turbo-0125": 0.0005,
    "gpt-3.5-turbo-0301": 0.0015,
    "gpt-3.5-turbo-0613": 0.0015,
    "gpt-3.5-turbo-1106": 0.001,
    "gpt-3.5-turbo-instruct": 0.0015,
    "gpt-3.5-turbo-16k": 0.003,
    "gpt-3.5-turbo-16k-0613": 0.003,
    # GPT-3.5 output
    # gpt-3.5-turbo points at gpt-3.5-turbo-0613 until Feb 16, 2024.
    # Switches to gpt-3.5-turbo-0125 after.
    "gpt-3.5-turbo-completion": 0.002,
    "gpt-3.5-turbo-0125-completion": 0.0015,
    "gpt-3.5-turbo-0301-completion": 0.002,
    "gpt-3.5-turbo-0613-completion": 0.002,
    "gpt-3.5-turbo-1106-completion": 0.002,
    "gpt-3.5-turbo-instruct-completion": 0.002,
    "gpt-3.5-turbo-16k-completion": 0.004,
    "gpt-3.5-turbo-16k-0613-completion": 0.004,
    # Azure GPT-35 input
    "gpt-35-turbo": 0.0015,  # Azure OpenAI version of ChatGPT
    "gpt-35-turbo-0301": 0.0015,  # Azure OpenAI version of ChatGPT
    "gpt-35-turbo-0613": 0.0015,
    "gpt-35-turbo-instruct": 0.0015,
    "gpt-35-turbo-16k": 0.003,
    "gpt-35-turbo-16k-0613": 0.003,
    # Azure GPT-35 output
    "gpt-35-turbo-completion": 0.002,  # Azure OpenAI version of ChatGPT
    "gpt-35-turbo-0301-completion": 0.002,  # Azure OpenAI version of ChatGPT
    "gpt-35-turbo-0613-completion": 0.002,
    "gpt-35-turbo-instruct-completion": 0.002,
    "gpt-35-turbo-16k-completion": 0.004,
    "gpt-35-turbo-16k-0613-completion": 0.004,
    # Others
    "text-ada-001": 0.0004,
    "ada": 0.0004,
    "text-babbage-001": 0.0005,
    "babbage": 0.0005,
    "text-curie-001": 0.002,
    "curie": 0.002,
    "text-davinci-003": 0.02,
    "text-davinci-002": 0.02,
    "code-davinci-002": 0.02,
    # Fine Tuned input
    "babbage-002-finetuned": 0.0016,
    "davinci-002-finetuned": 0.012,
    "gpt-3.5-turbo-0613-finetuned": 0.003,
    "gpt-3.5-turbo-1106-finetuned": 0.003,
    "gpt-3.5-turbo-0125-finetuned": 0.003,
    # Fine Tuned output
    "babbage-002-finetuned-completion": 0.0016,
    "davinci-002-finetuned-completion": 0.012,
    "gpt-3.5-turbo-0613-finetuned-completion": 0.006,
    "gpt-3.5-turbo-1106-finetuned-completion": 0.006,
    "gpt-3.5-turbo-0125-finetuned-completion": 0.006,
    # Azure Fine Tuned input
    "babbage-002-azure-finetuned": 0.0004,
    "davinci-002-azure-finetuned": 0.002,
    "gpt-35-turbo-0613-azure-finetuned": 0.0015,
    # Azure Fine Tuned output
    "babbage-002-azure-finetuned-completion": 0.0004,
    "davinci-002-azure-finetuned-completion": 0.002,
    "gpt-35-turbo-0613-azure-finetuned-completion": 0.002,
    # Legacy fine-tuned models
    "ada-finetuned-legacy": 0.0016,
    "babbage-finetuned-legacy": 0.0024,
    "curie-finetuned-legacy": 0.012,
    "davinci-finetuned-legacy": 0.12,
}

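# Worked example (illustrative, derived from the table above): values are USD
# per 1,000 tokens, with bare model keys pricing input tokens and "-completion"
# keys pricing output tokens. So a 1,500-token prompt to "gpt-4o" costs
# 1.5 * 0.005 = $0.0075, and a 500-token "gpt-4o" completion costs
# 0.5 * 0.015 = $0.0075.
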
def standardize_model_name(
    model_name: str,
    is_completion: bool = False,
) -> str:
    """Standardize the model name to a format that can be used in the OpenAI API.

    Args:
        model_name: Model name to standardize.
        is_completion: Whether the model is used for completion or not.
            Defaults to False.

    Returns:
        Standardized model name.
    """
    model_name = model_name.lower()
    if ".ft-" in model_name:
        model_name = model_name.split(".ft-")[0] + "-azure-finetuned"
    if ":ft-" in model_name:
        model_name = model_name.split(":")[0] + "-finetuned-legacy"
    if "ft:" in model_name:
        model_name = model_name.split(":")[1] + "-finetuned"
    if is_completion and (
        model_name.startswith("gpt-4")
        or model_name.startswith("gpt-3.5")
        or model_name.startswith("gpt-35")
        or ("finetuned" in model_name and "legacy" not in model_name)
    ):
        return model_name + "-completion"
    else:
        return model_name

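# Illustrative behavior of the rules above (the fine-tuned model id is a
# made-up example, not a real deployment):
#   standardize_model_name("GPT-4", is_completion=True)
#       -> "gpt-4-completion"
#   standardize_model_name("ft:gpt-3.5-turbo-0613:my-org::abc123")
#       -> "gpt-3.5-turbo-0613-finetuned"

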
def get_openai_token_cost_for_model(
    model_name: str, num_tokens: int, is_completion: bool = False
) -> float:
    """Get the cost in USD for a given model and number of tokens.

    Args:
        model_name: Name of the model.
        num_tokens: Number of tokens.
        is_completion: Whether the model is used for completion or not.
            Defaults to False.

    Returns:
        Cost in USD.
    """
    model_name = standardize_model_name(model_name, is_completion=is_completion)
    if model_name not in MODEL_COST_PER_1K_TOKENS:
        raise ValueError(
            f"Unknown model: {model_name}. Please provide a valid OpenAI model name. "
            "Known models are: " + ", ".join(MODEL_COST_PER_1K_TOKENS.keys())
        )
    return MODEL_COST_PER_1K_TOKENS[model_name] * (num_tokens / 1000)

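# Worked example (illustrative): for "gpt-4o" at $0.005 per 1K prompt tokens,
# get_openai_token_cost_for_model("gpt-4o", 2000) returns 2 * 0.005 = 0.01,
# while the same call with is_completion=True resolves to the
# "gpt-4o-completion" rate of $0.015 and returns 0.03.

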
class OpenAICallbackHandler(BaseCallbackHandler):
    """Callback Handler that tracks OpenAI info."""

    total_tokens: int = 0
    prompt_tokens: int = 0
    completion_tokens: int = 0
    successful_requests: int = 0
    total_cost: float = 0.0

    def __init__(self) -> None:
        super().__init__()
        self._lock = threading.Lock()

    def __repr__(self) -> str:
        return (
            f"Tokens Used: {self.total_tokens}\n"
            f"\tPrompt Tokens: {self.prompt_tokens}\n"
            f"\tCompletion Tokens: {self.completion_tokens}\n"
            f"Successful Requests: {self.successful_requests}\n"
            f"Total Cost (USD): ${self.total_cost}"
        )

    @property
    def always_verbose(self) -> bool:
        """Whether to call verbose callbacks even if verbose is False."""
        return True

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Do nothing when the LLM starts; this handler only tracks usage."""
        pass

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Do nothing on new tokens; usage is collected in on_llm_end."""
        pass

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Collect token usage."""
        # Check for usage_metadata (langchain-core >= 0.2.2)
        try:
            generation = response.generations[0][0]
        except IndexError:
            generation = None
        if isinstance(generation, ChatGeneration):
            try:
                message = generation.message
                if isinstance(message, AIMessage):
                    usage_metadata = message.usage_metadata
                else:
                    usage_metadata = None
            except AttributeError:
                usage_metadata = None
        else:
            usage_metadata = None
        if usage_metadata:
            token_usage = {"total_tokens": usage_metadata["total_tokens"]}
            completion_tokens = usage_metadata["output_tokens"]
            prompt_tokens = usage_metadata["input_tokens"]
            if response.llm_output is None:
                # model name (and therefore cost) is unavailable in
                # streaming responses
                model_name = ""
            else:
                model_name = standardize_model_name(
                    response.llm_output.get("model_name", "")
                )
        else:
            if response.llm_output is None:
                return None
            if "token_usage" not in response.llm_output:
                with self._lock:
                    self.successful_requests += 1
                return None
            # compute tokens and cost for this request
            token_usage = response.llm_output["token_usage"]
            completion_tokens = token_usage.get("completion_tokens", 0)
            prompt_tokens = token_usage.get("prompt_tokens", 0)
            model_name = standardize_model_name(
                response.llm_output.get("model_name", "")
            )
        if model_name in MODEL_COST_PER_1K_TOKENS:
            completion_cost = get_openai_token_cost_for_model(
                model_name, completion_tokens, is_completion=True
            )
            prompt_cost = get_openai_token_cost_for_model(model_name, prompt_tokens)
        else:
            completion_cost = 0
            prompt_cost = 0

        # update shared state behind lock
        with self._lock:
            self.total_cost += prompt_cost + completion_cost
            self.total_tokens += token_usage.get("total_tokens", 0)
            self.prompt_tokens += prompt_tokens
            self.completion_tokens += completion_tokens
            self.successful_requests += 1

def __copy__(self) -> "OpenAICallbackHandler": | |
"""Return a copy of the callback handler.""" | |
return self | |
def __deepcopy__(self, memo: Any) -> "OpenAICallbackHandler": | |
"""Return a deep copy of the callback handler.""" | |
return self | |