# langchain/libs/community/langchain_community/callbacks/fiddler_callback.py
import time
from typing import Any, Dict, List, Optional
from uuid import UUID

from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.outputs import LLMResult
from langchain_core.utils import guard_import

from langchain_community.callbacks.utils import import_pandas
# Define constants

# LLMResult keys
TOKEN_USAGE = "token_usage"
TOTAL_TOKENS = "total_tokens"
PROMPT_TOKENS = "prompt_tokens"
COMPLETION_TOKENS = "completion_tokens"
RUN_ID = "run_id"
MODEL_NAME = "model_name"
GOOD = "good"
BAD = "bad"
NEUTRAL = "neutral"
SUCCESS = "success"
FAILURE = "failure"

# Default values
DEFAULT_MAX_TOKEN = 65536
DEFAULT_MAX_DURATION = 120000

# Fiddler-specific constants
PROMPT = "prompt"
RESPONSE = "response"
CONTEXT = "context"
DURATION = "duration"
FEEDBACK = "feedback"
LLM_STATUS = "llm_status"

FEEDBACK_POSSIBLE_VALUES = [GOOD, BAD, NEUTRAL]

# Define a baseline dataset dictionary. Fiddler needs a small seed dataset to
# infer the event schema and value ranges before events can be published, so
# these ten rows of placeholder values are uploaded once per model below.
_dataset_dict = {
    PROMPT: ["fiddler"] * 10,
    RESPONSE: ["fiddler"] * 10,
    CONTEXT: ["fiddler"] * 10,
    FEEDBACK: ["good"] * 10,
    LLM_STATUS: ["success"] * 10,
    MODEL_NAME: ["fiddler"] * 10,
    RUN_ID: ["123e4567-e89b-12d3-a456-426614174000"] * 10,
    TOTAL_TOKENS: [0, DEFAULT_MAX_TOKEN] * 5,
    PROMPT_TOKENS: [0, DEFAULT_MAX_TOKEN] * 5,
    COMPLETION_TOKENS: [0, DEFAULT_MAX_TOKEN] * 5,
    DURATION: [1, DEFAULT_MAX_DURATION] * 5,
}


def import_fiddler() -> Any:
    """Import the fiddler python package and raise an error if it is not installed."""
    return guard_import("fiddler", pip_name="fiddler-client")


# First, define custom callback handler implementations
class FiddlerCallbackHandler(BaseCallbackHandler):
    """Callback handler that publishes LangChain LLM events to Fiddler."""

    def __init__(
        self,
        url: str,
        org: str,
        project: str,
        model: str,
        api_key: str,
    ) -> None:
        """
        Initialize Fiddler callback handler.

        Args:
            url: Fiddler URL (e.g. https://demo.fiddler.ai).
                Make sure to include the protocol (http/https).
            org: Fiddler organization id
            project: Fiddler project name to publish events to
            model: Fiddler model name to publish events to
            api_key: Fiddler authentication token
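
        Example:
            A minimal sketch; the endpoint and credentials below are
            placeholders, and ``SomeLLM`` stands in for any LLM class that
            accepts ``callbacks``.

            .. code-block:: python

                fiddler_handler = FiddlerCallbackHandler(
                    url="https://demo.fiddler.ai",
                    org="my_org",
                    project="my_project",
                    model="my_model",
                    api_key="my_api_key",
                )
                llm = SomeLLM(callbacks=[fiddler_handler])  # hypothetical LLM class
                llm.invoke("What is MLOps?")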
""" | |
        super().__init__()
        # Initialize Fiddler client and other necessary properties
        self.fdl = import_fiddler()
        self.pd = import_pandas()

        self.url = url
        self.org = org
        self.project = project
        self.model = model
        self.api_key = api_key
        self._df = self.pd.DataFrame(_dataset_dict)

        self.run_id_prompts: Dict[UUID, List[str]] = {}
        self.run_id_response: Dict[UUID, List[str]] = {}
        self.run_id_starttime: Dict[UUID, int] = {}

        # Initialize Fiddler client here
        self.fiddler_client = self.fdl.FiddlerApi(url, org_id=org, auth_token=api_key)

        if self.project not in self.fiddler_client.get_project_names():
            print(  # noqa: T201
                f"adding project {self.project}. This only has to be done once."
            )
            try:
                self.fiddler_client.add_project(self.project)
            except Exception as e:
                print(  # noqa: T201
                    f"Error adding project {self.project}: {e}. "
                    "Fiddler integration will not work."
                )
                raise e

        dataset_info = self.fdl.DatasetInfo.from_dataframe(
            self._df, max_inferred_cardinality=0
        )

        # Set the feedback and llm_status columns to categorical
        for i in range(len(dataset_info.columns)):
            if dataset_info.columns[i].name == FEEDBACK:
                dataset_info.columns[i].data_type = self.fdl.DataType.CATEGORY
                dataset_info.columns[i].possible_values = FEEDBACK_POSSIBLE_VALUES
            elif dataset_info.columns[i].name == LLM_STATUS:
                dataset_info.columns[i].data_type = self.fdl.DataType.CATEGORY
                dataset_info.columns[i].possible_values = [SUCCESS, FAILURE]

        if self.model not in self.fiddler_client.get_model_names(self.project):
            if self.model not in self.fiddler_client.get_dataset_names(self.project):
                print(  # noqa: T201
                    f"adding dataset {self.model} to project {self.project}. "
                    "This only has to be done once."
                )
                try:
                    self.fiddler_client.upload_dataset(
                        project_id=self.project,
                        dataset_id=self.model,
                        dataset={"train": self._df},
                        info=dataset_info,
                    )
                except Exception as e:
                    print(  # noqa: T201
                        f"Error adding dataset {self.model}: {e}. "
                        "Fiddler integration will not work."
                    )
                    raise e

            model_info = self.fdl.ModelInfo.from_dataset_info(
                dataset_info=dataset_info,
                dataset_id="train",
                model_task=self.fdl.ModelTask.LLM,
                features=[PROMPT, CONTEXT, RESPONSE],
                target=FEEDBACK,
                metadata_cols=[
                    RUN_ID,
                    TOTAL_TOKENS,
                    PROMPT_TOKENS,
                    COMPLETION_TOKENS,
                    MODEL_NAME,
                    DURATION,
                ],
                custom_features=self.custom_features(),
            )

            print(  # noqa: T201
                f"adding model {self.model} to project {self.project}. "
                "This only has to be done once."
            )
            try:
                self.fiddler_client.add_model(
                    project_id=self.project,
                    dataset_id=self.model,
                    model_id=self.model,
                    model_info=model_info,
                )
            except Exception as e:
                print(  # noqa: T201
                    f"Error adding model {self.model}: {e}. "
                    "Fiddler integration will not work."
                )
                raise e

    def custom_features(self) -> list:
        """
        Define custom features for the model to automatically enrich the data with.

        Here, we enable the following enrichments:
        - Automatic embedding generation for prompt and response
        - Text statistics such as:
            - Automated Readability Index
            - Coleman-Liau Index
            - Dale-Chall Readability Score
            - Difficult Words
            - Flesch Reading Ease
            - Flesch-Kincaid Grade
            - Gunning Fog
            - Linsear Write Formula
        - PII (Personally Identifiable Information) detection
        - Sentiment analysis
        """
        return [
            self.fdl.Enrichment(
                name="Prompt Embedding",
                enrichment="embedding",
                columns=[PROMPT],
            ),
            self.fdl.TextEmbedding(
                name="Prompt CF",
                source_column=PROMPT,
                column="Prompt Embedding",
            ),
            self.fdl.Enrichment(
                name="Response Embedding",
                enrichment="embedding",
                columns=[RESPONSE],
            ),
            self.fdl.TextEmbedding(
                name="Response CF",
                source_column=RESPONSE,
                column="Response Embedding",
            ),
            self.fdl.Enrichment(
                name="Text Statistics",
                enrichment="textstat",
                columns=[PROMPT, RESPONSE],
                config={
                    "statistics": [
                        "automated_readability_index",
                        "coleman_liau_index",
                        "dale_chall_readability_score",
                        "difficult_words",
                        "flesch_reading_ease",
                        "flesch_kincaid_grade",
                        "gunning_fog",
                        "linsear_write_formula",
                    ]
                },
            ),
            self.fdl.Enrichment(
                name="PII",
                enrichment="pii",
                columns=[PROMPT, RESPONSE],
            ),
            self.fdl.Enrichment(
                name="Sentiment",
                enrichment="sentiment",
                columns=[PROMPT, RESPONSE],
            ),
        ]

    def _publish_events(
        self,
        run_id: UUID,
        prompt_responses: List[str],
        duration: int,
        llm_status: str,
        model_name: Optional[str] = "",
        token_usage_dict: Optional[Dict[str, Any]] = None,
    ) -> None:
        """
        Publish prompt/response events for a run to Fiddler.
        """
        prompt_count = len(self.run_id_prompts[run_id])
        df = self.pd.DataFrame(
            {
                PROMPT: self.run_id_prompts[run_id],
                RESPONSE: prompt_responses,
                RUN_ID: [str(run_id)] * prompt_count,
                DURATION: [duration] * prompt_count,
                LLM_STATUS: [llm_status] * prompt_count,
                MODEL_NAME: [model_name] * prompt_count,
            }
        )

        if token_usage_dict:
            for key, value in token_usage_dict.items():
                df[key] = [value] * prompt_count if isinstance(value, int) else value
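
        # At this point a single row of `df` looks roughly like the following
        # (values are illustrative, not taken from a real run):
        #   {"prompt": "What is MLOps?", "response": "MLOps is ...",
        #    "run_id": "123e4567-...", "duration": 1042, "llm_status": "success",
        #    "model_name": "gpt-3.5-turbo", "total_tokens": 42,
        #    "prompt_tokens": 30, "completion_tokens": 12}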
        try:
            # Batch-publish when there are multiple rows; otherwise publish
            # a single event
            if df.shape[0] > 1:
                self.fiddler_client.publish_events_batch(self.project, self.model, df)
            else:
                df_dict = df.to_dict(orient="records")
                self.fiddler_client.publish_event(
                    self.project, self.model, event=df_dict[0]
                )
        except Exception as e:
            print(  # noqa: T201
                f"Error publishing events to fiddler: {e}. continuing..."
            )

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> Any:
        # Record the prompts and the start time (in milliseconds) for this run
        run_id = kwargs[RUN_ID]
        self.run_id_prompts[run_id] = prompts
        self.run_id_starttime[run_id] = int(time.time() * 1000)

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        flattened_llmresult = response.flatten()
        run_id = kwargs[RUN_ID]
        run_duration = int(time.time() * 1000) - self.run_id_starttime[run_id]
        model_name = ""
        token_usage_dict = {}

        # Pull token usage and model name out of the provider's llm_output
        if isinstance(response.llm_output, dict):
            token_usage_dict = {
                k: v
                for k, v in response.llm_output.items()
                if k in [TOTAL_TOKENS, PROMPT_TOKENS, COMPLETION_TOKENS]
            }
            model_name = response.llm_output.get(MODEL_NAME, "")

        # One flattened LLMResult per prompt, so responses line up with prompts
        prompt_responses = [
            llmresult.generations[0][0].text for llmresult in flattened_llmresult
        ]

        self._publish_events(
            run_id,
            prompt_responses,
            run_duration,
            SUCCESS,
            model_name,
            token_usage_dict,
        )

    def on_llm_error(self, error: BaseException, **kwargs: Any) -> None:
        run_id = kwargs[RUN_ID]
        duration = int(time.time() * 1000) - self.run_id_starttime[run_id]
        # On failure, publish empty responses with FAILURE status
        self._publish_events(
            run_id, [""] * len(self.run_id_prompts[run_id]), duration, FAILURE
        )