import json
import logging
from functools import wraps
from typing import List, Optional, Union

from camel.agents import ChatAgent
from camel.configs import ChatGPTConfig
from camel.models import BaseModelBackend, ModelFactory
from camel.runtime import BaseRuntime
from camel.runtime.utils import FunctionRiskToolkit, IgnoreRiskToolkit
from camel.toolkits import FunctionTool
from camel.types import ModelPlatformType, ModelType

logger = logging.getLogger(__name__)

GUARDPROMPT = """You are a function safety evaluator tasked with assessing the
potential risk level of a given function based on both its description
and parameters. Your goal is to determine if the function may pose any
harm to the user's environment, such as deleting files, executing
arbitrary code, or accessing sensitive information. Pay special attention
to the provided parameters: even if a function has the potential to be
dangerous, its actual parameters may indicate harmless behavior, and the
risk level should be adjusted accordingly. Use the `function_risk` tool to
assign a risk score to the function, based on the following criteria:

- **Score 1**: No harm. This includes simple operations like mathematical
  calculations, content searches, or data retrievals that do not impact
  the user's environment or access sensitive resources. This also
  includes functions with potentially dangerous capabilities that have
  harmless, controlled parameters that ensure safe execution.
- **Score 2**: Minimal harm. The function might read user files, retrieve
  non-sensitive data, or access other low-risk resources,
  posing little risk to the user.
- **Score 3**: Risk present. The function might delete files, modify the
  file system, execute arbitrary code, or access sensitive data, which
  could negatively impact the user's environment. However, if the
  actual parameters provided clearly indicate safe and restricted
  usage, this risk should be downgraded accordingly.

When evaluating, always consider both the function's description and its
specific parameters. If the function appears risky due to its design but
the provided parameters indicate a safe and non-impactful operation,
adjust the risk score to reflect this. Assign an appropriate risk score
and provide a brief explanation of your reasoning based on the function's
description and the actual parameters given.
YOU MUST USE THE `function_risk` TOOL TO ASSESS THE RISK
LEVEL OF EACH FUNCTION.
"""


class LLMGuardRuntime(BaseRuntime):
    r"""A runtime that evaluates the risk level of functions using
    a language model.

    Args:
        prompt (str): The prompt to use for the language model.
            (default: :obj:`GUARDPROMPT`)
        model (BaseModelBackend): The language model to use.
            (default: :obj:`None`)
        verbose (bool): Whether to print verbose output.
            (default: :obj:`False`)
    """

    def __init__(
        self,
        prompt: str = GUARDPROMPT,
        model: Optional[BaseModelBackend] = None,
        verbose: bool = False,
    ):
        super().__init__()
        self.prompt = prompt
        self.model = model
        self.verbose = verbose

        # Fall back to the default platform and model type when no backend
        # is supplied.
        if not self.model:
            self.model = ModelFactory.create(
                model_platform=ModelPlatformType.DEFAULT,
                model_type=ModelType.DEFAULT,
                model_config_dict=ChatGPTConfig().as_dict(),
            )
        # Register the ignore-risk tool so that a flagged function can be
        # explicitly waived (see the `ignored_risks` check in `add`).
        self.ignore_toolkit = IgnoreRiskToolkit(verbose=verbose)
        self.ignore_tool = self.ignore_toolkit.get_tools()[0]
        self.tools_map[self.ignore_tool.get_function_name()] = self.ignore_tool

        # The evaluator agent must report its verdict through the external
        # `function_risk` tool rather than as plain text.
        self.agent = ChatAgent(
            system_message=self.prompt,
            model=self.model,
            external_tools=[
                *FunctionRiskToolkit(verbose=verbose).get_tools(),
            ],
        )

    def add(
        self,
        funcs: Union[FunctionTool, List[FunctionTool]],
        threshold: int = 2,
    ) -> "LLMGuardRuntime":
        r"""Add a function or list of functions to the runtime.

        Args:
            funcs (FunctionTool or List[FunctionTool]): The function or
                list of functions to add.
            threshold (int): The risk threshold for functions; calls whose
                assessed score exceeds this value are blocked.
                (default: :obj:`2`)

        Returns:
            LLMGuardRuntime: The current runtime.
        """

        if not isinstance(funcs, list):
            funcs = [funcs]

        for func in funcs:
            inner_func = func.func

            # Capture func, inner_func, and threshold as keyword defaults so
            # each wrapper created in this loop binds its own values.
            @wraps(inner_func)
            def wrapper(
                *args,
                func=func,
                inner_func=inner_func,
                threshold=threshold,
                **kwargs,
            ):
                function_name = func.get_function_name()
                # A one-shot waiver recorded via the ignore-risk tool skips
                # the assessment for this call only.
                if function_name in self.ignore_toolkit.ignored_risks:
                    reason = self.ignore_toolkit.ignored_risks.pop(
                        function_name
                    )
                    logger.info(
                        f"Ignored risk for function {function_name}: {reason}"
                    )
                    return inner_func(*args, **kwargs)
                # Score this specific call with a fresh message history so
                # earlier assessments do not leak into the judgment.
                self.agent.init_messages()
                resp = self.agent.step(
                    f"""
                    Function is: {function_name}
                    Function description: {func.get_function_description()}
                    Args: {args}
                    Kwargs: {kwargs}
                    """
                )
                tool_call = resp.info.get("external_tool_request", None)
                if not tool_call:
                    logger.error("No tool call found in response.")
                    return {
                        "error": "Risk assessment failed. Disabling function."
                    }
                # The tool call carries the verdict as JSON arguments with a
                # `score` and a `reason`.
                data = json.loads(tool_call.function.arguments)
                if data["score"] > threshold:
                    message = (
                        f"Risk assessment not passed for {function_name}. "
                        f"Score: {data['score']} > Threshold: {threshold}"
                        f"\nReason: {data['reason']}"
                    )
                    logger.warning(message)
                    return {"error": message}

                message = (
                    f"Function {function_name} passed risk assessment. "
                    f"Score: {data['score']}, Reason: {data['reason']}"
                )
                logger.info(message)
                if self.verbose:
                    print(message)
                return inner_func(*args, **kwargs)

            # Install the guarded wrapper and register the function so its
            # risk can later be waived through the ignore-risk tool.
            func.func = wrapper
            self.tools_map[func.get_function_name()] = func
            self.ignore_toolkit.add(func.get_function_name())

        return self

    def reset(self) -> "LLMGuardRuntime":
        r"""Resets the runtime to its initial state."""
        self.ignore_toolkit.ignored_risks = dict()
        self.agent.reset()

        return self
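

# Minimal usage sketch (illustrative only, not part of the module): the tool
# name and callable below are hypothetical, and a configured default model
# backend is assumed.
#
#     from camel.toolkits import FunctionTool
#
#     def delete_path(path: str) -> str:  # hypothetical callable
#         ...
#
#     runtime = LLMGuardRuntime(verbose=True).add(
#         FunctionTool(delete_path), threshold=2
#     )
#     guarded = runtime.tools_map["delete_path"]
#     result = guarded.func(path="/tmp/example.txt")  # risk-checked call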