import json
import logging
from functools import wraps
from typing import List, Optional, Union

from camel.agents import ChatAgent
from camel.configs import ChatGPTConfig
from camel.models import BaseModelBackend, ModelFactory
from camel.runtime import BaseRuntime
from camel.runtime.utils import FunctionRiskToolkit, IgnoreRiskToolkit
from camel.toolkits import FunctionTool
from camel.types import ModelPlatformType, ModelType

logger = logging.getLogger(__name__)

GUARDPROMPT = """You are a function safety evaluator tasked with assessing the
potential risk level of a given function based on both its description
and parameters. Your goal is to determine if the function may pose any
harm to the user's environment, such as deleting files, executing
arbitrary code, or accessing sensitive information. Pay special attention
to the provided parameters: even if a function has the potential to be
dangerous, its actual parameters may indicate harmless behavior, and the
risk level should be adjusted accordingly. Use the `function_risk` tool to
assign a risk score to the function, based on the following criteria:

- **Score 1**: No harm. This includes simple operations like mathematical
  calculations, content searches, or data retrievals that do not impact
  the user's environment or access sensitive resources. This also
  includes functions with potentially dangerous capabilities that have
  harmless, controlled parameters that ensure safe execution.
- **Score 2**: Minimal harm. The function might read user files, retrieve
  non-sensitive data, or access other low-risk resources,
  posing little risk to the user.
- **Score 3**: Risk present. The function might delete files, modify the
  file system, execute arbitrary code, or access sensitive data, which
  could negatively impact the user's environment. However, if the
  actual parameters provided clearly indicate safe and restricted
  usage, this risk should be downgraded accordingly.

When evaluating, always consider both the function's description and its
specific parameters. If the function appears risky due to its design but
the provided parameters indicate a safe and non-impactful operation,
adjust the risk score to reflect this. Assign an appropriate risk score
and provide a brief explanation of your reasoning based on the function's
description and the actual parameters given.
YOU MUST USE THE `function_risk` TOOL TO ASSESS THE RISK
LEVEL OF EACH FUNCTION.
"""


class LLMGuardRuntime(BaseRuntime):
    r"""A runtime that evaluates the risk level of functions using
    a language model.

    Args:
        prompt (str): The prompt to use for the language model.
            (default: :obj:`GUARDPROMPT`)
        model (BaseModelBackend): The language model to use.
            (default: :obj:`None`)
        verbose (bool): Whether to print verbose output.
            (default: :obj:`False`)
    """

    def __init__(
        self,
        prompt: str = GUARDPROMPT,
        model: Optional[BaseModelBackend] = None,
        verbose: bool = False,
    ):
        super().__init__()
        self.prompt = prompt
        self.model = model
        self.verbose = verbose

        # Fall back to the default platform and model type when no backend
        # is supplied.
        if not self.model:
            self.model = ModelFactory.create(
                model_platform=ModelPlatformType.DEFAULT,
                model_type=ModelType.DEFAULT,
                model_config_dict=ChatGPTConfig().as_dict(),
            )
        # Register the ignore-risk tool so that a flagged function can be
        # explicitly waived (see the `ignored_risks` check in `add`).
        self.ignore_toolkit = IgnoreRiskToolkit(verbose=verbose)
        self.ignore_tool = self.ignore_toolkit.get_tools()[0]
        self.tools_map[self.ignore_tool.get_function_name()] = self.ignore_tool

        # The evaluator agent must report its verdict through the external
        # `function_risk` tool rather than as plain text.
        self.agent = ChatAgent(
            system_message=self.prompt,
            model=self.model,
            external_tools=[
                *FunctionRiskToolkit(verbose=verbose).get_tools(),
            ],
        )

    def add(
        self,
        funcs: Union[FunctionTool, List[FunctionTool]],
        threshold: int = 2,
    ) -> "LLMGuardRuntime":
        r"""Add a function or list of functions to the runtime.

        Args:
            funcs (FunctionTool or List[FunctionTool]): The function or
                list of functions to add.
            threshold (int): The risk threshold for functions; calls whose
                assessed score exceeds this value are blocked.
                (default: :obj:`2`)

        Returns:
            LLMGuardRuntime: The current runtime.
        """

        if not isinstance(funcs, list):
            funcs = [funcs]

        for func in funcs:
            inner_func = func.func

            # Capture func, inner_func, and threshold as keyword defaults so
            # each wrapper created in this loop binds its own values.
            @wraps(inner_func)
            def wrapper(
                *args,
                func=func,
                inner_func=inner_func,
                threshold=threshold,
                **kwargs,
            ):
                function_name = func.get_function_name()
                # A one-shot waiver recorded via the ignore-risk tool skips
                # the assessment for this call only.
                if function_name in self.ignore_toolkit.ignored_risks:
                    reason = self.ignore_toolkit.ignored_risks.pop(
                        function_name
                    )
                    logger.info(
                        f"Ignored risk for function {function_name}: {reason}"
                    )
                    return inner_func(*args, **kwargs)
                # Score this specific call with a fresh message history so
                # earlier assessments do not leak into the judgment.
                self.agent.init_messages()
                resp = self.agent.step(
                    f"""
                    Function is: {function_name}
                    Function description: {func.get_function_description()}
                    Args: {args}
                    Kwargs: {kwargs}
                    """
                )
                tool_call = resp.info.get("external_tool_request", None)
                if not tool_call:
                    logger.error("No tool call found in response.")
                    return {
                        "error": "Risk assessment failed. Disabling function."
                    }
                # The tool call carries the verdict as JSON arguments with a
                # `score` and a `reason`.
                data = json.loads(tool_call.function.arguments)
                if data["score"] > threshold:
                    message = (
                        f"Risk assessment not passed for {function_name}. "
                        f"Score: {data['score']} > Threshold: {threshold}"
                        f"\nReason: {data['reason']}"
                    )
                    logger.warning(message)
                    return {"error": message}

                message = (
                    f"Function {function_name} passed risk assessment. "
                    f"Score: {data['score']}, Reason: {data['reason']}"
                )
                logger.info(message)
                if self.verbose:
                    print(message)
                return inner_func(*args, **kwargs)

            # Install the guarded wrapper and register the function so its
            # risk can later be waived through the ignore-risk tool.
            func.func = wrapper
            self.tools_map[func.get_function_name()] = func
            self.ignore_toolkit.add(func.get_function_name())

        return self

    def reset(self) -> "LLMGuardRuntime":
        r"""Resets the runtime to its initial state."""
        self.ignore_toolkit.ignored_risks = dict()
        self.agent.reset()

        return self
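

# Minimal usage sketch (illustrative only, not part of the module): the tool
# name and callable below are hypothetical, and a configured default model
# backend is assumed.
#
#     from camel.toolkits import FunctionTool
#
#     def delete_path(path: str) -> str:  # hypothetical callable
#         ...
#
#     runtime = LLMGuardRuntime(verbose=True).add(
#         FunctionTool(delete_path), threshold=2
#     )
#     guarded = runtime.tools_map["delete_path"]
#     result = guarded.func(path="/tmp/example.txt")  # risk-checked call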