|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from dotenv import load_dotenv |
|
|
|
|
|
import os |
|
|
|
from camel.models import ModelFactory |
|
from camel.logger import get_logger |
|
from camel.toolkits import ( |
|
AudioAnalysisToolkit, |
|
CodeExecutionToolkit, |
|
ExcelToolkit, |
|
ImageAnalysisToolkit, |
|
SearchToolkit, |
|
VideoAnalysisToolkit, |
|
BrowserToolkit, |
|
FileWriteToolkit, |
|
) |
|
from camel.types import ModelPlatformType, ModelType |
|
from camel.configs import ChatGPTConfig |
|
|
|
from owl.utils import GAIABenchmark |
|
from camel.logger import set_log_level |
|
|
|
import pathlib |
|
|
|
# Resolve the repository root relative to this file so the script works
# regardless of the current working directory.
base_dir = pathlib.Path(__file__).parent.parent

# Environment variables (API keys etc.) live in owl/.env inside the repo.
env_path = base_dir / "owl" / ".env"

load_dotenv(dotenv_path=str(env_path))


# Verbose logging for benchmark debugging.
set_log_level(level="DEBUG")


logger = get_logger(__name__)


# Benchmark run configuration.
LEVEL: int = 1  # GAIA difficulty level to run
SAVE_RESULT: bool = True  # persist per-task results to the save_to path
test_idx: list[int] = [0]  # indices of the tasks to run within the level
|
|
|
|
|
def main():
    """Run the GAIA benchmark on the configured validation tasks.

    Creates the cache/result directories, builds one deterministic GPT-4o
    model per agent role, assembles the assistant's toolkits, runs the
    benchmark for ``LEVEL``/``test_idx``, and logs the resulting accuracy.
    """

    cache_dir = "tmp/"
    os.makedirs(cache_dir, exist_ok=True)
    result_dir = "results/"
    os.makedirs(result_dir, exist_ok=True)

    def _make_gpt4o_model():
        """Build a deterministic (temperature=0, top_p=1) GPT-4o model."""
        return ModelFactory.create(
            model_platform=ModelPlatformType.OPENAI,
            model_type=ModelType.GPT_4O,
            model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
        )

    # One model per agent role. All roles use the same deterministic config;
    # building them through a single helper keeps the config in one place.
    roles = ("user", "assistant", "browsing", "planning", "video", "image")
    models = {role: _make_gpt4o_model() for role in roles}

    # Tools exposed to the assistant agent.
    tools = [
        *BrowserToolkit(
            headless=False,  # headful so pages render fully during browsing
            web_agent_model=models["browsing"],
            planning_agent_model=models["planning"],
        ).get_tools(),
        *VideoAnalysisToolkit(model=models["video"]).get_tools(),
        *AudioAnalysisToolkit().get_tools(),
        *CodeExecutionToolkit(sandbox="subprocess", verbose=True).get_tools(),
        *ImageAnalysisToolkit(model=models["image"]).get_tools(),
        *SearchToolkit().get_tools(),
        *ExcelToolkit().get_tools(),
        *FileWriteToolkit(output_dir="./").get_tools(),
    ]

    user_agent_kwargs = {"model": models["user"]}
    assistant_agent_kwargs = {"model": models["assistant"], "tools": tools}

    # Derive the save path from result_dir so the directory created above
    # and the output location cannot drift apart.
    benchmark = GAIABenchmark(
        data_dir="data/gaia",
        save_to=os.path.join(result_dir, "result.json"),
    )

    print(f"Number of validation examples: {len(benchmark.valid)}")
    print(f"Number of test examples: {len(benchmark.test)}")

    result = benchmark.run(
        on="valid",
        level=LEVEL,
        idx=test_idx,
        save_result=SAVE_RESULT,
        user_role_name="user",
        user_agent_kwargs=user_agent_kwargs,
        assistant_role_name="assistant",
        assistant_agent_kwargs=assistant_agent_kwargs,
    )

    logger.info(f"Correct: {result['correct']}, Total: {result['total']}")
    logger.info(f"Accuracy: {result['accuracy']}")
|
|
|
|
|
# Script entry point: run the benchmark only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
|
|