File size: 4,808 Bytes
4ca8fdc 62da328 4ca8fdc 62da328 38255bb 4ca8fdc 38255bb 6d7b1c1 d557cc1 38255bb 40d5322 4ca8fdc 62da328 7a12aab 4ca8fdc 7a12aab 4ca8fdc 62da328 38255bb 62da328 38255bb 62da328 3cebb1b 62da328 38255bb 0848a53 38255bb 4ca8fdc 38255bb 6d7b1c1 38255bb 6404ebc 38255bb 35ee5f2 4ca8fdc 35ee5f2 3863783 38255bb 4ca8fdc 38255bb d557cc1 35ee5f2 4ca8fdc 38255bb 62da328 38255bb 4ca8fdc 62da328 38255bb 62da328 38255bb 62da328 4ca8fdc 62da328 38255bb 62da328 38255bb 62da328 38255bb 62da328 38255bb 20bb6a0 62da328 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
from dotenv import load_dotenv
import os
from camel.models import ModelFactory
from camel.logger import get_logger
from camel.toolkits import (
AudioAnalysisToolkit,
CodeExecutionToolkit,
ExcelToolkit,
ImageAnalysisToolkit,
SearchToolkit,
VideoAnalysisToolkit,
BrowserToolkit,
FileWriteToolkit,
)
from camel.types import ModelPlatformType, ModelType
from camel.configs import ChatGPTConfig
from owl.utils import GAIABenchmark
from camel.logger import set_log_level
import pathlib
base_dir = pathlib.Path(__file__).parent.parent
env_path = base_dir / "owl" / ".env"
load_dotenv(dotenv_path=str(env_path))
set_log_level(level="DEBUG")
logger = get_logger(__name__)
# Configuration
LEVEL = 1
SAVE_RESULT = True
test_idx = [0]
def main():
"""Main function to run the GAIA benchmark."""
# Create cache directory
cache_dir = "tmp/"
os.makedirs(cache_dir, exist_ok=True)
result_dir = "results/"
os.makedirs(result_dir, exist_ok=True)
# Create models for different components
models = {
"user": ModelFactory.create(
model_platform=ModelPlatformType.OPENAI,
model_type=ModelType.GPT_4O,
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
),
"assistant": ModelFactory.create(
model_platform=ModelPlatformType.OPENAI,
model_type=ModelType.GPT_4O,
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
),
"browsing": ModelFactory.create(
model_platform=ModelPlatformType.OPENAI,
model_type=ModelType.GPT_4O,
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
),
"planning": ModelFactory.create(
model_platform=ModelPlatformType.OPENAI,
model_type=ModelType.GPT_4O,
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
),
"video": ModelFactory.create(
model_platform=ModelPlatformType.OPENAI,
model_type=ModelType.GPT_4O,
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
),
"image": ModelFactory.create(
model_platform=ModelPlatformType.OPENAI,
model_type=ModelType.GPT_4O,
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(),
),
}
# Configure toolkits
tools = [
*BrowserToolkit(
headless=False, # Set to True for headless mode (e.g., on remote servers)
web_agent_model=models["browsing"],
planning_agent_model=models["planning"],
).get_tools(),
*VideoAnalysisToolkit(
model=models["video"]
).get_tools(), # This requires OpenAI Key
*AudioAnalysisToolkit().get_tools(), # This requires OpenAI Key
*CodeExecutionToolkit(sandbox="subprocess", verbose=True).get_tools(),
*ImageAnalysisToolkit(model=models["image"]).get_tools(),
*SearchToolkit().get_tools(),
*ExcelToolkit().get_tools(),
*FileWriteToolkit(output_dir="./").get_tools(),
]
# Configure agent roles and parameters
user_agent_kwargs = {"model": models["user"]}
assistant_agent_kwargs = {"model": models["assistant"], "tools": tools}
# Initialize benchmark
benchmark = GAIABenchmark(data_dir="data/gaia", save_to="results/result.json")
# Print benchmark information
print(f"Number of validation examples: {len(benchmark.valid)}")
print(f"Number of test examples: {len(benchmark.test)}")
# Run benchmark
result = benchmark.run(
on="valid",
level=LEVEL,
idx=test_idx,
save_result=SAVE_RESULT,
user_role_name="user",
user_agent_kwargs=user_agent_kwargs,
assistant_role_name="assistant",
assistant_agent_kwargs=assistant_agent_kwargs,
)
# Output results
logger.info(f"Correct: {result['correct']}, Total: {result['total']}")
logger.info(f"Accuracy: {result['accuracy']}")
if __name__ == "__main__":
main()
|