Wendong-Fan committed
Commit 5d6d235 · 2 Parent(s): 7c3eae3 35ee5f2

Merge pull request #45 from camel-ai/model_platform


enhance: support model platforms not limited to openai

README.md CHANGED
@@ -121,6 +121,8 @@ In the `owl/.env_example` file, you will find all the necessary API keys along w
 1. *Copy and Rename*: Duplicate the `.env_example` file and rename the copy to `.env`.
 2. *Fill in Your Keys*: Open the `.env` file and insert your API keys in the corresponding fields.
 
+> **Note**: For optimal performance, we strongly recommend using OpenAI models. Our experiments show that other models may result in significantly lower performance on complex tasks and benchmarks.
+
 # 🚀 Quick Start
 
 Run the following minimal example:
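Since model selection now flows through injected backends rather than hard-coded OpenAI types, a backend from another platform can be built once and handed to the toolkits changed below. A minimal sketch, assuming `ModelPlatformType.QWEN` exists on the platform side and the matching API key is set in `.env` (only `ModelType.QWEN_OMNI_TURBO` is introduced by this PR):

```python
from camel.models import ModelFactory
from camel.types import ModelPlatformType, ModelType

# Non-OpenAI backend; QWEN_OMNI_TURBO is the ModelType added to
# owl/camel/types/enums.py in this PR. The QWEN platform entry and the
# required API key are assumptions, not shown in this diff.
qwen_model = ModelFactory.create(
    model_platform=ModelPlatformType.QWEN,
    model_type=ModelType.QWEN_OMNI_TURBO,
    model_config_dict={"temperature": 0, "top_p": 1},
)
```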
owl/camel/toolkits/document_processing_toolkit.py CHANGED
@@ -35,7 +35,7 @@ class DocumentProcessingToolkit(BaseToolkit):
     """
     def __init__(self, cache_dir: Optional[str] = None):
         self.image_tool = ImageAnalysisToolkit()
-        self.audio_tool = AudioAnalysisToolkit()
+        # self.audio_tool = AudioAnalysisToolkit()
         self.excel_tool = ExcelToolkit()
 
         self.cache_dir = "tmp/"
@@ -59,9 +59,9 @@ class DocumentProcessingToolkit(BaseToolkit):
         res = self.image_tool.ask_question_about_image(document_path, "Please make a detailed caption about the image.")
         return True, res
 
-        if any(document_path.endswith(ext) for ext in ['.mp3', '.wav']):
-            res = self.audio_tool.ask_question_about_audio(document_path, "Please transcribe the audio content to text.")
-            return True, res
+        # if any(document_path.endswith(ext) for ext in ['.mp3', '.wav']):
+        #     res = self.audio_tool.ask_question_about_audio(document_path, "Please transcribe the audio content to text.")
+        #     return True, res
 
         if any(document_path.endswith(ext) for ext in ['xls', 'xlsx']):
             res = self.excel_tool.extract_excel_content(document_path)
owl/camel/toolkits/image_analysis_toolkit.py CHANGED
@@ -15,7 +15,7 @@ import base64
 import logging
 import json
 from PIL import Image
-from typing import List, Literal, Tuple
+from typing import List, Literal, Tuple, Optional
 from urllib.parse import urlparse
 
 from camel.agents import ChatAgent
@@ -23,7 +23,7 @@ from camel.configs import ChatGPTConfig
 from camel.toolkits.base import BaseToolkit
 from camel.toolkits import FunctionTool, CodeExecutionToolkit
 from camel.types import ModelType, ModelPlatformType
-from camel.models import ModelFactory, OpenAIModel
+from camel.models import ModelFactory, OpenAIModel, BaseModelBackend
 from camel.messages import BaseMessage
 
 logger = logging.getLogger(__name__)
@@ -35,14 +35,8 @@ class ImageAnalysisToolkit(BaseToolkit):
     This class provides methods for understanding images, such as identifying
     objects, text in images.
     """
-    def __init__(self, model: Literal['gpt-4o', 'gpt-4o-mini'] = 'gpt-4o'):
-        self.model_type = ModelType.GPT_4O
-        if model == 'gpt-4o':
-            self.model_type = ModelType.GPT_4O
-        elif model == 'gpt-4o-mini':
-            self.model_type = ModelType.GPT_4O_MINI
-        else:
-            raise ValueError(f"Invalid model type: {model}")
+    def __init__(self, model: Optional[BaseModelBackend] = None):
+        self.model = model
 
     def _construct_image_url(self, image_path: str) -> str:
         parsed_url = urlparse(image_path)
@@ -66,78 +60,78 @@
         return base64.b64encode(image_file.read()).decode("utf-8")
 
 
-    def _judge_if_write_code(self, question: str, image_path: str) -> Tuple[bool, str]:
+    # def _judge_if_write_code(self, question: str, image_path: str) -> Tuple[bool, str]:
 
-        _image_url = self._construct_image_url(image_path)
+    #     _image_url = self._construct_image_url(image_path)
 
-        prompt = f"""
-        Given the question <question>{question}</question>, do you think it is suitable to write python code (using libraries like cv2) to process the image to get the answer?
-        Your output should be in json format (```json ```) including the following fields:
-        - `image_caption`: str, A detailed caption about the image. If it is suitable for writing code, it should contains helpful instructions and necessary informations for how to writing code.
-        - `if_write_code`: bool, True if it is suitable to write code to process the image, False otherwise.
-        """
-
-        messages = [
-            {
-                "role": "system",
-                "content": "You are a helpful assistant for image relevant tasks, and can judge whether \
-                    the given image is suitable for writing code to process or not. "
-            },
-            {
-                "role": "user",
-                "content": [
-                    {'type': 'text', 'text': prompt},
-                    {
-                        'type': 'image_url',
-                        'image_url': {
-                            'url': _image_url,
-                        },
-                    },
-                ],
-            },
-        ]
-
-        LLM = OpenAIModel(model_type=self.model_type)
-        resp = LLM.run(messages)
-
-        result_str = resp.choices[0].message.content.lower()
-        result_str = result_str.replace("```json", "").replace("```", "").strip()
-
-        result_dict = json.loads(result_str)
-
-        if_write_code = result_dict.get("if_write_code", False)
-        image_caption = result_dict.get("image_caption", "")
-
-        return if_write_code, image_caption
+    #     prompt = f"""
+    #     Given the question <question>{question}</question>, do you think it is suitable to write python code (using libraries like cv2) to process the image to get the answer?
+    #     Your output should be in json format (```json ```) including the following fields:
+    #     - `image_caption`: str, A detailed caption about the image. If it is suitable for writing code, it should contains helpful instructions and necessary informations for how to writing code.
+    #     - `if_write_code`: bool, True if it is suitable to write code to process the image, False otherwise.
+    #     """
+
+    #     messages = [
+    #         {
+    #             "role": "system",
+    #             "content": "You are a helpful assistant for image relevant tasks, and can judge whether \
+    #                 the given image is suitable for writing code to process or not. "
+    #         },
+    #         {
+    #             "role": "user",
+    #             "content": [
+    #                 {'type': 'text', 'text': prompt},
+    #                 {
+    #                     'type': 'image_url',
+    #                     'image_url': {
+    #                         'url': _image_url,
+    #                     },
+    #                 },
+    #             ],
+    #         },
+    #     ]
+
+    #     LLM = OpenAIModel(model_type=self.model_type)
+    #     resp = LLM.run(messages)
+
+    #     result_str = resp.choices[0].message.content.lower()
+    #     result_str = result_str.replace("```json", "").replace("```", "").strip()
+
+    #     result_dict = json.loads(result_str)
+
+    #     if_write_code = result_dict.get("if_write_code", False)
+    #     image_caption = result_dict.get("image_caption", "")
+
+    #     return if_write_code, image_caption
 
 
-    def _get_image_caption(self, image_path: str) -> str:
+    # def _get_image_caption(self, image_path: str) -> str:
 
-        _image_url = self._construct_image_url(image_path)
+    #     _image_url = self._construct_image_url(image_path)
 
-        prompt = f"""
-        Please make a detailed description about the image.
-        """
-
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {'type': 'text', 'text': prompt},
-                    {
-                        'type': 'image_url',
-                        'image_url': {
-                            'url': _image_url,
-                        },
-                    },
-                ],
-            },
-        ]
-
-        LLM = OpenAIModel(model_type=self.model_type)
-        resp = LLM.run(messages)
-
-        return resp.choices[0].message.content
+    #     prompt = f"""
+    #     Please make a detailed description about the image.
+    #     """
+
+    #     messages = [
+    #         {
+    #             "role": "user",
+    #             "content": [
+    #                 {'type': 'text', 'text': prompt},
+    #                 {
+    #                     'type': 'image_url',
+    #                     'image_url': {
+    #                         'url': _image_url,
+    #                     },
+    #                 },
+    #             ],
+    #         },
+    #     ]
+
+    #     LLM = OpenAIModel(model_type=self.model_type)
+    #     resp = LLM.run(messages)
+
+    #     return resp.choices[0].message.content
 
 
     def ask_question_about_image(self, image_path: str, question: str) -> str:
@@ -175,28 +169,24 @@
         # f"data:image/jpeg;base64,{self._encode_image(image_path)}"
         # )
 
-        model = ModelFactory.create(
-            model_platform=ModelPlatformType.OPENAI,
-            model_type=self.model_type,
-        )
 
-        code_model = ModelFactory.create(
-            model_platform=ModelPlatformType.OPENAI,
-            model_type=ModelType.O3_MINI,
-        )
+        # code_model = ModelFactory.create(
+        #     model_platform=ModelPlatformType.OPENAI,
+        #     model_type=ModelType.O3_MINI,
+        # )
 
-        code_execution_toolkit = CodeExecutionToolkit(require_confirm=False, sandbox="subprocess", verbose=True)
+        # code_execution_toolkit = CodeExecutionToolkit(require_confirm=False, sandbox="subprocess", verbose=True)
 
         image_agent = ChatAgent(
             "You are a helpful assistant for image relevant tasks. Given a question related to the image, you can carefully check the image in detail and answer the question.",
-            model,
+            self.model,
         )
 
-        code_agent = ChatAgent(
-            "You are an expert of writing code to process special images leveraging libraries like cv2.",
-            code_model,
-            tools=code_execution_toolkit.get_tools(),
-        )
+        # code_agent = ChatAgent(
+        #     "You are an expert of writing code to process special images leveraging libraries like cv2.",
+        #     code_model,
+        #     tools=code_execution_toolkit.get_tools(),
+        # )
 
         if not is_url:
             image_object = Image.open(image_path)
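With the `Literal['gpt-4o', 'gpt-4o-mini']` switch removed, any `BaseModelBackend` can drive the internal `ChatAgent`. A usage sketch (the backend construction mirrors the `ModelFactory.create` calls elsewhere in this diff; the image path and question are placeholders):

```python
from camel.models import ModelFactory
from camel.toolkits import ImageAnalysisToolkit
from camel.types import ModelPlatformType, ModelType

backend = ModelFactory.create(
    model_platform=ModelPlatformType.OPENAI,
    model_type=ModelType.GPT_4O,
)

# The injected backend is stored as self.model and handed to the ChatAgent
# inside ask_question_about_image.
toolkit = ImageAnalysisToolkit(model=backend)
answer = toolkit.ask_question_about_image("image.png", "What objects are visible?")
```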
owl/camel/toolkits/search_toolkit.py CHANGED
@@ -26,6 +26,7 @@ from retry import retry
 from camel.toolkits.base import BaseToolkit
 from camel.toolkits import FunctionTool
 from camel.messages import BaseMessage
+from camel.models import BaseModelBackend
 from camel.agents import ChatAgent
 from camel.models import ModelFactory
 from camel.types import ModelType, ModelPlatformType
@@ -37,6 +38,9 @@
     search engines like Google, DuckDuckGo, Wikipedia and Wolfram Alpha, Brave.
     """
 
+    def __init__(self, model: Optional[BaseModelBackend] = None):
+        self.model = model
+
     @dependencies_required("wikipedia")
     @retry(ConnectionError, delay=3)
     def search_wiki(self, entity: str) -> str:
@@ -698,15 +702,9 @@
         The search result containing url and necessary information.
         """
 
-        model = ModelFactory.create(
-            model_type=ModelType.GPT_4O_MINI,
-            model_platform=ModelPlatformType.OPENAI,
-            model_config_dict={"temperature": 0, "top_p": 1}
-        )
-
         search_agent = ChatAgent(
             "You are a helpful search agent.",
-            model=model,
+            model=self.model,
             tools=[FunctionTool(self.search_wiki), FunctionTool(self.search_google), FunctionTool(self.search_archived_webpage)]
         )
 
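The same injection applies here: the hard-coded GPT_4O_MINI agent is gone, and the caller now supplies the backend that powers the internal search agent. A self-contained sketch (hedged; passing no `model` presumably falls back to `ChatAgent`'s own default):

```python
from camel.models import ModelFactory
from camel.toolkits import SearchToolkit
from camel.types import ModelPlatformType, ModelType

backend = ModelFactory.create(
    model_platform=ModelPlatformType.OPENAI,
    model_type=ModelType.GPT_4O_MINI,
    model_config_dict={"temperature": 0, "top_p": 1},
)

# The backend drives the "helpful search agent" ChatAgent used by the
# LLM-assisted search method in this diff.
search_toolkit = SearchToolkit(model=backend)
tools = search_toolkit.get_tools()
```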
owl/camel/toolkits/web_toolkit.py CHANGED
@@ -14,7 +14,7 @@ from camel.toolkits.base import BaseToolkit
 from camel.toolkits import FunctionTool, VideoAnalysisToolkit
 from camel.messages import BaseMessage
 from camel.agents import ChatAgent
-from camel.models import ModelFactory
+from camel.models import ModelFactory, BaseModelBackend
 from camel.types import ModelType, ModelPlatformType
 
 import io
@@ -717,8 +717,9 @@
         headless=True,
         cache_dir: Optional[str] = None,
         page_script_path: Optional[str] = None,
-        model: Literal['gpt-4o', 'gpt-4o-mini'] = 'gpt-4o',
-        history_window: int = 5
+        history_window: int = 5,
+        web_agent_model: Optional[BaseModelBackend] = None,
+        planning_agent_model: Optional[BaseModelBackend] = None,
     ):
 
         self.browser = BaseBrowser(
@@ -728,10 +729,12 @@
         )
 
         self.history_window = history_window
+        self.web_agent_model = web_agent_model
+        self.planning_agent_model = planning_agent_model
 
         self.history = []
         # self.search_toolkit = SearchToolkit()
-        self.web_agent, self.planning_agent = self._initialize_agent(model)
+        self.web_agent, self.planning_agent = self._initialize_agent()
 
 
     def _reset(self):
@@ -741,28 +744,24 @@
         os.makedirs(self.browser.cache_dir, exist_ok=True)
 
 
-    def _initialize_agent(self, model: Literal['gpt-4o', 'gpt-4o-mini']) -> Tuple[ChatAgent, ChatAgent]:
+    def _initialize_agent(self) -> Tuple[ChatAgent, ChatAgent]:
         r"""Initialize the agent."""
-        if model == 'gpt-4o':
+        if self.web_agent_model is None:
             web_agent_model = ModelFactory.create(
                 model_platform=ModelPlatformType.OPENAI,
                 model_type=ModelType.GPT_4O,
                 model_config_dict={"temperature": 0, "top_p": 1}
             )
-        elif model == 'gpt-4o-mini':
-            web_agent_model = ModelFactory.create(
+        else:
+            web_agent_model = self.web_agent_model
+
+        if self.planning_agent_model is None:
+            planning_model = ModelFactory.create(
                 model_platform=ModelPlatformType.OPENAI,
-                model_type=ModelType.GPT_4O_MINI,
-                model_config_dict={"temperature": 0, "top_p": 1}
+                model_type=ModelType.O3_MINI,
             )
         else:
-            raise ValueError("Invalid model type.")
-
-        planning_model = ModelFactory.create(
-            model_platform=ModelPlatformType.OPENAI,
-            model_type=ModelType.O3_MINI,
-        )
-
+            planning_model = self.planning_agent_model
 
         system_prompt = """
         You are a helpful web agent that can assist users in browsing the web.
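Because `WebToolkit` drives two agents, it now takes two independently injectable backends; omitting either keeps the OpenAI defaults (`GPT_4O` for browsing, `O3_MINI` for planning) built in `_initialize_agent`. A sketch:

```python
from camel.models import ModelFactory
from camel.toolkits import WebToolkit
from camel.types import ModelPlatformType, ModelType

shared_model = ModelFactory.create(
    model_platform=ModelPlatformType.OPENAI,
    model_type=ModelType.GPT_4O,
    model_config_dict={"temperature": 0, "top_p": 1},
)

# One backend for the web agent, one for the planner; either argument may
# be dropped to fall back to the defaults in _initialize_agent.
web_toolkit = WebToolkit(
    headless=True,
    web_agent_model=shared_model,
    planning_agent_model=shared_model,
)
tools = web_toolkit.get_tools()
```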
owl/camel/types/enums.py CHANGED
@@ -149,6 +149,7 @@ class ModelType(UnifiedModelType, Enum):
     QWEN_2_5_32B = "qwen2.5-32b-instruct"
     QWEN_2_5_14B = "qwen2.5-14b-instruct"
     QWEN_QWQ_32B = "qwq-32b-preview"
+    QWEN_OMNI_TURBO = "qwen-omni-turbo"
 
     # Yi models (01-ai)
     YI_LIGHTNING = "yi-lightning"
@@ -404,6 +405,7 @@ class ModelType(UnifiedModelType, Enum):
             ModelType.QWEN_2_5_32B,
             ModelType.QWEN_2_5_14B,
             ModelType.QWEN_QWQ_32B,
+            ModelType.QWEN_OMNI_TURBO,
         }
 
     @property
@@ -502,6 +504,7 @@ class ModelType(UnifiedModelType, Enum):
             ModelType.INTERNLM2_PRO_CHAT,
             ModelType.TOGETHER_MIXTRAL_8_7B,
             ModelType.SGLANG_MISTRAL_7B,
+            ModelType.QWEN_OMNI_TURBO,
         }:
            return 32_768
        elif self in {
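A quick sanity check of the new entry (the `token_limit` property name follows CAMEL's `ModelType` API and is an assumption here; the diff itself only shows the value string and membership in the 32,768-token set):

```python
from camel.types import ModelType

assert ModelType.QWEN_OMNI_TURBO.value == "qwen-omni-turbo"
assert ModelType.QWEN_OMNI_TURBO.token_limit == 32_768  # per the set above
```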
owl/run.py CHANGED
@@ -8,7 +8,7 @@ from dotenv import load_dotenv
 from retry import retry
 from loguru import logger
 
-from utils import OwlRolePlaying, process_tools, run_society
+from utils import OwlRolePlaying, run_society
 import os
 
 
@@ -32,30 +32,27 @@ def construct_society(question: str) -> OwlRolePlaying:
         model_type=ModelType.GPT_4O,
         model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(), # [Optional] the config for model
     )
-
-
-    user_tools = []
-    assistant_tools = [
-        "WebToolkit",
-        'DocumentProcessingToolkit',
-        'VideoAnalysisToolkit',
-        'CodeExecutionToolkit',
-        'ImageAnalysisToolkit',
-        'AudioAnalysisToolkit',
-        "SearchToolkit",
-        "ExcelToolkit",
-    ]
+
+    tools_list = [
+        *WebToolkit(
+            headless=False,
+            web_agent_model=assistant_model,
+            planning_agent_model=assistant_model
+        ).get_tools(),
+        *DocumentProcessingToolkit().get_tools(),
+        *VideoAnalysisToolkit().get_tools(),  # This requires OpenAI and Qwen Key
+        *CodeExecutionToolkit().get_tools(),
+        *ImageAnalysisToolkit(model=assistant_model).get_tools(),
+        *AudioAnalysisToolkit().get_tools(),  # This requires OpenAI Key
+        *SearchToolkit(model=assistant_model).get_tools(),
+        *ExcelToolkit().get_tools()
+    ]
 
     user_role_name = 'user'
-    user_agent_kwargs = {
-        'model': user_model,
-        'tools': process_tools(user_tools),
-    }
+    user_agent_kwargs = dict(model=user_model)
     assistant_role_name = 'assistant'
-    assistant_agent_kwargs = {
-        'model': assistant_model,
-        'tools': process_tools(assistant_tools),
-    }
+    assistant_agent_kwargs = dict(model=assistant_model,
+                                  tools=tools_list)
 
     task_kwargs = {
         'task_prompt': question,
owl/run_gaia_roleplaying.py CHANGED
@@ -2,7 +2,7 @@ from camel.models import ModelFactory
 from camel.toolkits import *
 from camel.types import ModelPlatformType, ModelType
 from camel.configs import ChatGPTConfig
-from utils import GAIABenchmark, process_tools
+from utils import GAIABenchmark
 
 from dotenv import load_dotenv
 from retry import retry
@@ -36,28 +36,26 @@ def main():
         model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(), # [Optional] the config for model
     )
 
-    user_tools = []
-    assistant_tools = [
-        "WebToolkit",
-        'DocumentProcessingToolkit',
-        'VideoAnalysisToolkit',
-        'CodeExecutionToolkit',
-        'ImageAnalysisToolkit',
-        'AudioAnalysisToolkit',
-        "SearchToolkit",
-        "ExcelToolkit",
-    ]
+    tools_list = [
+        *WebToolkit(
+            headless=False,
+            web_agent_model=assistant_model,
+            planning_agent_model=assistant_model
+        ).get_tools(),
+        *DocumentProcessingToolkit().get_tools(),
+        *VideoAnalysisToolkit().get_tools(),  # This requires OpenAI and Qwen Key
+        *CodeExecutionToolkit().get_tools(),
+        *ImageAnalysisToolkit(model=assistant_model).get_tools(),
+        *AudioAnalysisToolkit().get_tools(),  # This requires OpenAI Key
+        *SearchToolkit(model=assistant_model).get_tools(),
+        *ExcelToolkit().get_tools()
+    ]
 
     user_role_name = 'user'
-    user_agent_kwargs = {
-        'model': user_model,
-        'tools': process_tools(user_tools),
-    }
+    user_agent_kwargs = dict(model=user_model)
     assistant_role_name = 'assistant'
-    assistant_agent_kwargs = {
-        'model': assistant_model,
-        'tools': process_tools(assistant_tools),
-    }
+    assistant_agent_kwargs = dict(model=assistant_model,
+                                  tools=tools_list)
 
     benchmark = GAIABenchmark(
         data_dir="data/gaia",
@@ -85,4 +83,3 @@
 
 if __name__ == "__main__":
     main()
-
owl/utils/enhanced_role_playing.py CHANGED
@@ -47,12 +47,12 @@ class OwlRolePlaying(RolePlaying):
         self.assistant_sys_msg: Optional[BaseMessage]
         self.user_sys_msg: Optional[BaseMessage]
 
-        self.is_reasoning_task = self._judge_if_reasoning_task(self.task_prompt)
+        # self.is_reasoning_task = self._judge_if_reasoning_task(self.task_prompt)
 
-        if self.is_reasoning_task:
-            logger.info("The task is judged as a reasoning or coding task. The assistant agent will use the reasoning model O3-MINI.")
-        else:
-            logger.info("The assistant agent will use the default model.")
+        # if self.is_reasoning_task:
+        #     logger.info("The task is judged as a reasoning or coding task. The assistant agent will use the reasoning model O3-MINI.")
+        # else:
+        #     logger.info("The assistant agent will use the default model.")
 
         self._init_agents(
             init_assistant_sys_msg,
@@ -60,7 +60,7 @@
             assistant_agent_kwargs=self.assistant_agent_kwargs,
             user_agent_kwargs=self.user_agent_kwargs,
             output_language=self.output_language,
-            is_reasoning_task=self.is_reasoning_task
+            # is_reasoning_task=self.is_reasoning_task
         )
 
 
@@ -97,12 +97,12 @@
         elif 'model' not in user_agent_kwargs:
             user_agent_kwargs.update(dict(model=self.model))
 
-        # If the task is a reasoning task, the assistant agent should use the reasoning model O3-MINI
-        if is_reasoning_task:
-            assistant_agent_kwargs['model'] = ModelFactory.create(
-                model_platform=ModelPlatformType.OPENAI,
-                model_type=ModelType.O3_MINI,
-            )
+        # # If the task is a reasoning task, the assistant agent should use the reasoning model O3-MINI
+        # if is_reasoning_task:
+        #     assistant_agent_kwargs['model'] = ModelFactory.create(
+        #         model_platform=ModelPlatformType.OPENAI,
+        #         model_type=ModelType.O3_MINI,
+        #     )
 
         self.assistant_agent = ChatAgent(
             init_assistant_sys_msg,
@@ -119,25 +119,25 @@
         self.user_sys_msg = self.user_agent.system_message
 
 
-    def _judge_if_reasoning_task(self, question: str) -> bool:
-        r"""Judge if the question is a reasoning task."""
+    # def _judge_if_reasoning_task(self, question: str) -> bool:
+    #     r"""Judge if the question is a reasoning task."""
 
-        LLM = OpenAIModel(model_type=ModelType.O3_MINI)
-        prompt = f"""
-        Please judge whether the following question is a reasoning or coding task, which can be solved by reasoning without leveraging external resources, or is suitable for writing code to solve the task.
-        If it is a reasoning or coding task, please return only "yes".
-        If it is not a reasoning or coding task, please return only "no".
-        Note:
-        - If the question required some world knowledge to answer the question, please carefully judge it, because the model's own knowledge is often unreliable.
-        - If it is suitable for writing codes (e.g. process excel files, write simulation codes, etc.), in most cases, it can be considered as a coding task.
-        Question: <question>{question}</question>
-        """
-        messages = [{"role": "user", "content": prompt}]
-        resp = LLM.run(messages)
-        if 'yes' in resp.choices[0].message.content.lower():
-            return True
-        else:
-            return False
+    #     LLM = OpenAIModel(model_type=ModelType.O3_MINI)
+    #     prompt = f"""
+    #     Please judge whether the following question is a reasoning or coding task, which can be solved by reasoning without leveraging external resources, or is suitable for writing code to solve the task.
+    #     If it is a reasoning or coding task, please return only "yes".
+    #     If it is not a reasoning or coding task, please return only "no".
+    #     Note:
+    #     - If the question required some world knowledge to answer the question, please carefully judge it, because the model's own knowledge is often unreliable.
+    #     - If it is suitable for writing codes (e.g. process excel files, write simulation codes, etc.), in most cases, it can be considered as a coding task.
+    #     Question: <question>{question}</question>
+    #     """
+    #     messages = [{"role": "user", "content": prompt}]
+    #     resp = LLM.run(messages)
+    #     if 'yes' in resp.choices[0].message.content.lower():
+    #         return True
+    #     else:
+    #         return False
 
 
     def _construct_gaia_sys_msgs(self):