Merge pull request #45 from camel-ai/model_platform
Browse files — enhance: support model platforms not limited to OpenAI
- README.md +2 -0
- owl/camel/toolkits/document_processing_toolkit.py +4 -4
- owl/camel/toolkits/image_analysis_toolkit.py +81 -91
- owl/camel/toolkits/search_toolkit.py +5 -7
- owl/camel/toolkits/web_toolkit.py +16 -17
- owl/camel/types/enums.py +3 -0
- owl/run.py +19 -22
- owl/run_gaia_roleplaying.py +18 -21
- owl/utils/enhanced_role_playing.py +30 -30
README.md
CHANGED
@@ -121,6 +121,8 @@ In the `owl/.env_example` file, you will find all the necessary API keys along w
|
|
121 |
1. *Copy and Rename*: Duplicate the `.env_example` file and rename the copy to `.env`.
|
122 |
2. *Fill in Your Keys*: Open the `.env` file and insert your API keys in the corresponding fields.
|
123 |
|
|
|
|
|
124 |
# 🚀 Quick Start
|
125 |
|
126 |
Run the following minimal example:
|
|
|
121 |
1. *Copy and Rename*: Duplicate the `.env_example` file and rename the copy to `.env`.
|
122 |
2. *Fill in Your Keys*: Open the `.env` file and insert your API keys in the corresponding fields.
|
123 |
|
124 |
+
> **Note**: For optimal performance, we strongly recommend using OpenAI models. Our experiments show that other models may result in significantly lower performance on complex tasks and benchmarks.
|
125 |
+
|
126 |
# 🚀 Quick Start
|
127 |
|
128 |
Run the following minimal example:
|
owl/camel/toolkits/document_processing_toolkit.py
CHANGED
@@ -35,7 +35,7 @@ class DocumentProcessingToolkit(BaseToolkit):
|
|
35 |
"""
|
36 |
def __init__(self, cache_dir: Optional[str] = None):
|
37 |
self.image_tool = ImageAnalysisToolkit()
|
38 |
-
self.audio_tool = AudioAnalysisToolkit()
|
39 |
self.excel_tool = ExcelToolkit()
|
40 |
|
41 |
self.cache_dir = "tmp/"
|
@@ -59,9 +59,9 @@ class DocumentProcessingToolkit(BaseToolkit):
|
|
59 |
res = self.image_tool.ask_question_about_image(document_path, "Please make a detailed caption about the image.")
|
60 |
return True, res
|
61 |
|
62 |
-
if any(document_path.endswith(ext) for ext in ['.mp3', '.wav']):
|
63 |
-
|
64 |
-
|
65 |
|
66 |
if any(document_path.endswith(ext) for ext in ['xls', 'xlsx']):
|
67 |
res = self.excel_tool.extract_excel_content(document_path)
|
|
|
35 |
"""
|
36 |
def __init__(self, cache_dir: Optional[str] = None):
|
37 |
self.image_tool = ImageAnalysisToolkit()
|
38 |
+
# self.audio_tool = AudioAnalysisToolkit()
|
39 |
self.excel_tool = ExcelToolkit()
|
40 |
|
41 |
self.cache_dir = "tmp/"
|
|
|
59 |
res = self.image_tool.ask_question_about_image(document_path, "Please make a detailed caption about the image.")
|
60 |
return True, res
|
61 |
|
62 |
+
# if any(document_path.endswith(ext) for ext in ['.mp3', '.wav']):
|
63 |
+
# res = self.audio_tool.ask_question_about_audio(document_path, "Please transcribe the audio content to text.")
|
64 |
+
# return True, res
|
65 |
|
66 |
if any(document_path.endswith(ext) for ext in ['xls', 'xlsx']):
|
67 |
res = self.excel_tool.extract_excel_content(document_path)
|
owl/camel/toolkits/image_analysis_toolkit.py
CHANGED
@@ -15,7 +15,7 @@ import base64
|
|
15 |
import logging
|
16 |
import json
|
17 |
from PIL import Image
|
18 |
-
from typing import List, Literal, Tuple
|
19 |
from urllib.parse import urlparse
|
20 |
|
21 |
from camel.agents import ChatAgent
|
@@ -23,7 +23,7 @@ from camel.configs import ChatGPTConfig
|
|
23 |
from camel.toolkits.base import BaseToolkit
|
24 |
from camel.toolkits import FunctionTool, CodeExecutionToolkit
|
25 |
from camel.types import ModelType, ModelPlatformType
|
26 |
-
from camel.models import ModelFactory, OpenAIModel
|
27 |
from camel.messages import BaseMessage
|
28 |
|
29 |
logger = logging.getLogger(__name__)
|
@@ -35,14 +35,8 @@ class ImageAnalysisToolkit(BaseToolkit):
|
|
35 |
This class provides methods for understanding images, such as identifying
|
36 |
objects, text in images.
|
37 |
"""
|
38 |
-
def __init__(self, model:
|
39 |
-
self.
|
40 |
-
if model == 'gpt-4o':
|
41 |
-
self.model_type = ModelType.GPT_4O
|
42 |
-
elif model == 'gpt-4o-mini':
|
43 |
-
self.model_type = ModelType.GPT_4O_MINI
|
44 |
-
else:
|
45 |
-
raise ValueError(f"Invalid model type: {model}")
|
46 |
|
47 |
def _construct_image_url(self, image_path: str) -> str:
|
48 |
parsed_url = urlparse(image_path)
|
@@ -66,78 +60,78 @@ class ImageAnalysisToolkit(BaseToolkit):
|
|
66 |
return base64.b64encode(image_file.read()).decode("utf-8")
|
67 |
|
68 |
|
69 |
-
def _judge_if_write_code(self, question: str, image_path: str) -> Tuple[bool, str]:
|
70 |
|
71 |
-
|
72 |
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
|
113 |
|
114 |
-
def _get_image_caption(self, image_path: str) -> str:
|
115 |
|
116 |
-
|
117 |
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
|
142 |
|
143 |
def ask_question_about_image(self, image_path: str, question: str) -> str:
|
@@ -175,28 +169,24 @@ class ImageAnalysisToolkit(BaseToolkit):
|
|
175 |
# f"data:image/jpeg;base64,{self._encode_image(image_path)}"
|
176 |
# )
|
177 |
|
178 |
-
model = ModelFactory.create(
|
179 |
-
model_platform=ModelPlatformType.OPENAI,
|
180 |
-
model_type=self.model_type,
|
181 |
-
)
|
182 |
|
183 |
-
code_model = ModelFactory.create(
|
184 |
-
|
185 |
-
|
186 |
-
)
|
187 |
|
188 |
-
code_execution_toolkit = CodeExecutionToolkit(require_confirm=False, sandbox="subprocess", verbose=True)
|
189 |
|
190 |
image_agent = ChatAgent(
|
191 |
"You are a helpful assistant for image relevant tasks. Given a question related to the image, you can carefully check the image in detail and answer the question.",
|
192 |
-
model,
|
193 |
)
|
194 |
|
195 |
-
code_agent = ChatAgent(
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
)
|
200 |
|
201 |
if not is_url:
|
202 |
image_object = Image.open(image_path)
|
|
|
15 |
import logging
|
16 |
import json
|
17 |
from PIL import Image
|
18 |
+
from typing import List, Literal, Tuple, Optional
|
19 |
from urllib.parse import urlparse
|
20 |
|
21 |
from camel.agents import ChatAgent
|
|
|
23 |
from camel.toolkits.base import BaseToolkit
|
24 |
from camel.toolkits import FunctionTool, CodeExecutionToolkit
|
25 |
from camel.types import ModelType, ModelPlatformType
|
26 |
+
from camel.models import ModelFactory, OpenAIModel, BaseModelBackend
|
27 |
from camel.messages import BaseMessage
|
28 |
|
29 |
logger = logging.getLogger(__name__)
|
|
|
35 |
This class provides methods for understanding images, such as identifying
|
36 |
objects, text in images.
|
37 |
"""
|
38 |
+
def __init__(self, model: Optional[BaseModelBackend] = None):
|
39 |
+
self.model = model
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
def _construct_image_url(self, image_path: str) -> str:
|
42 |
parsed_url = urlparse(image_path)
|
|
|
60 |
return base64.b64encode(image_file.read()).decode("utf-8")
|
61 |
|
62 |
|
63 |
+
# def _judge_if_write_code(self, question: str, image_path: str) -> Tuple[bool, str]:
|
64 |
|
65 |
+
# _image_url = self._construct_image_url(image_path)
|
66 |
|
67 |
+
# prompt = f"""
|
68 |
+
# Given the question <question>{question}</question>, do you think it is suitable to write python code (using libraries like cv2) to process the image to get the answer?
|
69 |
+
# Your output should be in json format (```json ```) including the following fields:
|
70 |
+
# - `image_caption`: str, A detailed caption about the image. If it is suitable for writing code, it should contains helpful instructions and necessary informations for how to writing code.
|
71 |
+
# - `if_write_code`: bool, True if it is suitable to write code to process the image, False otherwise.
|
72 |
+
# """
|
73 |
+
|
74 |
+
# messages = [
|
75 |
+
# {
|
76 |
+
# "role": "system",
|
77 |
+
# "content": "You are a helpful assistant for image relevant tasks, and can judge whether \
|
78 |
+
# the given image is suitable for writing code to process or not. "
|
79 |
+
# },
|
80 |
+
# {
|
81 |
+
# "role": "user",
|
82 |
+
# "content": [
|
83 |
+
# {'type': 'text', 'text': prompt},
|
84 |
+
# {
|
85 |
+
# 'type': 'image_url',
|
86 |
+
# 'image_url': {
|
87 |
+
# 'url': _image_url,
|
88 |
+
# },
|
89 |
+
# },
|
90 |
+
# ],
|
91 |
+
# },
|
92 |
+
# ]
|
93 |
+
|
94 |
+
# LLM = OpenAIModel(model_type=self.model_type)
|
95 |
+
# resp = LLM.run(messages)
|
96 |
+
|
97 |
+
# result_str = resp.choices[0].message.content.lower()
|
98 |
+
# result_str = result_str.replace("```json", "").replace("```", "").strip()
|
99 |
+
|
100 |
+
# result_dict = json.loads(result_str)
|
101 |
+
|
102 |
+
# if_write_code = result_dict.get("if_write_code", False)
|
103 |
+
# image_caption = result_dict.get("image_caption", "")
|
104 |
+
|
105 |
+
# return if_write_code, image_caption
|
106 |
|
107 |
|
108 |
+
# def _get_image_caption(self, image_path: str) -> str:
|
109 |
|
110 |
+
# _image_url = self._construct_image_url(image_path)
|
111 |
|
112 |
+
# prompt = f"""
|
113 |
+
# Please make a detailed description about the image.
|
114 |
+
# """
|
115 |
+
|
116 |
+
# messages = [
|
117 |
+
# {
|
118 |
+
# "role": "user",
|
119 |
+
# "content": [
|
120 |
+
# {'type': 'text', 'text': prompt},
|
121 |
+
# {
|
122 |
+
# 'type': 'image_url',
|
123 |
+
# 'image_url': {
|
124 |
+
# 'url': _image_url,
|
125 |
+
# },
|
126 |
+
# },
|
127 |
+
# ],
|
128 |
+
# },
|
129 |
+
# ]
|
130 |
+
|
131 |
+
# LLM = OpenAIModel(model_type=self.model_type)
|
132 |
+
# resp = LLM.run(messages)
|
133 |
+
|
134 |
+
# return resp.choices[0].message.content
|
135 |
|
136 |
|
137 |
def ask_question_about_image(self, image_path: str, question: str) -> str:
|
|
|
169 |
# f"data:image/jpeg;base64,{self._encode_image(image_path)}"
|
170 |
# )
|
171 |
|
|
|
|
|
|
|
|
|
172 |
|
173 |
+
# code_model = ModelFactory.create(
|
174 |
+
# model_platform=ModelPlatformType.OPENAI,
|
175 |
+
# model_type=ModelType.O3_MINI,
|
176 |
+
# )
|
177 |
|
178 |
+
# code_execution_toolkit = CodeExecutionToolkit(require_confirm=False, sandbox="subprocess", verbose=True)
|
179 |
|
180 |
image_agent = ChatAgent(
|
181 |
"You are a helpful assistant for image relevant tasks. Given a question related to the image, you can carefully check the image in detail and answer the question.",
|
182 |
+
self.model,
|
183 |
)
|
184 |
|
185 |
+
# code_agent = ChatAgent(
|
186 |
+
# "You are an expert of writing code to process special images leveraging libraries like cv2.",
|
187 |
+
# code_model,
|
188 |
+
# tools=code_execution_toolkit.get_tools(),
|
189 |
+
# )
|
190 |
|
191 |
if not is_url:
|
192 |
image_object = Image.open(image_path)
|
owl/camel/toolkits/search_toolkit.py
CHANGED
@@ -26,6 +26,7 @@ from retry import retry
|
|
26 |
from camel.toolkits.base import BaseToolkit
|
27 |
from camel.toolkits import FunctionTool
|
28 |
from camel.messages import BaseMessage
|
|
|
29 |
from camel.agents import ChatAgent
|
30 |
from camel.models import ModelFactory
|
31 |
from camel.types import ModelType, ModelPlatformType
|
@@ -37,6 +38,9 @@ class SearchToolkit(BaseToolkit):
|
|
37 |
search engines like Google, DuckDuckGo, Wikipedia and Wolfram Alpha, Brave.
|
38 |
"""
|
39 |
|
|
|
|
|
|
|
40 |
@dependencies_required("wikipedia")
|
41 |
@retry(ConnectionError, delay=3)
|
42 |
def search_wiki(self, entity: str) -> str:
|
@@ -698,15 +702,9 @@ class SearchToolkit(BaseToolkit):
|
|
698 |
The search result containing url and necessary information.
|
699 |
"""
|
700 |
|
701 |
-
model = ModelFactory.create(
|
702 |
-
model_type=ModelType.GPT_4O_MINI,
|
703 |
-
model_platform=ModelPlatformType.OPENAI,
|
704 |
-
model_config_dict={"temperature": 0, "top_p": 1}
|
705 |
-
)
|
706 |
-
|
707 |
search_agent = ChatAgent(
|
708 |
"You are a helpful search agent.",
|
709 |
-
model=model,
|
710 |
tools=[FunctionTool(self.search_wiki), FunctionTool(self.search_google), FunctionTool(self.search_archived_webpage)]
|
711 |
)
|
712 |
|
|
|
26 |
from camel.toolkits.base import BaseToolkit
|
27 |
from camel.toolkits import FunctionTool
|
28 |
from camel.messages import BaseMessage
|
29 |
+
from camel.models import BaseModelBackend
|
30 |
from camel.agents import ChatAgent
|
31 |
from camel.models import ModelFactory
|
32 |
from camel.types import ModelType, ModelPlatformType
|
|
|
38 |
search engines like Google, DuckDuckGo, Wikipedia and Wolfram Alpha, Brave.
|
39 |
"""
|
40 |
|
41 |
+
def __init__(self, model: Optional[BaseModelBackend] = None):
|
42 |
+
self.model = model
|
43 |
+
|
44 |
@dependencies_required("wikipedia")
|
45 |
@retry(ConnectionError, delay=3)
|
46 |
def search_wiki(self, entity: str) -> str:
|
|
|
702 |
The search result containing url and necessary information.
|
703 |
"""
|
704 |
|
|
|
|
|
|
|
|
|
|
|
|
|
705 |
search_agent = ChatAgent(
|
706 |
"You are a helpful search agent.",
|
707 |
+
model=self.model,
|
708 |
tools=[FunctionTool(self.search_wiki), FunctionTool(self.search_google), FunctionTool(self.search_archived_webpage)]
|
709 |
)
|
710 |
|
owl/camel/toolkits/web_toolkit.py
CHANGED
@@ -14,7 +14,7 @@ from camel.toolkits.base import BaseToolkit
|
|
14 |
from camel.toolkits import FunctionTool, VideoAnalysisToolkit
|
15 |
from camel.messages import BaseMessage
|
16 |
from camel.agents import ChatAgent
|
17 |
-
from camel.models import ModelFactory
|
18 |
from camel.types import ModelType, ModelPlatformType
|
19 |
|
20 |
import io
|
@@ -717,8 +717,9 @@ class WebToolkit(BaseToolkit):
|
|
717 |
headless=True,
|
718 |
cache_dir: Optional[str] = None,
|
719 |
page_script_path: Optional[str] = None,
|
720 |
-
|
721 |
-
|
|
|
722 |
):
|
723 |
|
724 |
self.browser = BaseBrowser(
|
@@ -728,10 +729,12 @@ class WebToolkit(BaseToolkit):
|
|
728 |
)
|
729 |
|
730 |
self.history_window = history_window
|
|
|
|
|
731 |
|
732 |
self.history = []
|
733 |
# self.search_toolkit = SearchToolkit()
|
734 |
-
self.web_agent, self.planning_agent = self._initialize_agent(
|
735 |
|
736 |
|
737 |
def _reset(self):
|
@@ -741,28 +744,24 @@ class WebToolkit(BaseToolkit):
|
|
741 |
os.makedirs(self.browser.cache_dir, exist_ok=True)
|
742 |
|
743 |
|
744 |
-
def _initialize_agent(self
|
745 |
r"""Initialize the agent."""
|
746 |
-
if
|
747 |
web_agent_model = ModelFactory.create(
|
748 |
model_platform=ModelPlatformType.OPENAI,
|
749 |
model_type=ModelType.GPT_4O,
|
750 |
model_config_dict={"temperature": 0, "top_p": 1}
|
751 |
)
|
752 |
-
|
753 |
-
web_agent_model =
|
|
|
|
|
|
|
754 |
model_platform=ModelPlatformType.OPENAI,
|
755 |
-
model_type=ModelType.
|
756 |
-
model_config_dict={"temperature": 0, "top_p": 1}
|
757 |
)
|
758 |
else:
|
759 |
-
|
760 |
-
|
761 |
-
planning_model = ModelFactory.create(
|
762 |
-
model_platform=ModelPlatformType.OPENAI,
|
763 |
-
model_type=ModelType.O3_MINI,
|
764 |
-
)
|
765 |
-
|
766 |
|
767 |
system_prompt = """
|
768 |
You are a helpful web agent that can assist users in browsing the web.
|
|
|
14 |
from camel.toolkits import FunctionTool, VideoAnalysisToolkit
|
15 |
from camel.messages import BaseMessage
|
16 |
from camel.agents import ChatAgent
|
17 |
+
from camel.models import ModelFactory, BaseModelBackend
|
18 |
from camel.types import ModelType, ModelPlatformType
|
19 |
|
20 |
import io
|
|
|
717 |
headless=True,
|
718 |
cache_dir: Optional[str] = None,
|
719 |
page_script_path: Optional[str] = None,
|
720 |
+
history_window: int = 5,
|
721 |
+
web_agent_model: Optional[BaseModelBackend] = None,
|
722 |
+
planning_agent_model: Optional[BaseModelBackend] = None,
|
723 |
):
|
724 |
|
725 |
self.browser = BaseBrowser(
|
|
|
729 |
)
|
730 |
|
731 |
self.history_window = history_window
|
732 |
+
self.web_agent_model = web_agent_model
|
733 |
+
self.planning_agent_model = planning_agent_model
|
734 |
|
735 |
self.history = []
|
736 |
# self.search_toolkit = SearchToolkit()
|
737 |
+
self.web_agent, self.planning_agent = self._initialize_agent()
|
738 |
|
739 |
|
740 |
def _reset(self):
|
|
|
744 |
os.makedirs(self.browser.cache_dir, exist_ok=True)
|
745 |
|
746 |
|
747 |
+
def _initialize_agent(self) -> Tuple[ChatAgent, ChatAgent]:
|
748 |
r"""Initialize the agent."""
|
749 |
+
if self.web_agent_model is None:
|
750 |
web_agent_model = ModelFactory.create(
|
751 |
model_platform=ModelPlatformType.OPENAI,
|
752 |
model_type=ModelType.GPT_4O,
|
753 |
model_config_dict={"temperature": 0, "top_p": 1}
|
754 |
)
|
755 |
+
else:
|
756 |
+
web_agent_model = self.web_agent_model
|
757 |
+
|
758 |
+
if self.planning_agent_model is None:
|
759 |
+
planning_model = ModelFactory.create(
|
760 |
model_platform=ModelPlatformType.OPENAI,
|
761 |
+
model_type=ModelType.O3_MINI,
|
|
|
762 |
)
|
763 |
else:
|
764 |
+
planning_model = self.planning_agent_model
|
|
|
|
|
|
|
|
|
|
|
|
|
765 |
|
766 |
system_prompt = """
|
767 |
You are a helpful web agent that can assist users in browsing the web.
|
owl/camel/types/enums.py
CHANGED
@@ -149,6 +149,7 @@ class ModelType(UnifiedModelType, Enum):
|
|
149 |
QWEN_2_5_32B = "qwen2.5-32b-instruct"
|
150 |
QWEN_2_5_14B = "qwen2.5-14b-instruct"
|
151 |
QWEN_QWQ_32B = "qwq-32b-preview"
|
|
|
152 |
|
153 |
# Yi models (01-ai)
|
154 |
YI_LIGHTNING = "yi-lightning"
|
@@ -404,6 +405,7 @@ class ModelType(UnifiedModelType, Enum):
|
|
404 |
ModelType.QWEN_2_5_32B,
|
405 |
ModelType.QWEN_2_5_14B,
|
406 |
ModelType.QWEN_QWQ_32B,
|
|
|
407 |
}
|
408 |
|
409 |
@property
|
@@ -502,6 +504,7 @@ class ModelType(UnifiedModelType, Enum):
|
|
502 |
ModelType.INTERNLM2_PRO_CHAT,
|
503 |
ModelType.TOGETHER_MIXTRAL_8_7B,
|
504 |
ModelType.SGLANG_MISTRAL_7B,
|
|
|
505 |
}:
|
506 |
return 32_768
|
507 |
elif self in {
|
|
|
149 |
QWEN_2_5_32B = "qwen2.5-32b-instruct"
|
150 |
QWEN_2_5_14B = "qwen2.5-14b-instruct"
|
151 |
QWEN_QWQ_32B = "qwq-32b-preview"
|
152 |
+
QWEN_OMNI_TURBO = "qwen-omni-turbo"
|
153 |
|
154 |
# Yi models (01-ai)
|
155 |
YI_LIGHTNING = "yi-lightning"
|
|
|
405 |
ModelType.QWEN_2_5_32B,
|
406 |
ModelType.QWEN_2_5_14B,
|
407 |
ModelType.QWEN_QWQ_32B,
|
408 |
+
ModelType.QWEN_OMNI_TURBO,
|
409 |
}
|
410 |
|
411 |
@property
|
|
|
504 |
ModelType.INTERNLM2_PRO_CHAT,
|
505 |
ModelType.TOGETHER_MIXTRAL_8_7B,
|
506 |
ModelType.SGLANG_MISTRAL_7B,
|
507 |
+
ModelType.QWEN_OMNI_TURBO,
|
508 |
}:
|
509 |
return 32_768
|
510 |
elif self in {
|
owl/run.py
CHANGED
@@ -8,7 +8,7 @@ from dotenv import load_dotenv
|
|
8 |
from retry import retry
|
9 |
from loguru import logger
|
10 |
|
11 |
-
from utils import OwlRolePlaying,
|
12 |
import os
|
13 |
|
14 |
|
@@ -32,30 +32,27 @@ def construct_society(question: str) -> OwlRolePlaying:
|
|
32 |
model_type=ModelType.GPT_4O,
|
33 |
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(), # [Optional] the config for model
|
34 |
)
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
48 |
|
49 |
user_role_name = 'user'
|
50 |
-
user_agent_kwargs =
|
51 |
-
'model': user_model,
|
52 |
-
'tools': process_tools(user_tools),
|
53 |
-
}
|
54 |
assistant_role_name = 'assistant'
|
55 |
-
assistant_agent_kwargs =
|
56 |
-
|
57 |
-
'tools': process_tools(assistant_tools),
|
58 |
-
}
|
59 |
|
60 |
task_kwargs = {
|
61 |
'task_prompt': question,
|
|
|
8 |
from retry import retry
|
9 |
from loguru import logger
|
10 |
|
11 |
+
from utils import OwlRolePlaying, run_society
|
12 |
import os
|
13 |
|
14 |
|
|
|
32 |
model_type=ModelType.GPT_4O,
|
33 |
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(), # [Optional] the config for model
|
34 |
)
|
35 |
+
|
36 |
+
tools_list = [
|
37 |
+
*WebToolkit(
|
38 |
+
headless=False,
|
39 |
+
web_agent_model=assistant_model,
|
40 |
+
planning_agent_model=assistant_model
|
41 |
+
).get_tools(),
|
42 |
+
*DocumentProcessingToolkit().get_tools(),
|
43 |
+
*VideoAnalysisToolkit().get_tools(), # This requires OpenAI and Qwen Key
|
44 |
+
*CodeExecutionToolkit().get_tools(),
|
45 |
+
*ImageAnalysisToolkit(model=assistant_model).get_tools(),
|
46 |
+
*AudioAnalysisToolkit().get_tools(), # This requires OpenAI Key
|
47 |
+
*SearchToolkit(model=assistant_model).get_tools(),
|
48 |
+
*ExcelToolkit().get_tools()
|
49 |
+
]
|
50 |
|
51 |
user_role_name = 'user'
|
52 |
+
user_agent_kwargs = dict(model=user_model)
|
|
|
|
|
|
|
53 |
assistant_role_name = 'assistant'
|
54 |
+
assistant_agent_kwargs = dict(model=assistant_model,
|
55 |
+
tools=tools_list)
|
|
|
|
|
56 |
|
57 |
task_kwargs = {
|
58 |
'task_prompt': question,
|
owl/run_gaia_roleplaying.py
CHANGED
@@ -2,7 +2,7 @@ from camel.models import ModelFactory
|
|
2 |
from camel.toolkits import *
|
3 |
from camel.types import ModelPlatformType, ModelType
|
4 |
from camel.configs import ChatGPTConfig
|
5 |
-
from utils import GAIABenchmark
|
6 |
|
7 |
from dotenv import load_dotenv
|
8 |
from retry import retry
|
@@ -36,28 +36,26 @@ def main():
|
|
36 |
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(), # [Optional] the config for model
|
37 |
)
|
38 |
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
50 |
|
51 |
user_role_name = 'user'
|
52 |
-
user_agent_kwargs =
|
53 |
-
'model': user_model,
|
54 |
-
'tools': process_tools(user_tools),
|
55 |
-
}
|
56 |
assistant_role_name = 'assistant'
|
57 |
-
assistant_agent_kwargs =
|
58 |
-
|
59 |
-
'tools': process_tools(assistant_tools),
|
60 |
-
}
|
61 |
|
62 |
benchmark = GAIABenchmark(
|
63 |
data_dir="data/gaia",
|
@@ -85,4 +83,3 @@ def main():
|
|
85 |
|
86 |
if __name__ == "__main__":
|
87 |
main()
|
88 |
-
|
|
|
2 |
from camel.toolkits import *
|
3 |
from camel.types import ModelPlatformType, ModelType
|
4 |
from camel.configs import ChatGPTConfig
|
5 |
+
from utils import GAIABenchmark
|
6 |
|
7 |
from dotenv import load_dotenv
|
8 |
from retry import retry
|
|
|
36 |
model_config_dict=ChatGPTConfig(temperature=0, top_p=1).as_dict(), # [Optional] the config for model
|
37 |
)
|
38 |
|
39 |
+
tools_list = [
|
40 |
+
*WebToolkit(
|
41 |
+
headless=False,
|
42 |
+
web_agent_model=assistant_model,
|
43 |
+
planning_agent_model=assistant_model
|
44 |
+
).get_tools(),
|
45 |
+
*DocumentProcessingToolkit().get_tools(),
|
46 |
+
*VideoAnalysisToolkit().get_tools(), # This requires OpenAI and Qwen Key
|
47 |
+
*CodeExecutionToolkit().get_tools(),
|
48 |
+
*ImageAnalysisToolkit(model=assistant_model).get_tools(),
|
49 |
+
*AudioAnalysisToolkit().get_tools(), # This requires OpenAI Key
|
50 |
+
*SearchToolkit(model=assistant_model).get_tools(),
|
51 |
+
*ExcelToolkit().get_tools()
|
52 |
+
]
|
53 |
|
54 |
user_role_name = 'user'
|
55 |
+
user_agent_kwargs = dict(model=user_model)
|
|
|
|
|
|
|
56 |
assistant_role_name = 'assistant'
|
57 |
+
assistant_agent_kwargs = dict(model=assistant_model,
|
58 |
+
tools=tools_list)
|
|
|
|
|
59 |
|
60 |
benchmark = GAIABenchmark(
|
61 |
data_dir="data/gaia",
|
|
|
83 |
|
84 |
if __name__ == "__main__":
|
85 |
main()
|
|
owl/utils/enhanced_role_playing.py
CHANGED
@@ -47,12 +47,12 @@ class OwlRolePlaying(RolePlaying):
|
|
47 |
self.assistant_sys_msg: Optional[BaseMessage]
|
48 |
self.user_sys_msg: Optional[BaseMessage]
|
49 |
|
50 |
-
self.is_reasoning_task = self._judge_if_reasoning_task(self.task_prompt)
|
51 |
|
52 |
-
if self.is_reasoning_task:
|
53 |
-
|
54 |
-
else:
|
55 |
-
|
56 |
|
57 |
self._init_agents(
|
58 |
init_assistant_sys_msg,
|
@@ -60,7 +60,7 @@ class OwlRolePlaying(RolePlaying):
|
|
60 |
assistant_agent_kwargs=self.assistant_agent_kwargs,
|
61 |
user_agent_kwargs=self.user_agent_kwargs,
|
62 |
output_language=self.output_language,
|
63 |
-
is_reasoning_task=self.is_reasoning_task
|
64 |
)
|
65 |
|
66 |
|
@@ -97,12 +97,12 @@ class OwlRolePlaying(RolePlaying):
|
|
97 |
elif 'model' not in user_agent_kwargs:
|
98 |
user_agent_kwargs.update(dict(model=self.model))
|
99 |
|
100 |
-
# If the task is a reasoning task, the assistant agent should use the reasoning model O3-MINI
|
101 |
-
if is_reasoning_task:
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
|
107 |
self.assistant_agent = ChatAgent(
|
108 |
init_assistant_sys_msg,
|
@@ -119,25 +119,25 @@ class OwlRolePlaying(RolePlaying):
|
|
119 |
self.user_sys_msg = self.user_agent.system_message
|
120 |
|
121 |
|
122 |
-
def _judge_if_reasoning_task(self, question: str) -> bool:
|
123 |
-
|
124 |
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
|
142 |
|
143 |
def _construct_gaia_sys_msgs(self):
|
|
|
47 |
self.assistant_sys_msg: Optional[BaseMessage]
|
48 |
self.user_sys_msg: Optional[BaseMessage]
|
49 |
|
50 |
+
# self.is_reasoning_task = self._judge_if_reasoning_task(self.task_prompt)
|
51 |
|
52 |
+
# if self.is_reasoning_task:
|
53 |
+
# logger.info("The task is judged as a reasoning or coding task. The assistant agent will use the reasoning model O3-MINI.")
|
54 |
+
# else:
|
55 |
+
# logger.info("The assistant agent will use the default model.")
|
56 |
|
57 |
self._init_agents(
|
58 |
init_assistant_sys_msg,
|
|
|
60 |
assistant_agent_kwargs=self.assistant_agent_kwargs,
|
61 |
user_agent_kwargs=self.user_agent_kwargs,
|
62 |
output_language=self.output_language,
|
63 |
+
# is_reasoning_task=self.is_reasoning_task
|
64 |
)
|
65 |
|
66 |
|
|
|
97 |
elif 'model' not in user_agent_kwargs:
|
98 |
user_agent_kwargs.update(dict(model=self.model))
|
99 |
|
100 |
+
# # If the task is a reasoning task, the assistant agent should use the reasoning model O3-MINI
|
101 |
+
# if is_reasoning_task:
|
102 |
+
# assistant_agent_kwargs['model'] = ModelFactory.create(
|
103 |
+
# model_platform=ModelPlatformType.OPENAI,
|
104 |
+
# model_type=ModelType.O3_MINI,
|
105 |
+
# )
|
106 |
|
107 |
self.assistant_agent = ChatAgent(
|
108 |
init_assistant_sys_msg,
|
|
|
119 |
self.user_sys_msg = self.user_agent.system_message
|
120 |
|
121 |
|
122 |
+
# def _judge_if_reasoning_task(self, question: str) -> bool:
|
123 |
+
# r"""Judge if the question is a reasoning task."""
|
124 |
|
125 |
+
# LLM = OpenAIModel(model_type=ModelType.O3_MINI)
|
126 |
+
# prompt = f"""
|
127 |
+
# Please judge whether the following question is a reasoning or coding task, which can be solved by reasoning without leveraging external resources, or is suitable for writing code to solve the task.
|
128 |
+
# If it is a reasoning or coding task, please return only "yes".
|
129 |
+
# If it is not a reasoning or coding task, please return only "no".
|
130 |
+
# Note:
|
131 |
+
# - If the question required some world knowledge to answer the question, please carefully judge it, because the model's own knowledge is often unreliable.
|
132 |
+
# - If it is suitable for writing codes (e.g. process excel files, write simulation codes, etc.), in most cases, it can be considered as a coding task.
|
133 |
+
# Question: <question>{question}</question>
|
134 |
+
# """
|
135 |
+
# messages = [{"role": "user", "content": prompt}]
|
136 |
+
# resp = LLM.run(messages)
|
137 |
+
# if 'yes' in resp.choices[0].message.content.lower():
|
138 |
+
# return True
|
139 |
+
# else:
|
140 |
+
# return False
|
141 |
|
142 |
|
143 |
def _construct_gaia_sys_msgs(self):
|