Update web_toolkit.py and add run_qwen_mini_zh.py (#105)
Files changed:
- owl/camel/toolkits/web_toolkit.py +105 -48
- owl/run_qwen_mini_zh.py +96 -0
owl/camel/toolkits/web_toolkit.py
CHANGED
@@ -5,7 +5,6 @@ from playwright._impl._errors import TimeoutError
|
|
5 |
from loguru import logger
|
6 |
from typing import Any, Dict, List, TypedDict, Union, BinaryIO
|
7 |
from PIL import Image, ImageDraw, ImageFont
|
8 |
-
from firecrawl import FirecrawlApp
|
9 |
from html2text import html2text
|
10 |
from retry import retry
|
11 |
from copy import deepcopy
|
@@ -16,6 +15,7 @@ from camel.messages import BaseMessage
|
|
16 |
from camel.agents import ChatAgent
|
17 |
from camel.models import ModelFactory, BaseModelBackend
|
18 |
from camel.types import ModelType, ModelPlatformType
|
|
|
19 |
|
20 |
import io
|
21 |
import random
|
@@ -49,7 +49,8 @@ AVAILABLE_ACTIONS_PROMPT = """
|
|
49 |
|
50 |
ACTION_WITH_FEEDBACK_LIST = [
|
51 |
'ask_question_about_video',
|
52 |
-
'download_file_id'
|
|
|
53 |
]
|
54 |
|
55 |
|
@@ -106,26 +107,61 @@ def _get_bool(d: Any, k: str) -> bool:
|
|
106 |
|
107 |
|
108 |
def _parse_json_output(text: str) -> Dict[str, Any]:
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
|
117 |
try:
|
118 |
return json.loads(text)
|
119 |
except json.JSONDecodeError:
|
120 |
-
logger.warning(f"Failed to parse JSON output: {text}")
|
121 |
-
# Try to clean the text further and attempt parsing again
|
122 |
try:
|
123 |
-
|
124 |
-
|
125 |
-
return json.loads(cleaned_text)
|
126 |
except json.JSONDecodeError:
|
127 |
-
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
|
130 |
|
131 |
def _reload_image(image: Image.Image):
|
@@ -187,7 +223,6 @@ def add_set_of_mark(
|
|
187 |
if isinstance(screenshot, bytes):
|
188 |
screenshot = io.BytesIO(screenshot)
|
189 |
|
190 |
-
# TODO: Not sure why this cast was needed, but by this point screenshot is a binary file-like object
|
191 |
image = Image.open(cast(BinaryIO, screenshot))
|
192 |
comp, visible_rects, rects_above, rects_below = _add_set_of_mark(image, ROIs)
|
193 |
image.close()
|
@@ -266,8 +301,7 @@ def _color(identifier: int) -> Tuple[int, int, int, int]:
|
|
266 |
class BaseBrowser:
|
267 |
def __init__(self,
|
268 |
headless=True,
|
269 |
-
cache_dir: Optional[str] = None
|
270 |
-
page_script_path: Optional[str] = None):
|
271 |
r"""Initialize the WebBrowserToolkit instance.
|
272 |
|
273 |
Args:
|
@@ -292,25 +326,23 @@ class BaseBrowser:
|
|
292 |
|
293 |
self.page_history = [] # stores the history of visited pages
|
294 |
|
295 |
-
|
296 |
# set the cache directory
|
297 |
self.cache_dir = "tmp/"
|
298 |
os.makedirs(self.cache_dir, exist_ok=True)
|
299 |
if cache_dir is not None:
|
300 |
self.cache_dir = cache_dir
|
301 |
-
|
302 |
# load the page script
|
303 |
-
|
304 |
-
|
305 |
-
page_script_path = os.path.join(abs_dir_path, "page_script.js")
|
306 |
|
307 |
try:
|
308 |
with open(page_script_path, "r", encoding='utf-8') as f:
|
309 |
self.page_script = f.read()
|
310 |
f.close()
|
311 |
except FileNotFoundError:
|
312 |
-
|
313 |
-
|
314 |
|
315 |
def init(self):
|
316 |
r"""Initialize the browser."""
|
@@ -333,6 +365,7 @@ class BaseBrowser:
|
|
333 |
# self.page.wait_for_load_state("networkidle", timeout=timeout_ms)
|
334 |
# self.page.wait_for_load_state("domcontentloaded", timeout=timeout_ms)
|
335 |
time.sleep(2)
|
|
|
336 |
|
337 |
def click_blank_area(self):
|
338 |
r"""Click a blank area of the page to unfocus the current element."""
|
@@ -379,7 +412,9 @@ class BaseBrowser:
|
|
379 |
file_path = None
|
380 |
if save_image:
|
381 |
# get url name to form a file name
|
382 |
-
url_name = self.page_url.split("/")[-1]
|
|
|
|
|
383 |
|
384 |
# get formatted time: mmddhhmmss
|
385 |
timestamp = datetime.datetime.now().strftime("%m%d%H%M%S")
|
@@ -486,7 +521,9 @@ class BaseBrowser:
|
|
486 |
file_path = None
|
487 |
comp, visible_rects, rects_above, rects_below = add_set_of_mark(screenshot, rects)
|
488 |
if save_image:
|
489 |
-
url_name = self.page_url.split("/")[-1]
|
|
|
|
|
490 |
timestamp = datetime.datetime.now().strftime("%m%d%H%M%S")
|
491 |
file_path = os.path.join(self.cache_dir, f"{url_name}_{timestamp}.png")
|
492 |
with open(file_path, "wb") as f:
|
@@ -513,13 +550,11 @@ class BaseBrowser:
|
|
513 |
identifier = str(identifier)
|
514 |
target = self.page.locator(f"[__elementId='{identifier}']")
|
515 |
|
516 |
-
# See if it exists
|
517 |
try:
|
518 |
target.wait_for(timeout=5000)
|
519 |
except TimeoutError:
|
520 |
raise ValueError("No such element.") from None
|
521 |
|
522 |
-
# Click it
|
523 |
target.scroll_into_view_if_needed()
|
524 |
|
525 |
new_page = None
|
@@ -533,15 +568,15 @@ class BaseBrowser:
|
|
533 |
if new_page:
|
534 |
self.page_history.append(deepcopy(self.page.url))
|
535 |
self.page = new_page
|
|
|
536 |
except PlaywrightError:
|
537 |
pass
|
538 |
-
|
539 |
self._wait_for_load()
|
540 |
|
541 |
|
542 |
def extract_url_content(self):
|
543 |
r"""Extract the content of the current page."""
|
544 |
-
# TODO: update it using firecrawl
|
545 |
content = self.page.content()
|
546 |
return content
|
547 |
|
@@ -719,7 +754,6 @@ class BaseBrowser:
|
|
719 |
}
|
720 |
""")
|
721 |
|
722 |
-
|
723 |
@retry(requests.RequestException)
|
724 |
def get_webpage_content(self) -> str:
|
725 |
self._wait_for_load()
|
@@ -729,29 +763,40 @@ class BaseBrowser:
|
|
729 |
return markdown_content
|
730 |
|
731 |
|
732 |
-
|
733 |
class WebToolkit(BaseToolkit):
|
|
|
|
|
|
|
|
|
734 |
def __init__(self,
|
735 |
-
headless=True,
|
736 |
cache_dir: Optional[str] = None,
|
737 |
-
page_script_path: Optional[str] = None,
|
738 |
history_window: int = 5,
|
739 |
web_agent_model: Optional[BaseModelBackend] = None,
|
740 |
planning_agent_model: Optional[BaseModelBackend] = None,
|
|
|
741 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
742 |
|
743 |
self.browser = BaseBrowser(
|
744 |
headless=headless,
|
745 |
-
cache_dir=cache_dir
|
746 |
-
page_script_path=page_script_path
|
747 |
)
|
748 |
|
749 |
self.history_window = history_window
|
750 |
self.web_agent_model = web_agent_model
|
751 |
self.planning_agent_model = planning_agent_model
|
|
|
752 |
|
753 |
self.history = []
|
754 |
-
# self.search_toolkit = SearchToolkit()
|
755 |
self.web_agent, self.planning_agent = self._initialize_agent()
|
756 |
|
757 |
|
@@ -789,6 +834,7 @@ Given a high-level task, you can leverage predefined browser tools to help users
|
|
789 |
web_agent = ChatAgent(
|
790 |
system_message=system_prompt,
|
791 |
model=web_agent_model,
|
|
|
792 |
)
|
793 |
|
794 |
planning_system_prompt = """
|
@@ -797,7 +843,8 @@ You are a helpful planning agent that can assist users in planning complex tasks
|
|
797 |
|
798 |
planning_agent = ChatAgent(
|
799 |
system_message=planning_system_prompt,
|
800 |
-
model=planning_model
|
|
|
801 |
)
|
802 |
|
803 |
return web_agent, planning_agent
|
@@ -873,6 +920,19 @@ Here are some tips for you:
|
|
873 |
observation_result: str = resp_dict.get("observation", "")
|
874 |
reasoning_result: str = resp_dict.get("reasoning", "")
|
875 |
action_code: str = resp_dict.get("action_code", "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
876 |
action_code = action_code.replace("`", "").strip()
|
877 |
|
878 |
return observation_result, reasoning_result, action_code
|
@@ -919,8 +979,6 @@ Here are some tips for you:
|
|
919 |
r"""Get the final answer based on the task prompt and current browser state.
|
920 |
It is used when the agent thinks that the task can be completed without any further action, and answer can be directly found in the current viewport.
|
921 |
"""
|
922 |
-
# screenshot, _ = self.browser.get_screenshot()
|
923 |
-
# img = _reload_image(screenshot)
|
924 |
|
925 |
prompt = f"""
|
926 |
We are solving a complex web task which needs multi-step browser interaction. After the multi-step observation, reasoning and acting with web browser, we think that the task is currently solved.
|
@@ -1036,37 +1094,36 @@ Your output should be in json format, including the following fields:
|
|
1036 |
return False, replanned_schema
|
1037 |
|
1038 |
|
|
|
1039 |
def browser_simulation(self,
|
1040 |
task_prompt: str,
|
1041 |
start_url: str,
|
|
|
1042 |
) -> str:
|
1043 |
r"""A powerful toolkit which can simulate the browser interaction to solve the task which needs multi-step actions.
|
1044 |
|
1045 |
Args:
|
1046 |
task_prompt (str): The task prompt to solve.
|
1047 |
start_url (str): The start URL to visit.
|
|
|
1048 |
|
1049 |
Returns:
|
1050 |
str: The simulation result to the task.
|
1051 |
"""
|
1052 |
-
|
1053 |
-
ROUND_LIMIT = 12
|
1054 |
-
|
1055 |
self._reset()
|
1056 |
task_completed = False
|
1057 |
-
|
1058 |
detailed_plan = self._task_planning(task_prompt, start_url)
|
1059 |
logger.debug(f"Detailed plan: {detailed_plan}")
|
1060 |
|
1061 |
self.browser.init()
|
1062 |
self.browser.visit_page(start_url)
|
1063 |
|
1064 |
-
for i in range(
|
1065 |
observation, reasoning, action_code = self._observe(task_prompt, detailed_plan)
|
1066 |
logger.debug(f"Observation: {observation}")
|
1067 |
logger.debug(f"Reasoning: {reasoning}")
|
1068 |
logger.debug(f"Action code: {action_code}")
|
1069 |
-
# breakpoint()
|
1070 |
|
1071 |
if "stop" in action_code:
|
1072 |
task_completed = True
|
|
|
5 |
from loguru import logger
|
6 |
from typing import Any, Dict, List, TypedDict, Union, BinaryIO
|
7 |
from PIL import Image, ImageDraw, ImageFont
|
|
|
8 |
from html2text import html2text
|
9 |
from retry import retry
|
10 |
from copy import deepcopy
|
|
|
15 |
from camel.agents import ChatAgent
|
16 |
from camel.models import ModelFactory, BaseModelBackend
|
17 |
from camel.types import ModelType, ModelPlatformType
|
18 |
+
from camel.utils import dependencies_required
|
19 |
|
20 |
import io
|
21 |
import random
|
|
|
49 |
|
50 |
ACTION_WITH_FEEDBACK_LIST = [
|
51 |
'ask_question_about_video',
|
52 |
+
'download_file_id',
|
53 |
+
'find_text_on_page',
|
54 |
]
|
55 |
|
56 |
|
|
|
107 |
|
108 |
|
109 |
def _parse_json_output(text: str) -> Dict[str, Any]:
|
110 |
+
"""Extract JSON output from a string."""
|
111 |
+
|
112 |
+
markdown_pattern = r'```(?:json)?\s*(.*?)\s*```'
|
113 |
+
markdown_match = re.search(markdown_pattern, text, re.DOTALL)
|
114 |
+
if markdown_match:
|
115 |
+
text = markdown_match.group(1).strip()
|
116 |
+
|
117 |
+
triple_quotes_pattern = r'"""(?:json)?\s*(.*?)\s*"""'
|
118 |
+
triple_quotes_match = re.search(triple_quotes_pattern, text, re.DOTALL)
|
119 |
+
if triple_quotes_match:
|
120 |
+
text = triple_quotes_match.group(1).strip()
|
121 |
+
|
122 |
+
text = text.replace("`", '"')
|
123 |
|
124 |
try:
|
125 |
return json.loads(text)
|
126 |
except json.JSONDecodeError:
|
|
|
|
|
127 |
try:
|
128 |
+
fixed_text = re.sub(r'`([^`]*)`', r'"\1"', text)
|
129 |
+
return json.loads(fixed_text)
|
|
|
130 |
except json.JSONDecodeError:
|
131 |
+
# Try to extract key fields
|
132 |
+
result = {}
|
133 |
+
try:
|
134 |
+
bool_pattern = r'"(\w+)"\s*:\s*(true|false)'
|
135 |
+
for match in re.finditer(bool_pattern, text, re.IGNORECASE):
|
136 |
+
key, value = match.groups()
|
137 |
+
result[key] = value.lower() == "true"
|
138 |
+
|
139 |
+
str_pattern = r'"(\w+)"\s*:\s*"([^"]*)"'
|
140 |
+
for match in re.finditer(str_pattern, text):
|
141 |
+
key, value = match.groups()
|
142 |
+
result[key] = value
|
143 |
+
|
144 |
+
num_pattern = r'"(\w+)"\s*:\s*(-?\d+(?:\.\d+)?)'
|
145 |
+
for match in re.finditer(num_pattern, text):
|
146 |
+
key, value = match.groups()
|
147 |
+
try:
|
148 |
+
result[key] = int(value)
|
149 |
+
except ValueError:
|
150 |
+
result[key] = float(value)
|
151 |
+
|
152 |
+
empty_str_pattern = r'"(\w+)"\s*:\s*""'
|
153 |
+
for match in re.finditer(empty_str_pattern, text):
|
154 |
+
key = match.group(1)
|
155 |
+
result[key] = ""
|
156 |
+
|
157 |
+
if result:
|
158 |
+
return result
|
159 |
+
|
160 |
+
logger.warning(f"Failed to parse JSON output: {text}")
|
161 |
+
return {}
|
162 |
+
except Exception as e:
|
163 |
+
logger.warning(f"Error while extracting fields from JSON: {e}")
|
164 |
+
return {}
|
165 |
|
166 |
|
167 |
def _reload_image(image: Image.Image):
|
|
|
223 |
if isinstance(screenshot, bytes):
|
224 |
screenshot = io.BytesIO(screenshot)
|
225 |
|
|
|
226 |
image = Image.open(cast(BinaryIO, screenshot))
|
227 |
comp, visible_rects, rects_above, rects_below = _add_set_of_mark(image, ROIs)
|
228 |
image.close()
|
|
|
301 |
class BaseBrowser:
|
302 |
def __init__(self,
|
303 |
headless=True,
|
304 |
+
cache_dir: Optional[str] = None):
|
|
|
305 |
r"""Initialize the WebBrowserToolkit instance.
|
306 |
|
307 |
Args:
|
|
|
326 |
|
327 |
self.page_history = [] # stores the history of visited pages
|
328 |
|
|
|
329 |
# set the cache directory
|
330 |
self.cache_dir = "tmp/"
|
331 |
os.makedirs(self.cache_dir, exist_ok=True)
|
332 |
if cache_dir is not None:
|
333 |
self.cache_dir = cache_dir
|
334 |
+
|
335 |
# load the page script
|
336 |
+
abs_dir_path = os.path.dirname(os.path.abspath(__file__))
|
337 |
+
page_script_path = os.path.join(abs_dir_path, "page_script.js")
|
|
|
338 |
|
339 |
try:
|
340 |
with open(page_script_path, "r", encoding='utf-8') as f:
|
341 |
self.page_script = f.read()
|
342 |
f.close()
|
343 |
except FileNotFoundError:
|
344 |
+
raise FileNotFoundError(f"Page script file not found at path: {page_script_path}")
|
345 |
+
|
346 |
|
347 |
def init(self):
|
348 |
r"""Initialize the browser."""
|
|
|
365 |
# self.page.wait_for_load_state("networkidle", timeout=timeout_ms)
|
366 |
# self.page.wait_for_load_state("domcontentloaded", timeout=timeout_ms)
|
367 |
time.sleep(2)
|
368 |
+
|
369 |
|
370 |
def click_blank_area(self):
|
371 |
r"""Click a blank area of the page to unfocus the current element."""
|
|
|
412 |
file_path = None
|
413 |
if save_image:
|
414 |
# get url name to form a file name
|
415 |
+
url_name = self.page_url.split("/")[-1]
|
416 |
+
for char in ['\\', '/', ':', '*', '?', '"', '<', '>', '|', '.']:
|
417 |
+
url_name = url_name.replace(char, "_")
|
418 |
|
419 |
# get formatted time: mmddhhmmss
|
420 |
timestamp = datetime.datetime.now().strftime("%m%d%H%M%S")
|
|
|
521 |
file_path = None
|
522 |
comp, visible_rects, rects_above, rects_below = add_set_of_mark(screenshot, rects)
|
523 |
if save_image:
|
524 |
+
url_name = self.page_url.split("/")[-1]
|
525 |
+
for char in ['\\', '/', ':', '*', '?', '"', '<', '>', '|', '.']:
|
526 |
+
url_name = url_name.replace(char, "_")
|
527 |
timestamp = datetime.datetime.now().strftime("%m%d%H%M%S")
|
528 |
file_path = os.path.join(self.cache_dir, f"{url_name}_{timestamp}.png")
|
529 |
with open(file_path, "wb") as f:
|
|
|
550 |
identifier = str(identifier)
|
551 |
target = self.page.locator(f"[__elementId='{identifier}']")
|
552 |
|
|
|
553 |
try:
|
554 |
target.wait_for(timeout=5000)
|
555 |
except TimeoutError:
|
556 |
raise ValueError("No such element.") from None
|
557 |
|
|
|
558 |
target.scroll_into_view_if_needed()
|
559 |
|
560 |
new_page = None
|
|
|
568 |
if new_page:
|
569 |
self.page_history.append(deepcopy(self.page.url))
|
570 |
self.page = new_page
|
571 |
+
|
572 |
except PlaywrightError:
|
573 |
pass
|
574 |
+
|
575 |
self._wait_for_load()
|
576 |
|
577 |
|
578 |
def extract_url_content(self):
|
579 |
r"""Extract the content of the current page."""
|
|
|
580 |
content = self.page.content()
|
581 |
return content
|
582 |
|
|
|
754 |
}
|
755 |
""")
|
756 |
|
|
|
757 |
@retry(requests.RequestException)
|
758 |
def get_webpage_content(self) -> str:
|
759 |
self._wait_for_load()
|
|
|
763 |
return markdown_content
|
764 |
|
765 |
|
|
|
766 |
class WebToolkit(BaseToolkit):
|
767 |
+
r"""A class for browsing the web and interacting with web pages.
|
768 |
+
|
769 |
+
This class provides methods for browsing the web and interacting with web pages.
|
770 |
+
"""
|
771 |
def __init__(self,
|
772 |
+
headless: bool = True,
|
773 |
cache_dir: Optional[str] = None,
|
|
|
774 |
history_window: int = 5,
|
775 |
web_agent_model: Optional[BaseModelBackend] = None,
|
776 |
planning_agent_model: Optional[BaseModelBackend] = None,
|
777 |
+
output_language: str = "en",
|
778 |
):
|
779 |
+
r"""Initialize the WebToolkit instance.
|
780 |
+
|
781 |
+
Args:
|
782 |
+
headless (bool): Whether to run the browser in headless mode.
|
783 |
+
cache_dir (Union[str, None]): The directory to store cache files.
|
784 |
+
history_window (int): The window size for storing the history of actions.
|
785 |
+
web_agent_model (Optional[BaseModelBackend]): The model backend for the web agent.
|
786 |
+
planning_agent_model (Optional[BaseModelBackend]): The model backend for the planning agent.
|
787 |
+
"""
|
788 |
|
789 |
self.browser = BaseBrowser(
|
790 |
headless=headless,
|
791 |
+
cache_dir=cache_dir
|
|
|
792 |
)
|
793 |
|
794 |
self.history_window = history_window
|
795 |
self.web_agent_model = web_agent_model
|
796 |
self.planning_agent_model = planning_agent_model
|
797 |
+
self.output_language = output_language
|
798 |
|
799 |
self.history = []
|
|
|
800 |
self.web_agent, self.planning_agent = self._initialize_agent()
|
801 |
|
802 |
|
|
|
834 |
web_agent = ChatAgent(
|
835 |
system_message=system_prompt,
|
836 |
model=web_agent_model,
|
837 |
+
output_language=self.output_language
|
838 |
)
|
839 |
|
840 |
planning_system_prompt = """
|
|
|
843 |
|
844 |
planning_agent = ChatAgent(
|
845 |
system_message=planning_system_prompt,
|
846 |
+
model=planning_model,
|
847 |
+
output_language=self.output_language
|
848 |
)
|
849 |
|
850 |
return web_agent, planning_agent
|
|
|
920 |
observation_result: str = resp_dict.get("observation", "")
|
921 |
reasoning_result: str = resp_dict.get("reasoning", "")
|
922 |
action_code: str = resp_dict.get("action_code", "")
|
923 |
+
|
924 |
+
if action_code and "(" in action_code and ")" not in action_code:
|
925 |
+
action_match = re.search(r'"action_code"\s*:\s*[`"]([^`"]*\([^)]*\))[`"]', resp_content)
|
926 |
+
if action_match:
|
927 |
+
action_code = action_match.group(1)
|
928 |
+
else:
|
929 |
+
logger.warning(f"Incomplete action_code detected: {action_code}")
|
930 |
+
if action_code.startswith("fill_input_id("):
|
931 |
+
parts = action_code.split(",", 1)
|
932 |
+
if len(parts) > 1:
|
933 |
+
id_part = parts[0].replace("fill_input_id(", "").strip()
|
934 |
+
action_code = f'fill_input_id({id_part}, "Please fill the text here.")'
|
935 |
+
|
936 |
action_code = action_code.replace("`", "").strip()
|
937 |
|
938 |
return observation_result, reasoning_result, action_code
|
|
|
979 |
r"""Get the final answer based on the task prompt and current browser state.
|
980 |
It is used when the agent thinks that the task can be completed without any further action, and answer can be directly found in the current viewport.
|
981 |
"""
|
|
|
|
|
982 |
|
983 |
prompt = f"""
|
984 |
We are solving a complex web task which needs multi-step browser interaction. After the multi-step observation, reasoning and acting with web browser, we think that the task is currently solved.
|
|
|
1094 |
return False, replanned_schema
|
1095 |
|
1096 |
|
1097 |
+
@dependencies_required("playwright")
|
1098 |
def browser_simulation(self,
|
1099 |
task_prompt: str,
|
1100 |
start_url: str,
|
1101 |
+
round_limit: int = 12
|
1102 |
) -> str:
|
1103 |
r"""A powerful toolkit which can simulate the browser interaction to solve the task which needs multi-step actions.
|
1104 |
|
1105 |
Args:
|
1106 |
task_prompt (str): The task prompt to solve.
|
1107 |
start_url (str): The start URL to visit.
|
1108 |
+
round_limit (int): The round limit to solve the task (default: 12).
|
1109 |
|
1110 |
Returns:
|
1111 |
str: The simulation result to the task.
|
1112 |
"""
|
1113 |
+
|
|
|
|
|
1114 |
self._reset()
|
1115 |
task_completed = False
|
|
|
1116 |
detailed_plan = self._task_planning(task_prompt, start_url)
|
1117 |
logger.debug(f"Detailed plan: {detailed_plan}")
|
1118 |
|
1119 |
self.browser.init()
|
1120 |
self.browser.visit_page(start_url)
|
1121 |
|
1122 |
+
for i in range(round_limit):
|
1123 |
observation, reasoning, action_code = self._observe(task_prompt, detailed_plan)
|
1124 |
logger.debug(f"Observation: {observation}")
|
1125 |
logger.debug(f"Reasoning: {reasoning}")
|
1126 |
logger.debug(f"Action code: {action_code}")
|
|
|
1127 |
|
1128 |
if "stop" in action_code:
|
1129 |
task_completed = True
|
owl/run_qwen_mini_zh.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dotenv import load_dotenv
|
2 |
+
load_dotenv()
|
3 |
+
|
4 |
+
from camel.models import ModelFactory
|
5 |
+
from camel.toolkits import WebToolkit,SearchToolkit,FunctionTool
|
6 |
+
from camel.types import ModelPlatformType,ModelType
|
7 |
+
|
8 |
+
from loguru import logger
|
9 |
+
|
10 |
+
from utils import OwlRolePlaying, run_society
|
11 |
+
import os
|
12 |
+
|
13 |
+
|
14 |
+
model_scope_api_key = os.getenv("MODELSCOPE_API_KEY")
|
15 |
+
|
16 |
+
def construct_society(question: str) -> OwlRolePlaying:
    r"""Construct the society based on the question.

    Creates five Qwen model backends (user, assistant, search, planning and
    web agent), wires the web and search toolkits into the assistant, and
    returns a role-playing society whose task prompt is ``question``.

    Args:
        question (str): The task prompt handed to the society.

    Returns:
        OwlRolePlaying: The configured society, with '中文' as the output
            language.
    """

    def _create_qwen_model(model_type: str):
        # All agents use the QWEN platform with temperature 0; only the
        # model type differs.
        return ModelFactory.create(
            model_platform=ModelPlatformType.QWEN,
            model_type=model_type,
            model_config_dict={"temperature": 0},
        )

    output_language = '中文'

    user_model = _create_qwen_model("qwen-max")
    assistant_model = _create_qwen_model("qwen-max")
    search_model = _create_qwen_model("qwen-max")
    planning_model = _create_qwen_model("qwen-max")
    # The web agent uses a different model type than the text-only agents.
    web_model = _create_qwen_model("qwen-vl-plus-latest")

    tools_list = [
        *WebToolkit(
            headless=False,
            web_agent_model=web_model,
            planning_agent_model=planning_model,
            output_language=output_language,
        ).get_tools(),
        FunctionTool(SearchToolkit(model=search_model).search_duckduckgo),
    ]

    return OwlRolePlaying(
        task_prompt=question,
        with_task_specify=False,
        output_language=output_language,
        user_role_name="user",
        user_agent_kwargs=dict(model=user_model),
        assistant_role_name="assistant",
        assistant_agent_kwargs=dict(model=assistant_model, tools=tools_list),
    )
|
83 |
+
|
84 |
+
|
85 |
+
# Example case
if __name__ == "__main__":
    # Guarded so that importing this module (e.g. to reuse
    # construct_society) does not start the example run; executing the file
    # as a script behaves as before.
    question = "打开小红书上浏览推荐栏目下的前三个笔记内容,不要登陆,之后给我一个总结报告"

    society = construct_society(question)
    answer, chat_history, token_count = run_society(society)

    logger.success(f"Answer: {answer}")
|
92 |
+
|
93 |
+
|
94 |
+
|
95 |
+
|
96 |
+
|