Sun Tao committed on
Commit
a093f82
·
1 Parent(s): d2c0614

Update web_toolkit.py

Browse files
Files changed (1) hide show
  1. owl/camel/toolkits/web_toolkit.py +60 -18
owl/camel/toolkits/web_toolkit.py CHANGED
@@ -106,26 +106,60 @@ def _get_bool(d: Any, k: str) -> bool:
106
 
107
 
108
  def _parse_json_output(text: str) -> Dict[str, Any]:
109
- # judge if text is markdown format (```json ````)
110
- if "```json" in text and "```" in text:
111
- # Extract content between ```json and the last ```
112
- start_idx = text.find("```json") + len("```json")
113
- end_idx = text.rfind("```")
114
- if start_idx > -1 and end_idx > start_idx:
115
- text = text[start_idx:end_idx].strip()
 
 
 
 
 
 
116
 
117
  try:
118
  return json.loads(text)
119
  except json.JSONDecodeError:
120
- logger.warning(f"Failed to parse JSON output: {text}")
121
- # Try to clean the text further and attempt parsing again
122
  try:
123
- # Remove any extra whitespace or control characters
124
- cleaned_text = text.strip()
125
- return json.loads(cleaned_text)
126
  except json.JSONDecodeError:
127
- logger.error(f"Failed to parse JSON even after cleaning: {text}")
128
- return {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
 
131
  def _reload_image(image: Image.Image):
@@ -379,7 +413,9 @@ class BaseBrowser:
379
  file_path = None
380
  if save_image:
381
  # get url name to form a file name
382
- url_name = self.page_url.split("/")[-1].replace(".", "_").replace(":", "_")
 
 
383
 
384
  # get formatted time: mmddhhmmss
385
  timestamp = datetime.datetime.now().strftime("%m%d%H%M%S")
@@ -486,14 +522,16 @@ class BaseBrowser:
486
  file_path = None
487
  comp, visible_rects, rects_above, rects_below = add_set_of_mark(screenshot, rects)
488
  if save_image:
489
- url_name = self.page_url.split("/")[-1].replace(".", "_").replace(":", "_")
 
 
490
  timestamp = datetime.datetime.now().strftime("%m%d%H%M%S")
491
  file_path = os.path.join(self.cache_dir, f"{url_name}_{timestamp}.png")
492
  with open(file_path, "wb") as f:
493
  comp.save(f, "PNG")
494
  f.close()
495
 
496
- return comp, file_path
497
 
498
 
499
  def scroll_up(self) -> None:
@@ -738,6 +776,7 @@ class WebToolkit(BaseToolkit):
738
  history_window: int = 5,
739
  web_agent_model: Optional[BaseModelBackend] = None,
740
  planning_agent_model: Optional[BaseModelBackend] = None,
 
741
  ):
742
 
743
  self.browser = BaseBrowser(
@@ -749,6 +788,7 @@ class WebToolkit(BaseToolkit):
749
  self.history_window = history_window
750
  self.web_agent_model = web_agent_model
751
  self.planning_agent_model = planning_agent_model
 
752
 
753
  self.history = []
754
  # self.search_toolkit = SearchToolkit()
@@ -789,6 +829,7 @@ Given a high-level task, you can leverage predefined browser tools to help users
789
  web_agent = ChatAgent(
790
  system_message=system_prompt,
791
  model=web_agent_model,
 
792
  )
793
 
794
  planning_system_prompt = """
@@ -797,7 +838,8 @@ You are a helpful planning agent that can assist users in planning complex tasks
797
 
798
  planning_agent = ChatAgent(
799
  system_message=planning_system_prompt,
800
- model=planning_model
 
801
  )
802
 
803
  return web_agent, planning_agent
 
106
 
107
 
108
  def _parse_json_output(text: str) -> Dict[str, Any]:
109
+ """Extract JSON output from a string."""
110
+
111
+ markdown_pattern = r'```(?:json)?\s*(.*?)\s*```'
112
+ markdown_match = re.search(markdown_pattern, text, re.DOTALL)
113
+ if markdown_match:
114
+ text = markdown_match.group(1).strip()
115
+
116
+ triple_quotes_pattern = r'"""(?:json)?\s*(.*?)\s*"""'
117
+ triple_quotes_match = re.search(triple_quotes_pattern, text, re.DOTALL)
118
+ if triple_quotes_match:
119
+ text = triple_quotes_match.group(1).strip()
120
+
121
+ text = text.replace("`", '"')
122
 
123
  try:
124
  return json.loads(text)
125
  except json.JSONDecodeError:
 
 
126
  try:
127
+ fixed_text = re.sub(r'`([^`]*)`', r'"\1"', text)
128
+ return json.loads(fixed_text)
 
129
  except json.JSONDecodeError:
130
+ result = {}
131
+ try:
132
+ bool_pattern = r'"(\w+)"\s*:\s*(true|false)'
133
+ for match in re.finditer(bool_pattern, text, re.IGNORECASE):
134
+ key, value = match.groups()
135
+ result[key] = value.lower() == "true"
136
+
137
+ str_pattern = r'"(\w+)"\s*:\s*"([^"]*)"'
138
+ for match in re.finditer(str_pattern, text):
139
+ key, value = match.groups()
140
+ result[key] = value
141
+
142
+ num_pattern = r'"(\w+)"\s*:\s*(-?\d+(?:\.\d+)?)'
143
+ for match in re.finditer(num_pattern, text):
144
+ key, value = match.groups()
145
+ try:
146
+ result[key] = int(value)
147
+ except ValueError:
148
+ result[key] = float(value)
149
+
150
+ empty_str_pattern = r'"(\w+)"\s*:\s*""'
151
+ for match in re.finditer(empty_str_pattern, text):
152
+ key = match.group(1)
153
+ result[key] = ""
154
+
155
+ if result:
156
+ return result
157
+
158
+ logger.warning(f"Failed to parse JSON output: {text}")
159
+ return {}
160
+ except Exception as e:
161
+ logger.warning(f"Error while extracting fields from JSON: {e}")
162
+ return {}
163
 
164
 
165
  def _reload_image(image: Image.Image):
 
413
  file_path = None
414
  if save_image:
415
  # get url name to form a file name
416
+ url_name = self.page_url.split("/")[-1]
417
+ for char in ['\\', '/', ':', '*', '?', '"', '<', '>', '|', '.']:
418
+ url_name = url_name.replace(char, "_")
419
 
420
  # get formatted time: mmddhhmmss
421
  timestamp = datetime.datetime.now().strftime("%m%d%H%M%S")
 
522
  file_path = None
523
  comp, visible_rects, rects_above, rects_below = add_set_of_mark(screenshot, rects)
524
  if save_image:
525
+ url_name = self.page_url.split("/")[-1]
526
+ for char in ['\\', '/', ':', '*', '?', '"', '<', '>', '|', '.']:
527
+ url_name = url_name.replace(char, "_")
528
  timestamp = datetime.datetime.now().strftime("%m%d%H%M%S")
529
  file_path = os.path.join(self.cache_dir, f"{url_name}_{timestamp}.png")
530
  with open(file_path, "wb") as f:
531
  comp.save(f, "PNG")
532
  f.close()
533
 
534
+ return comp, file_path
535
 
536
 
537
  def scroll_up(self) -> None:
 
776
  history_window: int = 5,
777
  web_agent_model: Optional[BaseModelBackend] = None,
778
  planning_agent_model: Optional[BaseModelBackend] = None,
779
+ output_language: str = "en"
780
  ):
781
 
782
  self.browser = BaseBrowser(
 
788
  self.history_window = history_window
789
  self.web_agent_model = web_agent_model
790
  self.planning_agent_model = planning_agent_model
791
+ self.output_language = output_language
792
 
793
  self.history = []
794
  # self.search_toolkit = SearchToolkit()
 
829
  web_agent = ChatAgent(
830
  system_message=system_prompt,
831
  model=web_agent_model,
832
+ output_language=self.output_language
833
  )
834
 
835
  planning_system_prompt = """
 
838
 
839
  planning_agent = ChatAgent(
840
  system_message=planning_system_prompt,
841
+ model=planning_model,
842
+ output_language=self.output_language
843
  )
844
 
845
  return web_agent, planning_agent