Yuhang Zhou committed on
Commit
51cdf08
·
2 Parent(s): 075a9b5 340d2df

Update web_toolkit.py (#121)

Browse files
Files changed (1) hide show
  1. owl/camel/toolkits/web_toolkit.py +98 -40
owl/camel/toolkits/web_toolkit.py CHANGED
@@ -107,61 +107,119 @@ def _get_bool(d: Any, k: str) -> bool:
107
 
108
 
109
  def _parse_json_output(text: str) -> Dict[str, Any]:
110
- """Extract JSON output from a string."""
111
 
112
  markdown_pattern = r'```(?:json)?\s*(.*?)\s*```'
113
  markdown_match = re.search(markdown_pattern, text, re.DOTALL)
114
  if markdown_match:
115
  text = markdown_match.group(1).strip()
116
 
117
- triple_quotes_pattern = r'"""(?:json)?\s*(.*?)\s*"""'
118
- triple_quotes_match = re.search(triple_quotes_pattern, text, re.DOTALL)
119
- if triple_quotes_match:
120
- text = triple_quotes_match.group(1).strip()
121
-
122
- text = text.replace("`", '"')
123
-
124
  try:
125
  return json.loads(text)
126
  except json.JSONDecodeError:
 
127
  try:
128
- fixed_text = re.sub(r'`([^`]*)`', r'"\1"', text)
129
  return json.loads(fixed_text)
130
  except json.JSONDecodeError:
131
- # Try to extract key fields
132
- result = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  try:
134
- bool_pattern = r'"(\w+)"\s*:\s*(true|false)'
135
- for match in re.finditer(bool_pattern, text, re.IGNORECASE):
136
- key, value = match.groups()
137
- result[key] = value.lower() == "true"
138
-
139
- str_pattern = r'"(\w+)"\s*:\s*"([^"]*)"'
140
- for match in re.finditer(str_pattern, text):
141
- key, value = match.groups()
142
- result[key] = value
143
-
144
- num_pattern = r'"(\w+)"\s*:\s*(-?\d+(?:\.\d+)?)'
145
- for match in re.finditer(num_pattern, text):
146
- key, value = match.groups()
147
- try:
148
- result[key] = int(value)
149
- except ValueError:
150
- result[key] = float(value)
151
-
152
- empty_str_pattern = r'"(\w+)"\s*:\s*""'
153
- for match in re.finditer(empty_str_pattern, text):
154
- key = match.group(1)
155
- result[key] = ""
 
 
 
156
 
157
- if result:
158
- return result
 
 
 
 
 
 
 
 
 
 
159
 
160
- logger.warning(f"Failed to parse JSON output: {text}")
161
- return {}
162
- except Exception as e:
163
- logger.warning(f"Error while extracting fields from JSON: {e}")
164
- return {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
 
167
  def _reload_image(image: Image.Image):
 
107
 
108
 
109
  def _parse_json_output(text: str) -> Dict[str, Any]:
110
+ """Extract JSON output from text, supporting non-standard formats and special characters."""
111
 
112
  markdown_pattern = r'```(?:json)?\s*(.*?)\s*```'
113
  markdown_match = re.search(markdown_pattern, text, re.DOTALL)
114
  if markdown_match:
115
  text = markdown_match.group(1).strip()
116
 
117
+ text = re.sub(r':\s*`([^`]*)`', r': "\1"', text)
118
+
 
 
 
 
 
119
  try:
120
  return json.loads(text)
121
  except json.JSONDecodeError:
122
+ fixed_text = re.sub(r'(?<!\\)"([^"]*?)(?<!\\)"', r'"\1"', text)
123
  try:
 
124
  return json.loads(fixed_text)
125
  except json.JSONDecodeError:
126
+ pass
127
+
128
+ start_brace = text.find('{')
129
+ if start_brace == -1:
130
+ return {}
131
+
132
+ brace_count = 0
133
+ end_brace = -1
134
+
135
+ for i in range(start_brace, len(text)):
136
+ if text[i] == '{':
137
+ brace_count += 1
138
+ elif text[i] == '}':
139
+ brace_count -= 1
140
+ if brace_count == 0:
141
+ end_brace = i
142
+ break
143
+
144
+ if end_brace == -1:
145
+ return {}
146
+
147
+ json_text = text[start_brace:end_brace+1]
148
+ result = {}
149
+
150
+
151
+ simple_pairs = re.finditer(r'"([^"]+)"\s*:\s*(true|false|null|\d+(?:\.\d+)?)', json_text)
152
+ for match in simple_pairs:
153
+ key, value = match.groups()
154
+ if value.lower() == 'true':
155
+ result[key] = True
156
+ elif value.lower() == 'false':
157
+ result[key] = False
158
+ elif value.lower() == 'null':
159
+ result[key] = None
160
+ else:
161
  try:
162
+ result[key] = float(value) if '.' in value else int(value)
163
+ except ValueError:
164
+ result[key] = value
165
+
166
+ keys = re.findall(r'"([^"]+)"\s*:', json_text)
167
+
168
+ for key in keys:
169
+ if key in result:
170
+ continue
171
+
172
+ key_pattern = f'"{re.escape(key)}"\\s*:'
173
+ key_match = re.search(key_pattern, json_text)
174
+ if not key_match:
175
+ continue
176
+
177
+ value_start = key_match.end()
178
+ value_text = json_text[value_start:].lstrip()
179
+
180
+ if value_text.startswith('"'):
181
+ i = 1
182
+ escaped = False
183
+ string_value = ""
184
+
185
+ while i < len(value_text):
186
+ char = value_text[i]
187
 
188
+ if escaped:
189
+ if char in ['"', '\\', '/', 'b', 'f', 'n', 'r', 't']:
190
+ string_value += {'n':'\n', 'r':'\r', 't':'\t', 'b':'\b', 'f':'\f'}.get(char, char)
191
+ else:
192
+ string_value += '\\' + char
193
+ escaped = False
194
+ elif char == '\\':
195
+ escaped = True
196
+ elif char == '"':
197
+ break
198
+ else:
199
+ string_value += char
200
 
201
+ i += 1
202
+
203
+ result[key] = string_value
204
+
205
+ elif value_text.startswith('{') or value_text.startswith('['):
206
+ bracket = '{' if value_text.startswith('{') else '['
207
+ closing_bracket = '}' if bracket == '{' else ']'
208
+ bracket_count = 0
209
+
210
+ for i, char in enumerate(value_text):
211
+ if char == bracket:
212
+ bracket_count += 1
213
+ elif char == closing_bracket:
214
+ bracket_count -= 1
215
+ if bracket_count == 0:
216
+ try:
217
+ result[key] = json.loads(value_text[:i+1])
218
+ except json.JSONDecodeError:
219
+ result[key] = value_text[:i+1]
220
+ break
221
+
222
+ return result
223
 
224
 
225
  def _reload_image(image: Image.Image):