Update web_toolkit.py (#121)
Browse files
owl/camel/toolkits/web_toolkit.py
CHANGED
@@ -107,61 +107,119 @@ def _get_bool(d: Any, k: str) -> bool:
|
|
107 |
|
108 |
|
109 |
def _parse_json_output(text: str) -> Dict[str, Any]:
|
110 |
-
"""Extract JSON output from
|
111 |
|
112 |
markdown_pattern = r'```(?:json)?\s*(.*?)\s*```'
|
113 |
markdown_match = re.search(markdown_pattern, text, re.DOTALL)
|
114 |
if markdown_match:
|
115 |
text = markdown_match.group(1).strip()
|
116 |
|
117 |
-
|
118 |
-
|
119 |
-
if triple_quotes_match:
|
120 |
-
text = triple_quotes_match.group(1).strip()
|
121 |
-
|
122 |
-
text = text.replace("`", '"')
|
123 |
-
|
124 |
try:
|
125 |
return json.loads(text)
|
126 |
except json.JSONDecodeError:
|
|
|
127 |
try:
|
128 |
-
fixed_text = re.sub(r'`([^`]*)`', r'"\1"', text)
|
129 |
return json.loads(fixed_text)
|
130 |
except json.JSONDecodeError:
|
131 |
-
|
132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
try:
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
|
|
|
|
|
|
156 |
|
157 |
-
if
|
158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
|
166 |
|
167 |
def _reload_image(image: Image.Image):
|
|
|
107 |
|
108 |
|
109 |
def _parse_json_output(text: str) -> Dict[str, Any]:
|
110 |
+
"""Extract JSON output from text, supporting non-standard formats and special characters."""
|
111 |
|
112 |
markdown_pattern = r'```(?:json)?\s*(.*?)\s*```'
|
113 |
markdown_match = re.search(markdown_pattern, text, re.DOTALL)
|
114 |
if markdown_match:
|
115 |
text = markdown_match.group(1).strip()
|
116 |
|
117 |
+
text = re.sub(r':\s*`([^`]*)`', r': "\1"', text)
|
118 |
+
|
|
|
|
|
|
|
|
|
|
|
119 |
try:
|
120 |
return json.loads(text)
|
121 |
except json.JSONDecodeError:
|
122 |
+
fixed_text = re.sub(r'(?<!\\)"([^"]*?)(?<!\\)"', r'"\1"', text)
|
123 |
try:
|
|
|
124 |
return json.loads(fixed_text)
|
125 |
except json.JSONDecodeError:
|
126 |
+
pass
|
127 |
+
|
128 |
+
start_brace = text.find('{')
|
129 |
+
if start_brace == -1:
|
130 |
+
return {}
|
131 |
+
|
132 |
+
brace_count = 0
|
133 |
+
end_brace = -1
|
134 |
+
|
135 |
+
for i in range(start_brace, len(text)):
|
136 |
+
if text[i] == '{':
|
137 |
+
brace_count += 1
|
138 |
+
elif text[i] == '}':
|
139 |
+
brace_count -= 1
|
140 |
+
if brace_count == 0:
|
141 |
+
end_brace = i
|
142 |
+
break
|
143 |
+
|
144 |
+
if end_brace == -1:
|
145 |
+
return {}
|
146 |
+
|
147 |
+
json_text = text[start_brace:end_brace+1]
|
148 |
+
result = {}
|
149 |
+
|
150 |
+
|
151 |
+
simple_pairs = re.finditer(r'"([^"]+)"\s*:\s*(true|false|null|\d+(?:\.\d+)?)', json_text)
|
152 |
+
for match in simple_pairs:
|
153 |
+
key, value = match.groups()
|
154 |
+
if value.lower() == 'true':
|
155 |
+
result[key] = True
|
156 |
+
elif value.lower() == 'false':
|
157 |
+
result[key] = False
|
158 |
+
elif value.lower() == 'null':
|
159 |
+
result[key] = None
|
160 |
+
else:
|
161 |
try:
|
162 |
+
result[key] = float(value) if '.' in value else int(value)
|
163 |
+
except ValueError:
|
164 |
+
result[key] = value
|
165 |
+
|
166 |
+
keys = re.findall(r'"([^"]+)"\s*:', json_text)
|
167 |
+
|
168 |
+
for key in keys:
|
169 |
+
if key in result:
|
170 |
+
continue
|
171 |
+
|
172 |
+
key_pattern = f'"{re.escape(key)}"\\s*:'
|
173 |
+
key_match = re.search(key_pattern, json_text)
|
174 |
+
if not key_match:
|
175 |
+
continue
|
176 |
+
|
177 |
+
value_start = key_match.end()
|
178 |
+
value_text = json_text[value_start:].lstrip()
|
179 |
+
|
180 |
+
if value_text.startswith('"'):
|
181 |
+
i = 1
|
182 |
+
escaped = False
|
183 |
+
string_value = ""
|
184 |
+
|
185 |
+
while i < len(value_text):
|
186 |
+
char = value_text[i]
|
187 |
|
188 |
+
if escaped:
|
189 |
+
if char in ['"', '\\', '/', 'b', 'f', 'n', 'r', 't']:
|
190 |
+
string_value += {'n':'\n', 'r':'\r', 't':'\t', 'b':'\b', 'f':'\f'}.get(char, char)
|
191 |
+
else:
|
192 |
+
string_value += '\\' + char
|
193 |
+
escaped = False
|
194 |
+
elif char == '\\':
|
195 |
+
escaped = True
|
196 |
+
elif char == '"':
|
197 |
+
break
|
198 |
+
else:
|
199 |
+
string_value += char
|
200 |
|
201 |
+
i += 1
|
202 |
+
|
203 |
+
result[key] = string_value
|
204 |
+
|
205 |
+
elif value_text.startswith('{') or value_text.startswith('['):
|
206 |
+
bracket = '{' if value_text.startswith('{') else '['
|
207 |
+
closing_bracket = '}' if bracket == '{' else ']'
|
208 |
+
bracket_count = 0
|
209 |
+
|
210 |
+
for i, char in enumerate(value_text):
|
211 |
+
if char == bracket:
|
212 |
+
bracket_count += 1
|
213 |
+
elif char == closing_bracket:
|
214 |
+
bracket_count -= 1
|
215 |
+
if bracket_count == 0:
|
216 |
+
try:
|
217 |
+
result[key] = json.loads(value_text[:i+1])
|
218 |
+
except json.JSONDecodeError:
|
219 |
+
result[key] = value_text[:i+1]
|
220 |
+
break
|
221 |
+
|
222 |
+
return result
|
223 |
|
224 |
|
225 |
def _reload_image(image: Image.Image):
|