Spaces:
Runtime error
Runtime error
Shunfeng Zheng
committed on
Delete 题目转化.py
Browse files
题目转化.py
DELETED
@@ -1,140 +0,0 @@
|
|
1 |
-
import json
|
2 |
-
import re
|
3 |
-
import time
|
4 |
-
from openai import OpenAI
|
5 |
-
|
6 |
-
# ========= Parameters =========
n = 100  # Upper bound on how many empty-instruction data points get generated
input_path = "localization_samples.json"  # JSON file holding the source data points
output_path = "localization_with_instruction.json"  # JSON file receiving the results

model = "gpt-4o"  # Chat model name passed to the completions endpoint
# ==============================

# SECURITY: the API key must never be hard-coded in a source file. When no
# `api_key` argument is given, the OpenAI client reads the key from the
# OPENAI_API_KEY environment variable. The key that was previously committed
# in this file has been exposed publicly and must be revoked.
client = OpenAI()
|
15 |
-
|
16 |
-
# System prompt sent with every request. It is assembled from four sections
# (role, function catalogue, task rules, reply format). Sections are separated
# by a blank line; lines inside a section are separated by a single newline.
system_prompt = "\n\n".join(
    "\n".join(section_lines)
    for section_lines in (
        # Role and overall goal: one paragraph without an internal line break.
        (
            "You are a geospatial task design expert. The user will provide a JSON data point containing a series of function-based steps (`steps`). "
            "Your job is to generate a natural and meaningful instruction (`instruction`) based on these steps.",
        ),
        # Catalogue of the step functions the model may see.
        (
            "There are four types of functions you might encounter:",
            "1. Coords: Input is a location name, output is the coordinates of that location.",
            "2. Relative: Input is a region (or place name), a direction, and a distance. Output is a new region offset by that direction and distance.",
            "3. Between: Input is two coordinates (or place names), output is the midpoint between them.",
            "4. Azimuth: Similar to Relative, but the direction is represented as an angle instead of a word.",
        ),
        # Rules the generated instruction and steps must follow.
        (
            "Your tasks are:",
            "- Complete the `instruction` field in the given JSON data point;",
            "- Replace all placeholders like LOC_1, LOC_2 with real global place names;",
            "- Also update the `steps` so that LOC_1, LOC_2 are replaced accordingly;",
            "- The final `instruction` should NOT contain any LOC_x placeholders;",
            "- Create a realistic scenario, such as navigation, trip planning, or station setup;",
            "- First explain your reasoning: why you chose this scenario and these places;",
            "- Then output a JSON object with the following format, enclosed between ```json and ```:",
        ),
        # Reply template; extract_json_block() looks for this fenced block.
        (
            "Format:",
            "```json",
            "{",
            ' "index": xxx,',
            ' "instruction": "....",',
            ' "steps": [',
            ' {"id": 1, "function": ..., "inputs": [...]},',
            " ...",
            " ]",
            "}",
            "```",
        ),
    )
)
|
44 |
-
|
45 |
-
def extract_json_block(text):
    """Return the JSON object found in the first ```json fenced block of *text*.

    The fence must wrap a ``{...}`` object; surrounding whitespace inside the
    fence is ignored.

    Args:
        text: Text to search, e.g. a model reply.

    Returns:
        The decoded object, or ``None`` when there is no such fenced block or
        its content is not valid JSON.
    """
    fenced = re.search(r"```json\s*(\{.*?\})\s*```", text, re.DOTALL)
    if fenced is None:
        return None

    payload = fenced.group(1)
    try:
        decoded = json.loads(payload)
    except json.JSONDecodeError:
        # A fence was present but its body is malformed JSON.
        return None
    return decoded
|
53 |
-
|
54 |
-
def format_step(step):
    """Render one step dict as a single-line JSON object string.

    Values are serialized with ``json.dumps`` so that quotes, backslashes,
    ``None``, booleans and nested lists come out as valid JSON. Plain strings
    and numbers render exactly as before.

    Args:
        step: Mapping with the keys ``"id"``, ``"function"`` and ``"inputs"``
            (``"inputs"`` being a list).

    Returns:
        A string of the form
        ``{"id": 1, "function": "Coords", "inputs": ["Paris"]}``.

    Raises:
        KeyError: If one of the three keys is missing.
    """
    step_id = json.dumps(step["id"])
    # str() keeps the old "always quoted" rendering for the function name.
    function = json.dumps(str(step["function"]), ensure_ascii=False)
    inputs = ", ".join(
        json.dumps(value, ensure_ascii=False) for value in step["inputs"]
    )
    return f'{{"id": {step_id}, "function": {function}, "inputs": [{inputs}]}}'
|
60 |
-
|
61 |
-
def _load_saved_results(path):
    """Return the JSON content already stored at *path*, or ``[]``.

    A missing file or a file that does not hold valid JSON (for example an
    empty file on the first run) yields an empty list instead of an error.
    """
    try:
        with open(path, "r", encoding="utf-8") as saved:
            return json.load(saved)
    except FileNotFoundError:
        return []
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        return []


# Load original data
with open(input_path, "r", encoding="utf-8") as f:
    data = json.load(f)

completed = 0  # number of data points successfully generated so far
output_data = []  # every item that made it through the loop, in order

for item in data:
    original_item = item.copy()

    # Only data points whose instruction is missing or blank are sent to the model.
    if not (item.get("instruction") or "").strip():
        if completed >= n:
            break

        user_prompt = f"Here is a geospatial task. Please generate an appropriate instruction according to the rules:\n{json.dumps(item, ensure_ascii=False)}"

        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
            )
            content = response.choices[0].message.content.strip()
            parsed = extract_json_block(content)
            if parsed is None:
                # Without this check a None would be stored as a "result"
                # and counted as completed.
                raise ValueError("model reply contained no valid ```json block")
        except Exception as e:
            # Best effort: report the failure and move on to the next data
            # point. The failed item is neither saved nor added to output_data.
            print(f"[✗] index {original_item['index']} error: {e}")
            continue

        item = parsed
        completed += 1

        # Checkpoint after every generated item so an interruption loses nothing.
        datapoint = _load_saved_results(output_path)
        datapoint.append(item)

        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(datapoint, f, ensure_ascii=False, indent=2)

        # Pause between consecutive API requests.
        time.sleep(1.5)

    output_data.append(item)

# NOTE: output_data is collected but not written anywhere; only the
# incrementally saved items above end up in output_path.

print(f"\n✅ Completed {completed} data points. Output saved to {output_path}")
|
116 |
-
|
117 |
-
def write_custom_json(data, filename):
    """Write *data* to *filename* as a JSON array with one step per line.

    The whole document is rendered in memory first and the file is opened
    only afterwards. A malformed item therefore raises before *filename* is
    truncated, which matters because this script rewrites its own output
    file in place.

    Args:
        data: Sequence of mappings, each with the keys ``"index"``,
            ``"instruction"`` and ``"steps"``; every step has ``"id"``,
            ``"function"`` and ``"inputs"``.
        filename: Path of the file to (over)write, encoded as UTF-8.

    Raises:
        KeyError: If an item or a step lacks one of the required keys.
        TypeError: If an item or a step is not subscriptable (e.g. ``None``).
    """

    def format_step(step):
        # json.dumps escapes quotes/backslashes, so the line stays valid JSON.
        function = json.dumps(str(step["function"]), ensure_ascii=False)
        inputs = json.dumps(step["inputs"], ensure_ascii=False)
        return f'{{"id": {step["id"]}, "function": {function}, "inputs": {inputs}}}'

    last_position = len(data) - 1
    chunks = ["[\n"]
    for i, item in enumerate(data):
        instruction = json.dumps(item["instruction"], ensure_ascii=False)
        step_lines = [f" {format_step(step)}" for step in item["steps"]]
        chunks.append(" {\n")
        chunks.append(f' "index": {item["index"]},\n')
        chunks.append(f' "instruction": {instruction},\n')
        chunks.append(' "steps": [\n')
        chunks.append(",\n".join(step_lines))
        chunks.append("\n ]\n")
        # Every object except the last is followed by a comma.
        chunks.append(" }" + (",\n" if i < last_position else "\n"))
    chunks.append("]\n")

    # Nothing above touched the file; write the finished text in one go.
    with open(filename, "w", encoding="utf-8") as f:
        f.write("".join(chunks))
|
135 |
-
|
136 |
-
# Regenerate custom formatted JSON
# Re-read the incrementally saved results and rewrite the same file with the
# one-step-per-line layout of write_custom_json. The path comes from
# output_path so it cannot drift from the file the generation loop writes.
with open(output_path, "r", encoding="utf-8") as f:
    data = json.load(f)

write_custom_json(data, output_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|