Shunfeng Zheng committed on
Commit
f6dcde4
·
verified ·
1 Parent(s): 5ba8304

Delete 题目转化.py

Browse files
Files changed (1) hide show
  1. 题目转化.py +0 -140
题目转化.py DELETED
@@ -1,140 +0,0 @@
1
- import json
2
- import re
3
- import time
4
- from openai import OpenAI
5
-
6
# ========= Parameters =========
n = 100  # Only generate for the first n data points with empty instruction
input_path = "localization_samples.json"
output_path = "localization_with_instruction.json"

model = "gpt-4o"
# ==============================

# SECURITY: the API key must never be hard-coded in source control. When no
# `api_key` argument is given, the OpenAI client reads the key from the
# OPENAI_API_KEY environment variable, so export it before running the script.
# NOTE(review): a key was previously committed in this file; it should be
# treated as compromised and revoked.
client = OpenAI()
15
-
16
# System prompt sent with every request. It describes the four step functions
# the model may see, lists the rewriting rules, and pins the fenced JSON
# format that the reply must end with.
system_prompt = "".join((
    # Role and overall goal.
    "You are a geospatial task design expert. The user will provide a JSON data point containing a series of function-based steps (`steps`). ",
    "Your job is to generate a natural and meaningful instruction (`instruction`) based on these steps.\n\n",
    # The function vocabulary used inside `steps`.
    "There are four types of functions you might encounter:\n",
    "1. Coords: Input is a location name, output is the coordinates of that location.\n",
    "2. Relative: Input is a region (or place name), a direction, and a distance. Output is a new region offset by that direction and distance.\n",
    "3. Between: Input is two coordinates (or place names), output is the midpoint between them.\n",
    "4. Azimuth: Similar to Relative, but the direction is represented as an angle instead of a word.\n\n",
    # What the model has to do with the data point.
    "Your tasks are:\n",
    "- Complete the `instruction` field in the given JSON data point;\n",
    "- Replace all placeholders like LOC_1, LOC_2 with real global place names;\n",
    "- Also update the `steps` so that LOC_1, LOC_2 are replaced accordingly;\n",
    "- The final `instruction` should NOT contain any LOC_x placeholders;\n",
    "- Create a realistic scenario, such as navigation, trip planning, or station setup;\n",
    "- First explain your reasoning: why you chose this scenario and these places;\n",
    "- Then output a JSON object with the following format, enclosed between ```json and ```:\n\n",
    # Expected shape of the fenced JSON answer.
    "Format:\n",
    "```json\n",
    "{\n",
    ' "index": xxx,\n',
    ' "instruction": "....",\n',
    ' "steps": [\n',
    ' {"id": 1, "function": ..., "inputs": [...]},\n',
    " ...\n",
    " ]\n",
    "}\n",
    "```",
))
44
-
45
def extract_json_block(text):
    """Return the JSON object embedded in a model reply, or None.

    Candidates are tried in order: a block fenced with ```json (the tag is
    matched case-insensitively), a block fenced with a bare ```, and finally
    the whole reply with surrounding whitespace removed. The first candidate
    that parses to a JSON object wins.

    Args:
        text: The raw reply text. Anything that is not a string yields None.

    Returns:
        The parsed dict, or None when no candidate parses to a JSON object.
    """
    if not isinstance(text, str):
        return None

    candidates = []
    # The ```json fence is tried first so replies that follow the requested
    # format are handled exactly as before; the bare fence is a fallback.
    for pattern in (r"```json\s*(\{.*?\})\s*```", r"```\s*(\{.*?\})\s*```"):
        match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
        if match:
            candidates.append(match.group(1))
    # Last resort: the reply is nothing but the JSON object itself.
    candidates.append(text.strip())

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed
    return None
53
-
54
def format_step(step):
    """Render one step dict as a single-line JSON object string.

    Every value is serialized with ``json.dumps`` so that quotes, backslashes,
    booleans, ``None`` and nested lists come out as valid JSON rather than as
    hand-quoted text or Python reprs.

    Args:
        step: Mapping with the keys "id", "function" and "inputs" (a list).

    Returns:
        A string of the form
        ``{"id": 1, "function": "Coords", "inputs": ["Paris"]}``.

    Raises:
        KeyError: If one of the three keys is missing.
    """
    rendered_inputs = ", ".join(
        json.dumps(value, ensure_ascii=False) for value in step["inputs"]
    )
    step_id = json.dumps(step["id"])
    function_name = json.dumps(step["function"], ensure_ascii=False)
    return f'{{"id": {step_id}, "function": {function_name}, "inputs": [{rendered_inputs}]}}'
60
-
61
# Load original data
with open(input_path, "r", encoding="utf-8") as f:
    data = json.load(f)

completed = 0
output_data = []

# NOTE(review): the nesting below was reconstructed from a copy of the script
# whose indentation had been lost. It assumes the incremental save and the
# rate-limit sleep belong to the "instruction is empty" branch; confirm
# against the original file.
for item in data:
    # Only items whose instruction is missing or blank are sent to the model.
    if (item.get("instruction") or "").strip() == "":
        if completed >= n:
            break

        user_prompt = f"Here is a geospatial task. Please generate an appropriate instruction according to the rules:\n{json.dumps(item, ensure_ascii=False)}"

        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
            )
            content = response.choices[0].message.content.strip()
        except Exception as e:
            # Best effort: report the failure and move on to the next item.
            print(f"[✗] index {item['index']} error: {e}")
            continue

        parsed = extract_json_block(content)
        if parsed is None:
            # Without this guard a reply with no usable JSON would be stored
            # as None and still be counted as a completed data point.
            print(f"[✗] index {item['index']} error: no valid JSON block in model response")
            continue

        item = parsed
        completed += 1

        # Save progress after every generated item. A missing or unreadable
        # output file (for example on the very first run) starts a new list.
        try:
            with open(output_path, "r", encoding="utf-8") as f:
                datapoint = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            datapoint = []

        datapoint.append(item)

        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(datapoint, f, ensure_ascii=False, indent=2)

        # Pause between API calls.
        time.sleep(1.5)

    output_data.append(item)

# # Save full output as JSON
# with open(output_path, "w", encoding="utf-8") as f:
#     json.dump(output_data, f, ensure_ascii=False, indent=2)

print(f"\n✅ Completed {completed} data points. Output saved to {output_path}")
116
-
117
def write_custom_json(data, filename):
    """Write items as a JSON array with one step object per line.

    The whole document is built in memory before the file is opened, so a
    malformed item raises without truncating an existing file. This matters
    when the target is the same file the data was just loaded from.

    Args:
        data: Sequence of dicts with the keys "index", "instruction" and
            "steps"; each step is a dict with "id", "function" and "inputs".
        filename: Path of the file to (over)write, encoded as UTF-8.

    Raises:
        KeyError, TypeError: If an item or step lacks the expected shape.
            The target file is left untouched in that case.
    """
    def format_step(step):
        # Serialize every field with json.dumps so that special characters in
        # the function name or the inputs cannot break the output.
        step_id = json.dumps(step["id"])
        function = json.dumps(step["function"], ensure_ascii=False)
        inputs = json.dumps(step["inputs"], ensure_ascii=False)
        return f'{{"id": {step_id}, "function": {function}, "inputs": {inputs}}}'

    chunks = ["[\n"]
    last = len(data) - 1
    for i, item in enumerate(data):
        instruction = json.dumps(item["instruction"], ensure_ascii=False)
        step_lines = [f" {format_step(step)}" for step in item["steps"]]
        chunks.append(" {\n")
        chunks.append(f' "index": {json.dumps(item["index"])},\n')
        chunks.append(f' "instruction": {instruction},\n')
        chunks.append(' "steps": [\n')
        chunks.append(",\n".join(step_lines))
        chunks.append("\n ]\n")
        # Every item except the last is followed by a comma.
        chunks.append(" }" + (",\n" if i < last else "\n"))
    chunks.append("]\n")

    # Only touch the file once the full document has been rendered.
    with open(filename, "w", encoding="utf-8") as f:
        f.write("".join(chunks))
135
-
136
# Regenerate custom formatted JSON: read the saved output back in and rewrite
# the same file in the one-step-per-line layout.
formatted_path = "localization_with_instruction.json"
with open(formatted_path, "r", encoding="utf-8") as f:
    data = json.load(f)

write_custom_json(data, formatted_path)