Spaces:

SpatialWebAgent
/

SpatialParse

Runtime error

App Files Files Community

Shunfeng Zheng committed 28 days ago

Commit

f6dcde4

verified ·

1 Parent(s): 5ba8304

Delete 题目转化.py

Browse files

Files changed (1) hide show

题目转化.py +0 -140

题目转化.py DELETED Viewed

@@ -1,140 +0,0 @@
-import json
-import re
-import time
-from openai import OpenAI
-# ========= Parameters =========
-n = 100  # Only generate for the first n data points with empty instruction
-input_path = "localization_samples.json"
-output_path = "localization_with_instruction.json"
-model = "gpt-4o"
-# ==============================
-client = OpenAI(api_key="<REDACTED_OPENAI_API_KEY>")
-system_prompt = (
-    "You are a geospatial task design expert. The user will provide a JSON data point containing a series of function-based steps (`steps`). "
-    "Your job is to generate a natural and meaningful instruction (`instruction`) based on these steps.\n\n"
-    "There are four types of functions you might encounter:\n"
-    "1. Coords: Input is a location name, output is the coordinates of that location.\n"
-    "2. Relative: Input is a region (or place name), a direction, and a distance. Output is a new region offset by that direction and distance.\n"
-    "3. Between: Input is two coordinates (or place names), output is the midpoint between them.\n"
-    "4. Azimuth: Similar to Relative, but the direction is represented as an angle instead of a word.\n\n"
-    "Your tasks are:\n"
-    "- Complete the `instruction` field in the given JSON data point;\n"
-    "- Replace all placeholders like LOC_1, LOC_2 with real global place names;\n"
-    "- Also update the `steps` so that LOC_1, LOC_2 are replaced accordingly;\n"
-    "- The final `instruction` should NOT contain any LOC_x placeholders;\n"
-    "- Create a realistic scenario, such as navigation, trip planning, or station setup;\n"
-    "- First explain your reasoning: why you chose this scenario and these places;\n"
-    "- Then output a JSON object with the following format, enclosed between ```json and ```:\n\n"
-    "Format:\n"
-    "```json\n"
-    "{\n"
-    '  "index": xxx,\n'
-    '  "instruction": "....",\n'
-    '  "steps": [\n'
-    "    {\"id\": 1, \"function\": ..., \"inputs\": [...]},\n"
-    "    ...\n"
-    "  ]\n"
-    "}\n"
-    "```"
-)
-def extract_json_block(text):
-    match = re.search(r"```json\s*(\{.*?\})\s*```", text, re.DOTALL)
-    if match:
-        try:
-            return json.loads(match.group(1))
-        except json.JSONDecodeError:
-            return None
-    return None
-def format_step(step):
-    inputs = [
-        f'"{inp}"' if isinstance(inp, str) else str(inp)
-        for inp in step["inputs"]
-    ]
-    return f'{{"id": {step["id"]}, "function": "{step["function"]}", "inputs": [{", ".join(inputs)}]}}'
-# Load original data
-with open(input_path, "r", encoding="utf-8") as f:
-    data = json.load(f)
-completed = 0
-output_data = []
-for item in data:
-    original_item = item.copy()
-    if item.get("instruction", "").strip() == "":
-        if completed >= n:
-            break
-        user_prompt = f"Here is a geospatial task. Please generate an appropriate instruction according to the rules:\n{json.dumps(item, ensure_ascii=False)}"
-        try:
-            response = client.chat.completions.create(
-                model=model,
-                messages=[
-                    {"role": "system", "content": system_prompt},
-                    {"role": "user", "content": user_prompt},
-                ]
-            )
-            content = response.choices[0].message.content.strip()
-            parsed = extract_json_block(content)
-            item = parsed
-            completed += 1
-        except Exception as e:
-            print(f"[✗] index {item['index']} error: {e}")
-            item = original_item
-            continue
-        with open('localization_with_instruction.json', "r", encoding="utf-8") as f:
-            try:
-                datapoint = json.load(f)
-            except:
-                datapoint = []
-        datapoint.append(item)
-        with open(output_path, "w", encoding="utf-8") as f:
-            json.dump(datapoint, f, ensure_ascii=False, indent=2)
-        time.sleep(1.5)
-    output_data.append(item)
-# # Save full output as JSON
-# with open(output_path, "w", encoding="utf-8") as f:
-#     json.dump(output_data, f, ensure_ascii=False, indent=2)
-print(f"\n✅ Completed {completed} data points. Output saved to {output_path}")
-def write_custom_json(data, filename):
-    def format_step(step):
-        inputs = json.dumps(step["inputs"], ensure_ascii=False)
-        return f'{{"id": {step["id"]}, "function": "{step["function"]}", "inputs": {inputs}}}'
-    with open(filename, "w", encoding="utf-8") as f:
-        f.write("[\n")
-        for i, item in enumerate(data):
-            f.write("  {\n")
-            f.write(f'    "index": {item["index"]},\n')
-            instruction = json.dumps(item["instruction"], ensure_ascii=False)
-            f.write(f'    "instruction": {instruction},\n')
-            f.write('    "steps": [\n')
-            step_lines = [f"      {format_step(step)}" for step in item["steps"]]
-            f.write(",\n".join(step_lines))
-            f.write("\n    ]\n")
-            f.write("  }" + (",\n" if i < len(data) - 1 else "\n"))
-        f.write("]\n")
-# Regenerate custom formatted JSON
-with open("localization_with_instruction.json", "r", encoding="utf-8") as f:
-    data = json.load(f)
-write_custom_json(data, "localization_with_instruction.json")