# File size: 16,711 Bytes
# 17e77ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
import requests
import urllib3
import json
from utils import geoutil
import regex_spatial
from shapely.geometry import Polygon, MultiPoint, LineString, Point, mapping
import re
import geopandas as gpd
from geocoder import geo_level1
from openai import OpenAI
import numpy as np

# The API key is deliberately NOT stored in source control. When no
# ``api_key`` argument is given, the OpenAI client reads it from the
# ``OPENAI_API_KEY`` environment variable.
# NOTE: the key that used to be hard-coded on this line has been exposed and
# should be revoked.
client = OpenAI()


# Chat model name passed to the completion endpoint.
model = "gpt-4o"

# Accepted spellings for each direction keyword, one list per direction.
north = [
    "north",
    "N'",
    "North",
    "NORTH",
]
south = [
    "south",
    "S'",
    "South",
    "SOUTH",
]
east = [
    "east",
    "E'",
    "East",
    "EAST",
]
west = [
    "west",
    "W'",
    "West",
    "WEST",
]
northeast = [
    "north-east",
    "NE'",
    "north east",
    "NORTH-EAST",
    "North East",
    "NORTH EAST",
]
southeast = [
    "south-east",
    "SE'",
    "south east",
    "SOUTH-EAST",
    "South East",
    "SOUTH EAST",
]
northwest = [
    "north-west",
    "NW'",
    "north west",
    "NORTH-WEST",
    "North West",
    "NORTH WEST",
]
southwest = [
    "south-west",
    "SW'",
    "south west",
    "SOUTH-WEST",
    "South West",
    "SOUTH WEST",
]
center = [
    "center",
    "central",
    "downtown",
    "midtown",
]

# JSON file holding the instructions that the script sends to the model.
dataset_path = 'dataset/dataset_20.json'

def get_geojson(ent, arr, centroid):
    """Wrap a single ring in a one-feature GeoJSON ``FeatureCollection``.

    :param ent: value stored as the feature ``id``
    :param arr: ring of points; it becomes the only ring of the polygon
    :param centroid: value stored under ``properties['centroid']``
    :return: dict with keys ``type`` and ``features``
    """
    feature = {
        'type': 'Feature',
        'id': ent,
        'properties': {'centroid': centroid},
        # ``arr`` is stored by reference, not copied.
        'geometry': {'type': 'Polygon', 'coordinates': [arr]},
    }
    return {'type': 'FeatureCollection', 'features': [feature]}


def get_coordinates(ent):
    """Geocode ``ent`` with Nominatim and return its boundary ring and centre.

    NOTE: a second ``get_coordinates`` is defined further down in this file.
    Python keeps the later definition, so this one is replaced once the
    module has finished loading.

    :param ent: free-text place name to search for
    :return: ``(ring, centroid)`` where ``ring`` is the first ring of the top
        search hit with the bearing from the centroid appended to every point
        (``[lon, lat, bearing]``) and ``centroid`` is ``(lon, lat)``
    :raises IndexError: if the search returns no result
    :raises requests.HTTPError: if the service answers with an error status
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15"
    }
    # Passing the query through ``params`` makes requests URL-encode the place
    # name, so names containing '&', '#' or '+' no longer corrupt the query
    # string (the old concatenation also sent a stray leading space).
    page = requests.get(
        'https://nominatim.openstreetmap.org/search.php',
        params={
            'q': ent,
            'polygon_geojson': 1,
            'accept-language': 'en',
            'format': 'jsonv2',
        },
        headers=headers,
        # NOTE(review): verify=False turns off TLS certificate checking. It is
        # kept because the original did so; confirm it is really required.
        verify=False,
        # Without a timeout a stalled connection would block forever.
        timeout=30,
    )
    page.raise_for_status()
    json_content = json.loads(page.content)
    if not json_content:
        raise IndexError(f"Nominatim returned no result for {ent!r}")

    best = json_content[0]
    geometry = best['geojson']
    all_coordinates = geometry['coordinates'][0]
    if geometry.get('type') == 'MultiPolygon':
        # MultiPolygon nests one level deeper (polygons -> rings -> points);
        # use the outer ring of the first polygon.
        all_coordinates = all_coordinates[0]

    centroid = (float(best['lon']), float(best['lat']))
    for p in all_coordinates:
        # The bearing is appended in place as a third element of each point.
        p.append(geoutil.calculate_bearing(centroid, (p[0], p[1])))

    return all_coordinates, centroid


# level3
def get_directional_coordinates_by_angle(coordinates, centroid, direction, minimum, maximum):
    """Keep the points whose bearing from ``centroid`` lies in an angle range.

    For directions listed in ``geo_level1.east`` the range is treated as
    wrapping around: a point is kept when its bearing is ``>= minimum`` OR
    ``<= maximum``. For every other direction the bearing must lie within
    ``[minimum, maximum]``.

    :param coordinates: iterable of points (index 0 and 1 are read)
    :param centroid: reference point passed to ``geoutil.calculate_bearing``
    :param direction: direction keyword used to pick the range test
    :param minimum: lower bound of the bearing range
    :param maximum: upper bound of the bearing range
    :return: list of ``(p[0], p[1], bearing)`` tuples for the kept points
    """
    selected = []
    for point in coordinates:
        bearing = geoutil.calculate_bearing(centroid, point)
        tagged = (point[0], point[1], bearing)
        if direction in geo_level1.east:
            keep = bearing >= minimum or bearing <= maximum
        else:
            keep = minimum <= bearing <= maximum
        if keep:
            selected.append(tagged)
    return selected
def get_level3(level3):
    """Split a distance expression such as ``"4 km"`` into amount and unit.

    The amount may contain a decimal part (``"2.5 km"`` -> ``"2.5"``); the
    previous integer-only pattern silently truncated it to ``"2"``.

    :param level3: text holding a number and an alphabetic unit
    :return: ``(digits, unit)``, both as strings
    :raises IndexError: if the text contains no number or no letters
    """
    # NOTE(review): the amount is handed on as text; confirm that
    # geoutil.get_kilometers accepts decimal strings.
    digits = re.findall(r'[0-9]+(?:\.[0-9]+)?', level3)[0]
    unit = re.findall(r'[A-Za-z]+', level3)[0]
    return digits, unit

def get_direction_coordinates(coordinates, centroid, level1):
    """Filter ``coordinates`` down to the sector that belongs to ``level1``.

    The angle range comes from ``geo_level1.get_min_max``. When that returns
    ``None`` the coordinates are returned unchanged.

    :param coordinates: points to filter
    :param centroid: reference point for the bearing computation
    :param level1: direction keyword
    :return: filtered list of ``(x, y, bearing)`` tuples, or ``coordinates``
    """
    bounds = geo_level1.get_min_max(level1)
    if bounds is None:
        return coordinates
    return get_directional_coordinates_by_angle(
        coordinates, centroid, level1, bounds[0], bounds[1]
    )
def sort_west(poly1, poly2, centroid):
    """Collect the points of two geometries whose bearing is within 157..202.

    Each geometry is read through ``mapping(...)``; only its first feature is
    used. Every ring is traversed in reverse order, and the points of
    ``poly1`` come before those of ``poly2`` in the result.

    :param poly1: first geometry collection
    :param poly2: second geometry collection
    :param centroid: reference point passed to ``geoutil.calculate_bearing``
    :return: list of ``(p[0], p[1], bearing)`` tuples
    """
    def reversed_points(rings):
        # Flatten all rings into one list, reversing each ring.
        return [pt for ring in rings for pt in ring[::-1]]

    def in_sector(points):
        hits = []
        for pt in points:
            bearing = geoutil.calculate_bearing(centroid, pt)
            if 157 <= bearing <= 202:
                hits.append((pt[0], pt[1], bearing))
        return hits

    rings_a = mapping(poly1)["features"][0]["geometry"]["coordinates"]
    rings_b = mapping(poly2)["features"][0]["geometry"]["coordinates"]
    points_a = reversed_points(rings_a)
    points_b = reversed_points(rings_b)

    return in_sector(points_a) + in_sector(points_b)


def get_level3_coordinates(coordinates, level_3, level1):
    """Build the band of points lying at a given distance around a region.

    The region polygon is buffered twice (by ``0.0095 * kms`` and by
    ``0.013 * kms``) and the inner buffer is subtracted from the outer one.
    When ``level1`` is given, the band is reduced to that direction.

    :param coordinates: ``(boundary ring, centre point)`` of the region
    :param level_3: distance text such as ``"4 km"``
    :param level1: direction keyword, or ``None`` to keep the whole band
    :return: ``(points, centre)``, where ``centre`` is the centroid of the
        points or the region's own centre point when no point is left
    """
    amount, unit = get_level3(level_3)
    kms = geoutil.get_kilometers(amount, unit)

    region = gpd.GeoSeries(Polygon(coordinates[0]))
    inner = region.buffer(0.0095 * kms, join_style=2)
    outer = region.buffer(0.013 * kms, join_style=2)
    band = outer.difference(inner)

    rings = mapping(band)["features"][0]["geometry"]["coordinates"]
    # Flatten every ring into one list of points, each ring reversed.
    coord = [vertex for ring in rings for vertex in ring[::-1]]

    if level1 is not None:
        coord = get_direction_coordinates(coord, coordinates[1], level1)
        if level1 in geo_level1.west:
            # West directions use the dedicated selection over both buffers.
            coord = sort_west(outer, inner, coordinates[1])

    if not coord:
        # Fallback: nothing left, so report the original centre point.
        return coord, coordinates[1]

    # Geometric centre (centroid) of the remaining points.
    centre_pt = MultiPoint(coord).centroid
    return coord, (centre_pt.x, centre_pt.y)
# level 3 end

# between
def get_between_coordinates(coordinates1, coordinates2):
    """Describe the area between two regions as a circle around their midpoint.

    The circle is centred on the midpoint of the two centre points and its
    radius is derived from the mean of the two polygon areas.

    :param coordinates1: boundary ring and centre point of the first region
    :param coordinates2: boundary ring and centre point of the second region
    :return: ``(circle_points, midpoint)``; the circle has 100 points
    """
    # Mean polygon area (in squared coordinate units, i.e. unprojected).
    mean_area = (Polygon(coordinates1[0]).area + Polygon(coordinates2[0]).area) / 2

    # Radius of the circle with that area, scaled by 111.32 km per degree.
    r_km = np.sqrt(mean_area / np.pi) * 111.32

    # Circle centre: midpoint of the two centre points.
    centre1, centre2 = coordinates1[1], coordinates2[1]
    midpoint = ((centre1[0] + centre2[0]) / 2, (centre1[1] + centre2[1]) / 2)

    # Approximate length of one degree: latitude is constant, longitude
    # shrinks with the cosine of the latitude.
    lat_km = 111.32
    lon_km = 111.32 * np.cos(np.radians(midpoint[1]))

    def point_at(theta):
        # Offset from the midpoint at angle ``theta`` (degrees).
        theta_rad = np.radians(theta)
        d_lat = (np.sin(theta_rad) * r_km) / lat_km
        d_lon = (np.cos(theta_rad) * r_km) / lon_km
        return (midpoint[0] + d_lon, midpoint[1] + d_lat)

    circle_points = [point_at(theta) for theta in np.linspace(0, 360, num=100)]
    return circle_points, midpoint
# between end


def llmapi(text):
    """Ask the chat model to plan positioning steps (Chinese prompt variant).

    NOTE: ``llmapi`` is defined a second time directly below this function.
    Python keeps the later definition, so this variant is replaced once the
    module has finished loading.

    :param text: natural-language instruction sent as the user message
    :return: the decoded JSON found between ``<<<JSON>>>`` and ``<<<END>>>``
    :raises ValueError: if the reply holds no marker-delimited block
    :raises json.JSONDecodeError: if a block is found but is not valid JSON
    """
    system_prompt = (
        "你是一个资深的地理学家,你的任务是通过给定的一段自然语言,来选择正确的定位函数顺序以及他们的输入。\n"
        "你能选择的定位函数有:\n"
        "1. 相对定位(Relative Positioning):输入为地点坐标,方位,距离。输出为距离‘距离’输入的地点坐标的‘方位’的坐标。\n"
        "2. 中间定位(Between Positioning):输入为两个地点的坐标,输出为两个地点坐标的中点。\n"
        "请先进行思维链(CoT)推理,并最终用 JSON 格式输出你的答案,用 `<<<JSON>>>` 和 `<<<END>>>` 包裹起来。\n"
        "请确保所有输入仅包含:地点名称(字符串)、索引(整数)、方位(字符串,必须是英文)或距离(字符串,带单位),不允许返回诸如 'Chatswood 南4 km的坐标' 这样的内容。\n"
        "每个步骤编号都有 id 记录,然后如果某个输入是之前步骤的输出,那么输入对应步骤的 id。\n"
        "所有方向必须使用英文(如 south, west, northeast, etc.)。\n"
        "示例输出:\n"
        "<<<JSON>>>\n"
        "[{\"id\": 1, \"function\": \"Relative\", \"inputs\": [\"Chatswood\", \"south\", \"4 km\"]},"
        "{\"id\": 2, \"function\": \"Relative\", \"inputs\": [\"North Sydney\", \"west\", \"2 km\"]},"
        "{\"id\": 3, \"function\": \"Between\", \"inputs\": [1, 2]},"
        "{\"id\": 4, \"function\": \"Relative\", \"inputs\": [3, \"southwest\", \"5 km\"]}]\n"
        "<<<END>>>")

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )

    # ``content`` can be None; treat that as an empty reply so the "missing
    # JSON" error below is raised instead of a TypeError inside ``re``.
    result = chat_completion.choices[0].message.content or ""

    # Tolerate any whitespace (spaces, "\r\n", blank lines) around the
    # markers. The negative lookahead stops a candidate from spanning a later
    # ``<<<JSON>>>`` marker, so a marker merely mentioned in the reasoning
    # text cannot swallow the real block. Candidates are tried in order and
    # the first one that decodes wins.
    decode_error = None
    pattern = r'<<<JSON>>>\s*((?:(?!<<<JSON>>>).)*?)\s*<<<END>>>'
    for candidate in re.finditer(pattern, result, re.DOTALL):
        try:
            return json.loads(candidate.group(1))
        except json.JSONDecodeError as err:
            decode_error = err
    if decode_error is not None:
        raise decode_error
    raise ValueError("LLM 输出未包含预期的 JSON 格式数据。")
def llmapi(text):
    """Ask the chat model to plan positioning steps for an instruction.

    The system prompt describes two functions (``Relative`` and ``Between``)
    and asks for a JSON list of steps wrapped in ``<<<JSON>>>`` /
    ``<<<END>>>``. The raw reply is printed before it is parsed.

    :param text: natural-language instruction sent as the user message
    :return: the decoded JSON found between the two markers
    :raises ValueError: if the reply holds no marker-delimited block
    :raises json.JSONDecodeError: if a block is found but is not valid JSON
    """
    # NOTE(review): the line ending in "complex situations." has no trailing
    # "\n", so it runs straight into the next sentence of the prompt. Left
    # untouched here to keep the prompt text unchanged.
    system_prompt = (
        "You are an experienced geographer. Your task is to determine the correct sequence of positioning functions and their inputs based on a given piece of natural language.\n"
        "The positioning functions you can choose from are:\n"
        "1. Relative Positioning: Inputs is (location coordinate or location name, direction, and distance). Outputs the coordinates that are in the given 'direction' and 'distance' from the input location.\n"
        "2. Between Positioning: Inputs is (location 1 coordinates or location 1 name, location 2 coordinates or location 2 name). Outputs the midpoint coordinate between the two locations.\n"
        "You can only use the given functions, and the inputs to the functions must obey the above properties. The given functions can be combined to solve complex situations."
        "First, perform chain-of-thought (CoT) reasoning, and finally output your answer in JSON format, wrapped between `<<<JSON>>>` and `<<<END>>>`.\n"
        "Make sure all inputs only include: location names (strings), step indices (integers), directions (strings, must be in English), or distances (strings with units). Do not return expressions like 'the coordinate 4 km south of Chatswood'.\n"
        "Each step must have an 'id'. If the input of a step is the output of a previous step, use that step’s 'id' as the input.\n"
        "All directions must be in English (e.g., south, west, northeast, etc.).\n"
        "Example output:\n"
        "<<<JSON>>>\n"
        "[{\"id\": 1, \"function\": \"Relative\", \"inputs\": [\"Chatswood\", \"south\", \"4 km\"]},"
        "{\"id\": 2, \"function\": \"Relative\", \"inputs\": [\"North Sydney\", \"west\", \"2 km\"]},"
        "{\"id\": 3, \"function\": \"Between\", \"inputs\": [1, 2]},"
        "{\"id\": 4, \"function\": \"Relative\", \"inputs\": [3, \"southwest\", \"5 km\"]}]\n"
        "<<<END>>>")

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )

    # ``content`` can be None; treat that as an empty reply so the "missing
    # JSON" error below is raised instead of a TypeError inside ``re``.
    result = chat_completion.choices[0].message.content or ""
    print(result)

    # Tolerate any whitespace (spaces, "\r\n", blank lines) around the
    # markers. The negative lookahead stops a candidate from spanning a later
    # ``<<<JSON>>>`` marker, so a marker merely mentioned in the reasoning
    # text cannot swallow the real block. Candidates are tried in order and
    # the first one that decodes wins.
    decode_error = None
    pattern = r'<<<JSON>>>\s*((?:(?!<<<JSON>>>).)*?)\s*<<<END>>>'
    for candidate in re.finditer(pattern, result, re.DOTALL):
        try:
            return json.loads(candidate.group(1))
        except json.JSONDecodeError as err:
            decode_error = err
    if decode_error is not None:
        raise decode_error
    raise ValueError("LLM 输出未包含预期的 JSON 格式数据。")


def get_coordinates(location):
    """Geocode ``location`` with Nominatim.

    Only the top search hit is used.

    :param location: free-text place name to search for
    :return: ``(coordinates, centroid)`` where ``coordinates`` is the first
        ring of the hit's GeoJSON geometry and ``centroid`` is ``(lon, lat)``
    :raises IndexError: if the search returns no result
    :raises requests.HTTPError: if the service answers with an error status
    """
    # Passing the query through ``params`` makes requests URL-encode the place
    # name, so names containing '&', '#' or '+' no longer corrupt the query
    # string.
    response = requests.get(
        'https://nominatim.openstreetmap.org/search.php',
        params={
            'q': location,
            'polygon_geojson': 1,
            'accept-language': 'en',
            'format': 'jsonv2',
        },
        headers={"User-Agent": "Mozilla/5.0"},
        # NOTE(review): verify=False turns off TLS certificate checking. It is
        # kept because the original did so; confirm it is really required.
        verify=False,
        # Without a timeout a stalled connection would block forever.
        timeout=30,
    )
    response.raise_for_status()
    json_content = json.loads(response.content)
    if not json_content:
        raise IndexError(f"Nominatim returned no result for {location!r}")

    best = json_content[0]
    geometry = best['geojson']
    coordinates = geometry['coordinates'][0]
    if geometry.get('type') == 'MultiPolygon':
        # MultiPolygon nests one level deeper (polygons -> rings -> points);
        # use the outer ring of the first polygon.
        coordinates = coordinates[0]

    centroid = (float(best['lon']), float(best['lat']))
    return (coordinates, centroid)


def execute_steps(steps):
    """Run a list of planned steps and collect each step's result by id.

    Integer inputs are treated as references to the result of an earlier
    step. String locations are geocoded with ``get_coordinates``. Steps whose
    ``function`` is neither ``"Relative"`` nor ``"Between"`` are skipped.
    Intermediate values are printed along the way.

    :param steps: list of dicts with keys ``id``, ``function`` and ``inputs``
    :return: dict mapping step id to that step's result
    :raises KeyError: if an integer input names a step with no stored result
    """
    data = {}

    for step in steps:
        step_id = step['id']
        function = step['function']
        inputs = step['inputs']

        # Replace step references by the results computed so far.
        resolved = [data[item] if isinstance(item, int) else item for item in inputs]

        if function == "Relative":
            location, direction, distance = resolved
            if isinstance(location, str):
                location = get_coordinates(location)
            result = get_level3_coordinates(location, distance, direction)
            print(location, distance, direction, 'rrr')
            print(result)
        elif function == "Between":
            location1, location2 = resolved
            if isinstance(location1, str):
                fetched = get_coordinates(location1)
                # Convert the geocoder's tuple into a list.
                location1 = [fetched[0], *fetched[1:]]
            if isinstance(location2, str):
                location2 = get_coordinates(location2)
                print(location2)
                location2 = [location2[0], *location2[1:]]

            print(location1)
            result = get_between_coordinates(location1, location2)
            print(location1, location2, 'bbb')
            print(result)
        else:
            # Unknown function names produce no result.
            continue

        data[step_id] = result

    return data

# a = get_coordinates('Burwood')
# a2 = get_coordinates('Glebe')
# b = get_level3_coordinates(a, '5 km', 'east')
# c = get_between_coordinates(a, a2)

# Full pipeline
# Default input: sample instructions kept for manual experiments. Every
# assignment below rebinds the same name, so only the last uncommented one
# is in effect once the module has loaded.
default_input_text = "在Chatswood南边4公里与North Sydney 东边2公里的中间的西南5公里。"
default_input_text = "你是一位规划师,正在为华盛顿州的一项新森林监测站选址。两个潜在的参考位置分别是雷尼尔山国家公园(Mount Rainier National Park)和北喀斯喀特国家公园(North Cascades National Park)。首先,你想在这两个国家公园之间找到一个中间点。接着,你希望在这个中间点与北喀斯喀特国家公园之间,再取一个中间位置,以便确定最终的建设候选地。"
default_input_text = "在Chatswood和North Sydney的中间靠近North Sydney的四分之一位置"
default_input_text = "Plan a trip that involves determining the midpoint between Paris and London, and then finding another midpoint between this location and Paris to identify potential stopovers during travel."
# default_input_text = "5km southwest of Chatswood, 4km south of Chatswood and 2km north of North Sydney."





# Load the instructions to process.
with open(dataset_path, 'r', encoding='utf-8') as f:
    data = json.load(f)


# File that accumulates one entry per processed instruction.
answer_path = 'answer/GPT4o.json'


answer = []
for i in data:
    parsed_steps = llmapi(i['instruction'])
    i["steps"] = parsed_steps

    # Re-read what has been saved so far. A missing file (first run) or an
    # empty/corrupt one simply starts a fresh list; previously the ``open``
    # call sat outside the ``try`` and a missing file crashed the run.
    # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
    try:
        with open(answer_path, "r", encoding="utf-8") as f:
            datapoint = json.load(f)
    except (FileNotFoundError, ValueError):
        datapoint = []

    datapoint.append(i)
    # The file is rewritten after every item, so finished answers survive a
    # failure on a later instruction. Entries already present in the file are
    # kept.
    with open(answer_path, "w", encoding="utf-8") as f:
        json.dump(datapoint, f, ensure_ascii=False, indent=2)



# Format conversion: rewrite the answers with one compact JSON line per step
def write_custom_json(data, filename):
    """Write ``data`` as a JSON array with one compact line per step.

    Every value is serialised with ``json.dumps`` so the file stays valid JSON
    even when an id is not a number or a function name contains quotes. The
    whole text is built before the file is opened, so a malformed item raises
    without truncating an existing file.

    :param data: iterable of dicts with keys ``index``, ``instruction`` and
        ``steps``; each step has ``id``, ``function`` and ``inputs``
    :param filename: path of the file to (over)write, encoded as UTF-8
    :raises KeyError: if an item or step lacks one of the keys above
    """
    def dump(value):
        return json.dumps(value, ensure_ascii=False)

    def format_step(step):
        return (
            f'{{"id": {dump(step["id"])}, '
            f'"function": {dump(step["function"])}, '
            f'"inputs": {dump(step["inputs"])}}}'
        )

    blocks = []
    for item in data:
        step_lines = ",\n".join(f"      {format_step(step)}" for step in item["steps"])
        blocks.append(
            "  {\n"
            f'    "index": {dump(item["index"])},\n'
            f'    "instruction": {dump(item["instruction"])},\n'
            '    "steps": [\n'
            f"{step_lines}\n"
            "    ]\n"
            "  }"
        )

    # Items are separated by ",\n"; the last one is followed by a newline.
    text = "[\n" + ",\n".join(blocks) + ("\n" if blocks else "") + "]\n"

    with open(filename, "w", encoding="utf-8") as f:
        f.write(text)

# Regenerate custom formatted JSON
# Read back everything that was saved, then overwrite the same file in the
# one-step-per-line layout.
with open(answer_path, encoding="utf-8") as f:
    data = json.load(f)
write_custom_json(data, answer_path)