Spaces:
Runtime error
Runtime error
File size: 16,711 Bytes
17e77ea |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 |
import json
import os
import re

import geopandas as gpd
import numpy as np
import requests
import urllib3
from openai import OpenAI
from shapely.geometry import Polygon, MultiPoint, LineString, Point, mapping

import regex_spatial
from geocoder import geo_level1
from utils import geoutil
# OpenAI client shared by the llmapi() helper.
# The API key is taken from the OPENAI_API_KEY environment variable so that no
# secret lives in the source file.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be revoked.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)
# Name of the chat model passed to client.chat.completions.create().
model = "gpt-4o"

# Spelling variants of the compass directions and of "center".
# They are not referenced anywhere else in the visible part of this file.
north = [
    "north",
    "N'",
    "North",
    "NORTH",
]
south = [
    "south",
    "S'",
    "South",
    "SOUTH",
]
east = [
    "east",
    "E'",
    "East",
    "EAST",
]
west = [
    "west",
    "W'",
    "West",
    "WEST",
]
northeast = [
    "north-east",
    "NE'",
    "north east",
    "NORTH-EAST",
    "North East",
    "NORTH EAST",
]
southeast = [
    "south-east",
    "SE'",
    "south east",
    "SOUTH-EAST",
    "South East",
    "SOUTH EAST",
]
northwest = [
    "north-west",
    "NW'",
    "north west",
    "NORTH-WEST",
    "North West",
    "NORTH WEST",
]
southwest = [
    "south-west",
    "SW'",
    "south west",
    "SOUTH-WEST",
    "South West",
    "SOUTH WEST",
]
center = [
    "center",
    "central",
    "downtown",
    "midtown",
]

# JSON file with the instructions that the script section of this file reads.
dataset_path = 'dataset/dataset_20.json'
def get_geojson(ent, arr, centroid):
    """Wrap one coordinate ring in a GeoJSON-style ``FeatureCollection`` dict.

    :param ent: value stored as the feature's ``id``.
    :param arr: coordinate ring; stored (not copied) as the single ring of
        the ``Polygon`` geometry.
    :param centroid: value stored under ``properties['centroid']``.
    :return: dict with ``type`` ``'FeatureCollection'`` and exactly one
        feature.
    """
    feature = {
        'type': 'Feature',
        'id': ent,
        'properties': {'centroid': centroid},
        'geometry': {'type': 'Polygon', 'coordinates': [arr]},
    }
    return {'type': 'FeatureCollection', 'features': [feature]}
def get_coordinates(ent):
    """Look up a place on Nominatim and return its boundary ring and centre.

    Each point of the returned ring is extended in place with a third
    element: the value of ``geoutil.calculate_bearing(centroid, point)``.

    Note: a second function named ``get_coordinates`` is defined later in
    this file and replaces this one at import time.

    :param ent: free-text place name to search for.
    :return: ``(ring, centroid)`` where ``ring`` is a list of
        ``[lon, lat, bearing]`` points and ``centroid`` is ``(lon, lat)``
        built from the ``lon``/``lat`` fields of the first search result.
    :raises ValueError: if the search returns no result.
    :raises requests.HTTPError: if the service answers with an error status.
    """
    response = requests.get(
        'https://nominatim.openstreetmap.org/search.php',
        # Passing the query through ``params`` URL-encodes the place name, so
        # spaces, '&' or non-ASCII characters cannot corrupt the request.
        params={
            'q': ent,
            'polygon_geojson': 1,
            'accept-language': 'en',
            'format': 'jsonv2',
        },
        headers={
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15"
        },
        # NOTE(review): TLS certificate verification is disabled, as in the
        # original code; enable it once the deployment environment allows.
        verify=False,
        # Without a timeout a stalled connection would block forever.
        timeout=30,
    )
    response.raise_for_status()
    results = response.json()
    if not results:
        raise ValueError(f"No Nominatim result for {ent!r}")

    best = results[0]
    geometry = best['geojson']
    if geometry.get('type') == 'MultiPolygon':
        # MultiPolygon nests one level deeper: polygons -> rings -> points.
        # Use the outer ring of the first polygon.
        all_coordinates = geometry['coordinates'][0][0]
    else:
        # Polygon: first (outer) ring. Other geometry types are passed
        # through unchanged, as before.
        all_coordinates = geometry['coordinates'][0]

    centroid = (float(best['lon']), float(best['lat']))
    for p in all_coordinates:
        p.append(geoutil.calculate_bearing(centroid, (p[0], p[1])))
    return all_coordinates, centroid
# level3
def get_directional_coordinates_by_angle(coordinates, centroid, direction, minimum, maximum):
    """Keep the points whose bearing from ``centroid`` lies in a given range.

    The bearing of every point is obtained from
    ``geoutil.calculate_bearing(centroid, point)``.  For directions contained
    in ``geo_level1.east`` the range is treated as wrapping around, i.e. a
    point is kept when ``bearing >= minimum`` OR ``bearing <= maximum``; for
    every other direction both bounds must hold.

    :param coordinates: iterable of points (indexable, at least two items).
    :param centroid: reference point handed to ``calculate_bearing``.
    :param direction: direction label, only tested against ``geo_level1.east``.
    :param minimum: lower bearing bound (inclusive).
    :param maximum: upper bearing bound (inclusive).
    :return: list of ``(p[0], p[1], bearing)`` tuples for the kept points,
        in input order.
    """
    selected = []
    for point in coordinates:
        bearing = geoutil.calculate_bearing(centroid, point)
        above_min = bearing >= minimum
        below_max = bearing <= maximum
        if direction in geo_level1.east:
            keep = above_min or below_max
        else:
            keep = above_min and below_max
        if keep:
            selected.append((point[0], point[1], bearing))
    return selected
def get_level3(level3):
    """Split a distance expression such as ``"5 km"`` into number and unit.

    Only the first run of ASCII digits and the first run of ASCII letters are
    used, so ``"2.5 km"`` yields ``("2", "km")``.

    :param level3: distance text.
    :return: ``(digits, unit)``, both as strings.
    :raises IndexError: if the text contains no digits or no ASCII letters.
    """
    number_runs = re.findall('[0-9]+', level3)
    letter_runs = re.findall('[A-Za-z]+', level3)
    return number_runs[0], letter_runs[0]
def get_direction_coordinates(coordinates, centroid, level1):
    """Filter ``coordinates`` to the sector that belongs to ``level1``.

    The sector bounds come from ``geo_level1.get_min_max(level1)``.  When that
    lookup returns ``None`` the input coordinates are returned unchanged.

    :param coordinates: points to filter.
    :param centroid: reference point for the bearing computation.
    :param level1: direction label.
    :return: the filtered list of ``(x, y, bearing)`` tuples, or the original
        ``coordinates`` object when no bounds are known for ``level1``.
    """
    bounds = geo_level1.get_min_max(level1)
    if bounds is None:
        return coordinates
    return get_directional_coordinates_by_angle(
        coordinates, centroid, level1, bounds[0], bounds[1]
    )
def sort_west(poly1, poly2, centroid):
    """Collect the boundary points of two shapes that fall in a fixed sector.

    Both shapes are converted with ``shapely.geometry.mapping``; the rings of
    the first feature's geometry are flattened (each ring reversed) and only
    the points whose bearing from ``centroid`` lies in ``[157, 202]`` are
    kept.  Despite the name, no sorting is performed.

    NOTE(review): the 157/202 bounds are hard-coded; which compass sector
    they describe depends on the convention of ``geoutil.calculate_bearing``
    -- confirm.

    :param poly1: first shape (the caller passes a buffered ``GeoSeries``).
    :param poly2: second shape, handled like ``poly1``.
    :param centroid: reference point for the bearing computation.
    :return: list of ``(x, y, bearing)`` tuples, points of ``poly1`` first,
        then those of ``poly2``.
    """

    def flatten_rings(shape):
        # Every ring is reversed before its points are appended.
        rings = mapping(shape)["features"][0]["geometry"]["coordinates"]
        points = []
        for ring in rings:
            points.extend(list(ring[::-1]))
        return points

    def within_sector(points):
        kept = []
        for pt in points:
            bearing = geoutil.calculate_bearing(centroid, pt)
            if 157 <= bearing <= 202:
                kept.append((pt[0], pt[1], bearing))
        return kept

    first_points = flatten_rings(poly1)
    second_points = flatten_rings(poly2)
    result = within_sector(first_points)
    result.extend(within_sector(second_points))
    return result
def get_level3_coordinates(coordinates, level_3, level1):
    """Build the ring-shaped zone at a given distance around an area.

    The area polygon is buffered twice (``0.0095 * kms`` and ``0.013 * kms``,
    mitred joins) and the inner buffer is subtracted from the outer one.  The
    boundary points of that band are then optionally restricted to the
    direction ``level1``.

    :param coordinates: pair ``(ring, centroid)``; ``coordinates[0]`` is fed
        to ``Polygon`` and ``coordinates[1]`` is the reference centre.
    :param level_3: distance text such as ``"5 km"``.
    :param level1: direction label, or ``None`` to keep the whole band.
    :return: ``(points, center)`` where ``center`` is the centroid of the
        selected points, or ``coordinates[1]`` when no point was selected.
    """
    amount, unit = get_level3(level_3)
    kms = geoutil.get_kilometers(amount, unit)

    base = gpd.GeoSeries(Polygon(coordinates[0]))
    inner = base.buffer(0.0095 * kms, join_style=2)
    outer = base.buffer(0.013 * kms, join_style=2)
    band = outer.difference(inner)

    # Flatten all rings of the band; every ring is reversed first.
    rings = mapping(band)["features"][0]["geometry"]["coordinates"]
    coord = [pt for ring in rings for pt in ring[::-1]]

    if level1 is not None:
        coord = get_direction_coordinates(coord, coordinates[1], level1)
        if level1 in geo_level1.west:
            # West is recomputed from the two buffers by a dedicated helper.
            coord = sort_west(outer, inner, coordinates[1])

    if not coord:
        # Fallback: nothing selected, reuse the original centre point.
        return coord, coordinates[1]

    # Geometric centre (centroid) of the selected points.
    middle = MultiPoint(coord).centroid
    return coord, (middle.x, middle.y)
# level 3 end
# between
def get_between_coordinates(coordinates1, coordinates2):
    """Build a circular zone centred halfway between two areas.

    The circle is centred on the midpoint of the two centre points and its
    radius is derived from the mean of the two polygon areas.

    :param coordinates1: boundary ring and centre point of the first area,
        as ``(ring, centroid)``.
    :param coordinates2: boundary ring and centre point of the second area.
    :return: ``(circle_points, midpoint)`` -- 100 points on the circle and
        the circle centre.
    """
    # Mean polygon area, in squared coordinate units (only approximately
    # convertible to km^2; a proper projection would be needed for accuracy).
    first_area = Polygon(coordinates1[0]).area
    second_area = Polygon(coordinates2[0]).area
    mean_area = (first_area + second_area) / 2

    # Radius of the circle with that area, scaled by 111.32 km per degree.
    r_km = np.sqrt(mean_area / np.pi) * 111.32

    # Circle centre: midpoint of the two centre points.
    midpoint = (
        (coordinates1[1][0] + coordinates2[1][0]) / 2,
        (coordinates1[1][1] + coordinates2[1][1]) / 2,
    )

    # Approximate length of one degree: latitude ~111.32 km, longitude
    # ~111.32 km * cos(latitude).
    lat_km = 111.32
    lon_km = 111.32 * np.cos(np.radians(midpoint[1]))

    def point_at(theta_deg):
        # Offset from the centre, converted from km back to degrees.
        theta_rad = np.radians(theta_deg)
        shift_lon = (np.cos(theta_rad) * r_km) / lon_km
        shift_lat = (np.sin(theta_rad) * r_km) / lat_km
        return (midpoint[0] + shift_lon, midpoint[1] + shift_lat)

    # 100 samples from 0 to 360 degrees inclusive, so the outline is closed.
    outline = [point_at(theta) for theta in np.linspace(0, 360, num=100)]
    return outline, midpoint
# between end
def llmapi(text):
    """Ask the chat model to turn an instruction into positioning steps.

    This variant sends a Chinese system prompt.  Note that a second function
    named ``llmapi`` is defined later in this file and replaces this one at
    import time.

    :param text: natural-language instruction, sent as the user message.
    :return: the object decoded from the JSON found between the
        ``<<<JSON>>>`` and ``<<<END>>>`` markers of the reply.
    :raises ValueError: if the reply does not contain the marker block.
    """
    prompt_parts = [
        "你是一个资深的地理学家,你的任务是通过给定的一段自然语言,来选择正确的定位函数顺序以及他们的输入。\n",
        "你能选择的定位函数有:\n",
        "1. 相对定位(Relative Positioning):输入为地点坐标,方位,距离。输出为距离‘距离’输入的地点坐标的‘方位’的坐标。\n",
        "2. 中间定位(Between Positioning):输入为两个地点的坐标,输出为两个地点坐标的中点。\n",
        "请先进行思维链(CoT)推理,并最终用 JSON 格式输出你的答案,用 `<<<JSON>>>` 和 `<<<END>>>` 包裹起来。\n",
        "请确保所有输入仅包含:地点名称(字符串)、索引(整数)、方位(字符串,必须是英文)或距离(字符串,带单位),不允许返回诸如 'Chatswood 南4 km的坐标' 这样的内容。\n",
        "每个步骤编号都有 id 记录,然后如果某个输入是之前步骤的输出,那么输入对应步骤的 id。\n",
        "所有方向必须使用英文(如 south, west, northeast, etc.)。\n",
        "示例输出:\n",
        "<<<JSON>>>\n",
        "[{\"id\": 1, \"function\": \"Relative\", \"inputs\": [\"Chatswood\", \"south\", \"4 km\"]},",
        "{\"id\": 2, \"function\": \"Relative\", \"inputs\": [\"North Sydney\", \"west\", \"2 km\"]},",
        "{\"id\": 3, \"function\": \"Between\", \"inputs\": [1, 2]},",
        "{\"id\": 4, \"function\": \"Relative\", \"inputs\": [3, \"southwest\", \"5 km\"]}]\n",
        "<<<END>>>",
    ]
    system_prompt = "".join(prompt_parts)

    response = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": text},
        ],
        model=model,
    )
    reply = response.choices[0].message.content

    # The answer is expected between the two markers, each on its own line.
    found = re.search(r'<<<JSON>>>\n(.*?)\n<<<END>>>', reply, re.DOTALL)
    if not found:
        raise ValueError("LLM 输出未包含预期的 JSON 格式数据。")
    return json.loads(found.group(1))
def llmapi(text):
    """Ask the chat model to turn an instruction into positioning steps.

    Sends an English system prompt describing the ``Relative`` and
    ``Between`` functions, prints the raw reply, and decodes the JSON found
    between the ``<<<JSON>>>`` and ``<<<END>>>`` markers.

    :param text: natural-language instruction, sent as the user message.
    :return: the decoded JSON value (the prompt asks for a list of step
        dicts with ``id``, ``function`` and ``inputs``).
    :raises ValueError: if the reply is empty, lacks the marker block, or the
        block is not valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
    """
    system_prompt = (
        "You are an experienced geographer. Your task is to determine the correct sequence of positioning functions and their inputs based on a given piece of natural language.\n"
        "The positioning functions you can choose from are:\n"
        "1. Relative Positioning: Inputs is (location coordinate or location name, direction, and distance). Outputs the coordinates that are in the given 'direction' and 'distance' from the input location.\n"
        "2. Between Positioning: Inputs is (location 1 coordinates or location 1 name, location 2 coordinates or location 2 name). Outputs the midpoint coordinate between the two locations.\n"
        # The trailing newline was missing here, which glued this sentence to
        # the next instruction in the prompt.
        "You can only use the given functions, and the inputs to the functions must obey the above properties. The given functions can be combined to solve complex situations.\n"
        "First, perform chain-of-thought (CoT) reasoning, and finally output your answer in JSON format, wrapped between `<<<JSON>>>` and `<<<END>>>`.\n"
        "Make sure all inputs only include: location names (strings), step indices (integers), directions (strings, must be in English), or distances (strings with units). Do not return expressions like 'the coordinate 4 km south of Chatswood'.\n"
        "Each step must have an 'id'. If the input of a step is the output of a previous step, use that step’s 'id' as the input.\n"
        "All directions must be in English (e.g., south, west, northeast, etc.).\n"
        "Example output:\n"
        "<<<JSON>>>\n"
        "[{\"id\": 1, \"function\": \"Relative\", \"inputs\": [\"Chatswood\", \"south\", \"4 km\"]},"
        "{\"id\": 2, \"function\": \"Relative\", \"inputs\": [\"North Sydney\", \"west\", \"2 km\"]},"
        "{\"id\": 3, \"function\": \"Between\", \"inputs\": [1, 2]},"
        "{\"id\": 4, \"function\": \"Relative\", \"inputs\": [3, \"southwest\", \"5 km\"]}]\n"
        "<<<END>>>")
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )
    # ``content`` can be None; fall back to an empty string so the search
    # below fails with the documented ValueError instead of a TypeError.
    result = chat_completion.choices[0].message.content or ""
    print(result)
    # Tolerate any whitespace (or none) around the markers: the model does not
    # always put them on lines of their own.
    json_match = re.search(r'<<<JSON>>>\s*(.*?)\s*<<<END>>>', result, re.DOTALL)
    if json_match is None:
        raise ValueError("LLM 输出未包含预期的 JSON 格式数据。")
    return json.loads(json_match.group(1))
def get_coordinates(location):
    """Look up a place on Nominatim and return its boundary ring and centre.

    :param location: free-text place name to search for.
    :return: ``(coordinates, centroid)`` where ``coordinates`` is the outer
        ring of the first result's geometry and ``centroid`` is
        ``(lon, lat)`` built from the result's ``lon``/``lat`` fields.
    :raises ValueError: if the search returns no result.
    :raises requests.HTTPError: if the service answers with an error status.
    """
    response = requests.get(
        'https://nominatim.openstreetmap.org/search.php',
        # Passing the query through ``params`` URL-encodes the place name, so
        # spaces, '&' or non-ASCII characters cannot corrupt the request.
        params={
            'q': location,
            'polygon_geojson': 1,
            'accept-language': 'en',
            'format': 'jsonv2',
        },
        headers={"User-Agent": "Mozilla/5.0"},
        # NOTE(review): TLS certificate verification is disabled, as in the
        # original code; enable it once the deployment environment allows.
        verify=False,
        # Without a timeout a stalled connection would block forever.
        timeout=30,
    )
    response.raise_for_status()
    json_content = response.json()
    if not json_content:
        raise ValueError(f"No Nominatim result for {location!r}")

    best = json_content[0]
    geometry = best['geojson']
    if geometry.get('type') == 'MultiPolygon':
        # MultiPolygon nests one level deeper: polygons -> rings -> points.
        # Use the outer ring of the first polygon so callers still receive a
        # single ring that ``Polygon(...)`` accepts.
        coordinates = geometry['coordinates'][0][0]
    else:
        # Polygon: first (outer) ring. Other geometry types are passed
        # through unchanged, as before.
        coordinates = geometry['coordinates'][0]
    centroid = (float(best['lon']), float(best['lat']))
    return (coordinates, centroid)
def execute_steps(steps):
    """Run a list of positioning steps and collect every step's result.

    Each step is a dict with the keys ``id``, ``function`` and ``inputs``.
    Integer inputs are replaced by the stored result of the step with that
    id; all other inputs are passed through unchanged.  String locations are
    resolved with ``get_coordinates``.

    NOTE(review): the indentation of this function was lost in the source
    this was recovered from; the nesting below is a reconstruction -- confirm
    against the original file.

    :param steps: iterable of step dicts.
    :return: dict mapping step id to the result of that step.
    :raises KeyError: if an integer input refers to an id with no stored
        result.
    """
    data = {}
    for step in steps:
        step_id = step['id']
        function = step['function']
        inputs = step['inputs']
        # print('-' * 50)
        # print(function)
        # print(inputs)
        # Replace references to earlier steps by their stored results.
        resolved_inputs = []
        for inp in inputs:
            if isinstance(inp, int):
                resolved_inputs.append(data[inp])
            else:
                resolved_inputs.append(inp)
        if function == "Relative":
            location, direction, distance = resolved_inputs
            if isinstance(location, str):
                location = get_coordinates(location)
            result = get_level3_coordinates(location, distance, direction)
            # Debug output.
            print(location, distance, direction, 'rrr')
            print(result)
            data[step_id] = result
        elif function == "Between":
            location1, location2 = resolved_inputs
            if isinstance(location1, str):
                location1 = get_coordinates(location1)
                # Convert the (ring, centroid) tuple into a list.
                location1 = [location1[0]] + list(location1[1:])
                # location1 = [location1[0][0]] + list(location1[1:])
                # location1[0] = location1[0]
            if isinstance(location2, str):
                location2 = get_coordinates(location2)
                print(location2)
                location2 = [location2[0]] + list(location2[1:])
                # location2 = [location2[0][0]] + list(location2[1:])
            print(location1)
            result = get_between_coordinates(location1, location2)
            # Debug output.
            print(location1, location2, 'bbb')
            print(result)
            data[step_id] = result
        # Steps with any other function name are skipped without a result.
    return data
# a = get_coordinates('Burwood')
# a2 = get_coordinates('Glebe')
# b = get_level3_coordinates(a, '5 km', 'east')
# c = get_between_coordinates(a, a2)
# Full pipeline
# Default input
# Example instructions. Each assignment overwrites the previous one, so only
# the last uncommented value remains in effect. The variable is not read
# anywhere else in the visible part of this file.
default_input_text = "在Chatswood南边4公里与North Sydney 东边2公里的中间的西南5公里。"
default_input_text = "你是一位规划师,正在为华盛顿州的一项新森林监测站选址。两个潜在的参考位置分别是雷尼尔山国家公园(Mount Rainier National Park)和北喀斯喀特国家公园(North Cascades National Park)。首先,你想在这两个国家公园之间找到一个中间点。接着,你希望在这个中间点与北喀斯喀特国家公园之间,再取一个中间位置,以便确定最终的建设候选地。"
default_input_text = "在Chatswood和North Sydney的中间靠近North Sydney的四分之一位置"
default_input_text = "Plan a trip that involves determining the midpoint between Paris and London, and then finding another midpoint between this location and Paris to identify potential stopovers during travel."
# default_input_text = "5km southwest of Chatswood, 4km south of Chatswood and 2km north of North Sydney."
# Load the dataset; every entry is expected to carry an 'instruction' field.
with open(dataset_path, 'r', encoding='utf-8') as f:
    data = json.load(f)
answer_path = 'answer/GPT4o.json'
answer = []
# Make sure the output directory exists before the first write.
os.makedirs(os.path.dirname(answer_path) or '.', exist_ok=True)
for i in data:
    parsed_steps = llmapi(i['instruction'])
    # parsed_steps = [{"id": 1, "function": "Between", "inputs": ["Chatswood", "North Sydney"]},{"id": 2, "function": "Between", "inputs": [1, "North Sydney"]}]
    i["steps"] = parsed_steps
    # Re-read what has been saved so far. A missing, empty or corrupt answer
    # file simply means "start with an empty list"; the open() call is inside
    # the try block so that a missing file no longer aborts the run.
    try:
        with open(answer_path, "r", encoding="utf-8") as f:
            datapoint = json.load(f)
    except (FileNotFoundError, ValueError):
        datapoint = []
    datapoint.append(i)
    # Persist after every item so finished work survives a later failure.
    with open(answer_path, "w", encoding="utf-8") as f:
        json.dump(datapoint, f, ensure_ascii=False, indent=2)
# Format conversion
def write_custom_json(data, filename):
    """Write ``data`` as JSON with every step on a single line.

    Each item must provide ``index``, ``instruction`` and ``steps``; each step
    must provide ``id``, ``function`` and ``inputs``.  All values are encoded
    with ``json.dumps`` so that quotes, non-integer indices or booleans
    cannot produce an invalid file.

    :param data: sequence of item dicts.
    :param filename: path of the file to (over)write, encoded as UTF-8.
    """

    def format_step(step):
        # One compact JSON object per step, keys in a fixed order.
        step_id = json.dumps(step["id"], ensure_ascii=False)
        function = json.dumps(step["function"], ensure_ascii=False)
        inputs = json.dumps(step["inputs"], ensure_ascii=False)
        return f'{{"id": {step_id}, "function": {function}, "inputs": {inputs}}}'

    last = len(data) - 1
    with open(filename, "w", encoding="utf-8") as f:
        f.write("[\n")
        for i, item in enumerate(data):
            index = json.dumps(item["index"], ensure_ascii=False)
            instruction = json.dumps(item["instruction"], ensure_ascii=False)
            step_lines = [f" {format_step(step)}" for step in item["steps"]]
            f.write(" {\n")
            f.write(f' "index": {index},\n')
            f.write(f' "instruction": {instruction},\n')
            f.write(' "steps": [\n')
            f.write(",\n".join(step_lines))
            f.write("\n ]\n")
            # No comma after the last item.
            f.write(" }" + (",\n" if i < last else "\n"))
        f.write("]\n")
# Regenerate custom formatted JSON: reload the saved answers and rewrite the
# same file with one step per line.
with open(answer_path, "r", encoding="utf-8") as f:
    data = json.load(f)
write_custom_json(data, answer_path)