Spaces:
Runtime error
Runtime error
import json
import os
import re

import geopandas as gpd
import numpy as np
import requests
import urllib3
from openai import OpenAI
from shapely.geometry import Polygon, MultiPoint, LineString, Point, mapping

import regex_spatial
from geocoder import geo_level1
from utils import geoutil
# The API key is read from the OPENAI_API_KEY environment variable so that no
# secret lives in the source tree.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be revoked.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)
# Chat-completions model name passed to the OpenAI client by llmapi().
model = "gpt-4o"
# Spelling variants for the eight compass directions and for "central" areas.
# NOTE(review): none of these lists is read by the code visible in this file
# (direction checks go through geo_level1 instead) — confirm whether they are
# still needed.
north = ["north", "N'", "North", "NORTH"]
south = ["south", "S'", "South", "SOUTH"]
east = ["east", "E'", "East", "EAST"]
west = ["west", "W'", "West", "WEST"]
northeast = ["north-east", "NE'", "north east", "NORTH-EAST", "North East", "NORTH EAST"]
southeast = ["south-east", "SE'", "south east", "SOUTH-EAST", "South East", "SOUTH EAST"]
northwest = ["north-west", "NW'", "north west", "NORTH-WEST", "North West", "NORTH WEST"]
southwest = ["south-west", "SW'", "south west", "SOUTH-WEST", "South West", "SOUTH WEST"]
center = ["center","central", "downtown","midtown"]
# JSON dataset read by the script section further down in this file.
dataset_path = 'dataset/dataset_20.json'
def get_geojson(ent, arr, centroid):
    """Wrap a single ring of points in a GeoJSON-style FeatureCollection.

    Args:
        ent: Value stored as the feature's ``id``.
        arr: Ring of points; stored (not copied) as the only ring of the
            polygon geometry.
        centroid: Value stored under ``properties['centroid']``.

    Returns:
        A dict with one ``Feature`` whose geometry type is ``Polygon``.
    """
    feature = {
        'type': 'Feature',
        'id': ent,
        'properties': {'centroid': centroid},
        'geometry': {'type': 'Polygon', 'coordinates': [arr]},
    }
    return {'type': 'FeatureCollection', 'features': [feature]}
def get_coordinates(ent):
    """Geocode *ent* with Nominatim and tag each boundary point with a bearing.

    NOTE(review): a second ``get_coordinates`` is defined further down in this
    file; because it is defined later at module level it replaces this one
    once the module has finished loading.

    Args:
        ent: Free-text place name to search for.

    Returns:
        ``(ring, centroid)`` where ``ring`` is the first element of the first
        hit's GeoJSON ``coordinates`` with the bearing from the centroid
        appended to every point, and ``centroid`` is ``(lon, lat)`` as floats.
        NOTE(review): taking ``coordinates[0]`` presumably expects a Polygon
        geometry — confirm how MultiPolygon/Point hits should be handled.

    Raises:
        requests.HTTPError: If Nominatim answers with an error status.
        IndexError: If the search returns no results.
    """
    # Let requests build the query string so that spaces, '&', '#' and
    # non-ASCII characters in the place name are URL-encoded correctly.
    response = requests.get(
        'https://nominatim.openstreetmap.org/search.php',
        params={
            'q': ent,
            'polygon_geojson': 1,
            'accept-language': 'en',
            'format': 'jsonv2',
        },
        headers={
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15"
        },
        # TLS verification stays enabled (the default); a timeout keeps a
        # stalled connection from hanging the whole run.
        timeout=30,
    )
    response.raise_for_status()
    results = response.json()
    if not results:
        raise IndexError(f"Nominatim returned no results for {ent!r}")

    best = results[0]
    ring = best['geojson']['coordinates'][0]
    centroid = (float(best['lon']), float(best['lat']))
    for point in ring:
        # Each point becomes [lon, lat, bearing-from-centroid].
        point.append(geoutil.calculate_bearing(centroid, (point[0], point[1])))
    return ring, centroid
# level3 | |
def get_directional_coordinates_by_angle(coordinates, centroid, direction, minimum, maximum):
    """Keep the points whose bearing from *centroid* lies in a given sector.

    Args:
        coordinates: Iterable of points; only the first two components of
            each point are copied to the output.
        centroid: Origin handed to ``geoutil.calculate_bearing``.
        direction: Direction label; it only decides which range test is used.
        minimum: Lower bound of the sector (inclusive).
        maximum: Upper bound of the sector (inclusive).

    Returns:
        A list of ``(x, y, angle)`` tuples, in input order.
    """
    def in_sector(angle):
        # East labels use an either-bound test; every other label must lie
        # between both bounds.
        # NOTE(review): presumably the east sector wraps around the zero
        # angle — confirm against geo_level1.get_min_max.
        if direction in geo_level1.east:
            return angle >= minimum or angle <= maximum
        return angle >= minimum and angle <= maximum

    selected = []
    for point in coordinates:
        bearing = geoutil.calculate_bearing(centroid, point)
        tagged = (point[0], point[1], bearing)
        if in_sector(bearing):
            selected.append(tagged)
    return selected
def get_level3(level3):
    """Split a distance expression such as ``"4 km"`` into number and unit.

    The number may contain a decimal fraction (``"2.5 km"`` yields ``"2.5"``);
    matching only runs of digits would silently truncate it to ``"2"``.
    NOTE(review): the number is returned as a string — confirm that
    ``geoutil.get_kilometers`` accepts decimal strings.

    Args:
        level3: Text containing a number and an alphabetic unit.

    Returns:
        ``(digits, unit)``: the first number and the first run of ASCII
        letters found in *level3*, both as strings.

    Raises:
        IndexError: If *level3* contains no number or no letters.
    """
    digits = re.findall(r'[0-9]+(?:\.[0-9]+)?', level3)[0]
    unit = re.findall(r'[A-Za-z]+', level3)[0]
    return digits, unit
def get_direction_coordinates(coordinates, centroid, level1):
    """Restrict *coordinates* to the angular sector associated with *level1*.

    Args:
        coordinates: Points to filter.
        centroid: Origin used for the bearing of every point.
        level1: Direction label looked up through ``geo_level1.get_min_max``.

    Returns:
        The filtered ``(x, y, angle)`` tuples, or *coordinates* unchanged when
        ``geo_level1.get_min_max`` returns ``None`` for *level1*.
    """
    bounds = geo_level1.get_min_max(level1)
    if bounds is None:
        return coordinates
    return get_directional_coordinates_by_angle(
        coordinates, centroid, level1, bounds[0], bounds[1]
    )
def sort_west(poly1, poly2, centroid):
    """Collect the points of two geometries whose bearing is in [157, 202].

    Args:
        poly1: Object accepted by ``shapely.geometry.mapping`` whose mapping
            has a ``features`` list; only the first feature is read.
        poly2: Same kind of object as *poly1*.
        centroid: Origin handed to ``geoutil.calculate_bearing``.

    Returns:
        A list of ``(x, y, angle)`` tuples: the matching points of *poly1*
        followed by those of *poly2*. Within each geometry the rings keep
        their order and the points of every ring are visited in reverse.
    """
    def sector_points(geometry):
        rings = mapping(geometry)["features"][0]["geometry"]["coordinates"]
        kept = []
        for ring in rings:
            for point in ring[::-1]:
                bearing = geoutil.calculate_bearing(centroid, point)
                if 157 <= bearing <= 202:
                    kept.append((point[0], point[1], bearing))
        return kept

    return sector_points(poly1) + sector_points(poly2)
def get_level3_coordinates(coordinates, level_3, level1):
    """Build the band of points lying at a given distance around a region.

    The region polygon is buffered twice (by ``0.0095 * kms`` and by
    ``0.013 * kms``, in the units of the input coordinates) and the points of
    the difference between the two buffers are collected. When *level1* is
    given, the points are reduced to that direction's sector.

    Args:
        coordinates: Pair whose first item is the boundary ring of the region
            and whose second item is its centre point.
        level_3: Distance expression parsed by ``get_level3`` (e.g. "4 km").
        level1: Direction label, or ``None`` to keep the whole band.

    Returns:
        ``(coord, center)``: the selected points and the centroid of those
        points, or the region's own centre when no point was selected.
    """
    distance, unit = get_level3(level_3)
    kms = geoutil.get_kilometers(distance, unit)

    footprint = gpd.GeoSeries(Polygon(coordinates[0]))
    inner = footprint.buffer(0.0095 * kms, join_style=2)
    outer = footprint.buffer(0.013 * kms, join_style=2)
    band = outer.difference(inner)

    # Flatten every ring of the band, visiting each ring's points in reverse.
    rings = mapping(band)["features"][0]["geometry"]["coordinates"]
    coord = [point for ring in rings for point in ring[::-1]]

    if level1 is not None:
        coord = get_direction_coordinates(coord, coordinates[1], level1)
        if level1 in geo_level1.west:
            # West is recomputed from the two buffers directly.
            coord = sort_west(outer, inner, coordinates[1])

    if not coord:
        # Nothing selected: fall back to the region's original centre point.
        return coord, coordinates[1]

    # Geometric centre (centroid) of the selected points.
    center_point = MultiPoint(coord).centroid
    return coord, (center_point.x, center_point.y)
# level 3 end | |
# between | |
def get_between_coordinates(coordinates1, coordinates2):
    """Describe the place "between" two regions as a circle of points.

    The circle is centred on the midpoint of the two region centres. Its
    radius is that of a circle whose area equals the mean of the two polygon
    areas, scaled by 111.32 km per degree.
    NOTE(review): the areas come from ``Polygon.area`` on the raw coordinates
    with no projection applied here, so the radius is an approximation.

    Args:
        coordinates1: Pair of (boundary ring, centre point) for region one.
        coordinates2: Pair of (boundary ring, centre point) for region two.

    Returns:
        ``(circle_points, midpoint)``: 100 ``(x, y)`` points on the circle and
        the circle centre.
    """
    km_per_degree = 111.32  # approximate length of one degree of latitude

    region_a = Polygon(coordinates1[0])
    region_b = Polygon(coordinates2[0])
    mean_area = (region_a.area + region_b.area) / 2
    # Radius r of the equal-area circle (pi * r^2 ~= mean_area), in km.
    r_km = np.sqrt(mean_area / np.pi) * km_per_degree

    # Circle centre: midpoint of the two region centres.
    midpoint = (
        (coordinates1[1][0] + coordinates2[1][0]) / 2,
        (coordinates1[1][1] + coordinates2[1][1]) / 2,
    )

    # One degree of longitude shrinks with the cosine of the latitude.
    lat_km = km_per_degree
    lon_km = km_per_degree * np.cos(np.radians(midpoint[1]))

    def on_circle(theta):
        theta_rad = np.radians(theta)
        d_lat = (np.sin(theta_rad) * r_km) / lat_km
        d_lon = (np.cos(theta_rad) * r_km) / lon_km
        return (midpoint[0] + d_lon, midpoint[1] + d_lat)

    # 100 samples from 0 to 360 degrees inclusive.
    circle_points = [on_circle(theta) for theta in np.linspace(0, 360, num=100)]
    return circle_points, midpoint
# between end | |
def llmapi(text):
    """Ask the chat model to turn *text* into a list of positioning steps.

    This variant sends the Chinese system prompt.
    NOTE(review): a second ``llmapi`` is defined right after this one; because
    it is defined later at module level it replaces this one once the module
    has finished loading.

    Args:
        text: Natural-language description sent as the user message.

    Returns:
        The value decoded from the JSON found between the ``<<<JSON>>>`` and
        ``<<<END>>>`` markers of the reply.

    Raises:
        ValueError: If the reply has no marker-delimited section, or if that
            section is not valid JSON (``json.JSONDecodeError``).
    """
    system_prompt = (
        "你是一个资深的地理学家,你的任务是通过给定的一段自然语言,来选择正确的定位函数顺序以及他们的输入。\n"
        "你能选择的定位函数有:\n"
        "1. 相对定位(Relative Positioning):输入为地点坐标,方位,距离。输出为距离‘距离’输入的地点坐标的‘方位’的坐标。\n"
        "2. 中间定位(Between Positioning):输入为两个地点的坐标,输出为两个地点坐标的中点。\n"
        "请先进行思维链(CoT)推理,并最终用 JSON 格式输出你的答案,用 `<<<JSON>>>` 和 `<<<END>>>` 包裹起来。\n"
        "请确保所有输入仅包含:地点名称(字符串)、索引(整数)、方位(字符串,必须是英文)或距离(字符串,带单位),不允许返回诸如 'Chatswood 南4 km的坐标' 这样的内容。\n"
        "每个步骤编号都有 id 记录,然后如果某个输入是之前步骤的输出,那么输入对应步骤的 id。\n"
        "所有方向必须使用英文(如 south, west, northeast, etc.)。\n"
        "示例输出:\n"
        "<<<JSON>>>\n"
        "[{\"id\": 1, \"function\": \"Relative\", \"inputs\": [\"Chatswood\", \"south\", \"4 km\"]},"
        "{\"id\": 2, \"function\": \"Relative\", \"inputs\": [\"North Sydney\", \"west\", \"2 km\"]},"
        "{\"id\": 3, \"function\": \"Between\", \"inputs\": [1, 2]},"
        "{\"id\": 4, \"function\": \"Relative\", \"inputs\": [3, \"southwest\", \"5 km\"]}]\n"
        "<<<END>>>")
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )
    # The message content may be None; treat that like a reply without JSON.
    result = chat_completion.choices[0].message.content or ""
    # Allow any whitespace (or none) around the payload instead of requiring
    # exactly one newline after <<<JSON>>> and before <<<END>>>.
    json_match = re.search(r'<<<JSON>>>\s*(.*?)\s*<<<END>>>', result, re.DOTALL)
    if json_match is None:
        raise ValueError("LLM 输出未包含预期的 JSON 格式数据。")
    return json.loads(json_match.group(1))
def llmapi(text):
    """Ask the chat model to turn *text* into a list of positioning steps.

    The English system prompt describes the ``Relative`` and ``Between``
    functions and asks for a JSON list of steps wrapped in ``<<<JSON>>>`` /
    ``<<<END>>>`` markers. The raw reply is printed before it is parsed.

    Args:
        text: Natural-language description sent as the user message.

    Returns:
        The value decoded from the JSON found between the two markers.

    Raises:
        ValueError: If the reply has no marker-delimited section, or if that
            section is not valid JSON (``json.JSONDecodeError``).
    """
    system_prompt = (
        "You are an experienced geographer. Your task is to determine the correct sequence of positioning functions and their inputs based on a given piece of natural language.\n"
        "The positioning functions you can choose from are:\n"
        "1. Relative Positioning: Inputs is (location coordinate or location name, direction, and distance). Outputs the coordinates that are in the given 'direction' and 'distance' from the input location.\n"
        "2. Between Positioning: Inputs is (location 1 coordinates or location 1 name, location 2 coordinates or location 2 name). Outputs the midpoint coordinate between the two locations.\n"
        # This sentence used to run straight into the next one
        # ("...situations.First, ...") because the separator was missing.
        "You can only use the given functions, and the inputs to the functions must obey the above properties. The given functions can be combined to solve complex situations.\n"
        "First, perform chain-of-thought (CoT) reasoning, and finally output your answer in JSON format, wrapped between `<<<JSON>>>` and `<<<END>>>`.\n"
        "Make sure all inputs only include: location names (strings), step indices (integers), directions (strings, must be in English), or distances (strings with units). Do not return expressions like 'the coordinate 4 km south of Chatswood'.\n"
        "Each step must have an 'id'. If the input of a step is the output of a previous step, use that step’s 'id' as the input.\n"
        "All directions must be in English (e.g., south, west, northeast, etc.).\n"
        "Example output:\n"
        "<<<JSON>>>\n"
        "[{\"id\": 1, \"function\": \"Relative\", \"inputs\": [\"Chatswood\", \"south\", \"4 km\"]},"
        "{\"id\": 2, \"function\": \"Relative\", \"inputs\": [\"North Sydney\", \"west\", \"2 km\"]},"
        "{\"id\": 3, \"function\": \"Between\", \"inputs\": [1, 2]},"
        "{\"id\": 4, \"function\": \"Relative\", \"inputs\": [3, \"southwest\", \"5 km\"]}]\n"
        "<<<END>>>")
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )
    result = chat_completion.choices[0].message.content
    print(result)
    # The message content may be None; treat that like a reply without JSON.
    # Any whitespace (or none) is accepted around the payload instead of
    # requiring exactly one newline after <<<JSON>>> and before <<<END>>>.
    json_match = re.search(r'<<<JSON>>>\s*(.*?)\s*<<<END>>>', result or "", re.DOTALL)
    if json_match is None:
        raise ValueError("LLM 输出未包含预期的 JSON 格式数据。")
    return json.loads(json_match.group(1))
def get_coordinates(location):
    """Geocode *location* with Nominatim.

    Args:
        location: Free-text place name to search for.

    Returns:
        ``(coordinates, centroid)`` where ``coordinates`` is the first element
        of the first hit's GeoJSON ``coordinates`` and ``centroid`` is
        ``(lon, lat)`` as floats.
        NOTE(review): taking ``coordinates[0]`` presumably expects a Polygon
        geometry — confirm how MultiPolygon/Point hits should be handled.

    Raises:
        requests.HTTPError: If Nominatim answers with an error status.
        IndexError: If the search returns no results.
    """
    # Let requests build the query string so that spaces, '&', '#' and
    # non-ASCII characters in the place name are URL-encoded correctly.
    response = requests.get(
        'https://nominatim.openstreetmap.org/search.php',
        params={
            'q': location,
            'polygon_geojson': 1,
            'accept-language': 'en',
            'format': 'jsonv2',
        },
        headers={"User-Agent": "Mozilla/5.0"},
        # TLS verification stays enabled (the default); a timeout keeps a
        # stalled connection from hanging the whole run.
        timeout=30,
    )
    response.raise_for_status()
    results = response.json()
    if not results:
        raise IndexError(f"Nominatim returned no results for {location!r}")

    best = results[0]
    coordinates = best['geojson']['coordinates'][0]
    centroid = (float(best['lon']), float(best['lat']))
    return (coordinates, centroid)
def execute_steps(steps):
    """Run a parsed plan of ``Relative`` / ``Between`` steps in order.

    Args:
        steps: Iterable of dicts with the keys ``id``, ``function`` and
            ``inputs``. An integer input refers to the stored result of the
            step with that id; any other input is passed through as given.

    Returns:
        A dict mapping step id to that step's result. Steps whose
        ``function`` is neither "Relative" nor "Between" store nothing.
    """
    data = {}
    for step in steps:
        step_id = step['id']
        function = step['function']
        inputs = step['inputs']

        # Replace references to earlier steps by the results they produced.
        resolved_inputs = [
            data[item] if isinstance(item, int) else item
            for item in inputs
        ]

        if function == "Relative":
            location, direction, distance = resolved_inputs
            if isinstance(location, str):
                # A place name still has to be geocoded.
                location = get_coordinates(location)
            result = get_level3_coordinates(location, distance, direction)
            print(location, distance, direction, 'rrr')
        elif function == "Between":
            location1, location2 = resolved_inputs
            if isinstance(location1, str):
                location1 = get_coordinates(location1)
                location1 = [location1[0], *location1[1:]]
            if isinstance(location2, str):
                location2 = get_coordinates(location2)
                print(location2)
                location2 = [location2[0], *location2[1:]]
            print(location1)
            result = get_between_coordinates(location1, location2)
            print(location1, location2, 'bbb')
        else:
            # Unknown function names are skipped without storing anything.
            continue

        print(result)
        data[step_id] = result
    return data
# Manual smoke test of the geometry helpers (kept disabled):
# a = get_coordinates('Burwood')
# a2 = get_coordinates('Glebe')
# b = get_level3_coordinates(a, '5 km', 'east')
# c = get_between_coordinates(a, a2)
# Full pipeline
# Default input
# Each assignment below overwrites the previous one, so only the last
# uncommented string survives. The variable is not read by the code visible in
# this file.
default_input_text = "在Chatswood南边4公里与North Sydney 东边2公里的中间的西南5公里。"
default_input_text = "你是一位规划师,正在为华盛顿州的一项新森林监测站选址。两个潜在的参考位置分别是雷尼尔山国家公园(Mount Rainier National Park)和北喀斯喀特国家公园(North Cascades National Park)。首先,你想在这两个国家公园之间找到一个中间点。接着,你希望在这个中间点与北喀斯喀特国家公园之间,再取一个中间位置,以便确定最终的建设候选地。"
default_input_text = "在Chatswood和North Sydney的中间靠近North Sydney的四分之一位置"
default_input_text = "Plan a trip that involves determining the midpoint between Paris and London, and then finding another midpoint between this location and Paris to identify potential stopovers during travel."
# default_input_text = "5km southwest of Chatswood, 4km south of Chatswood and 2km north of North Sydney."
# Load the dataset; every entry is expected to carry an 'instruction' string.
with open(dataset_path, 'r', encoding='utf-8') as f:
    data = json.load(f)
answer_path = 'answer/GPT4o.json'
answer = []  # unused here; kept so the module-level name still exists

# Start from the answers saved by a previous run. A missing, empty or corrupt
# answer file simply means starting from scratch; other errors (permissions,
# interrupts, ...) are no longer swallowed.
try:
    with open(answer_path, "r", encoding="utf-8") as f:
        datapoint = json.load(f)
except (FileNotFoundError, ValueError):
    # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
    datapoint = []

# Make sure the output directory exists before the first write.
os.makedirs(os.path.dirname(answer_path) or ".", exist_ok=True)

for item in data:
    item["steps"] = llmapi(item['instruction'])
    datapoint.append(item)
    # Save after every item so a failure part-way through keeps the answers
    # collected so far. The list is kept in memory rather than re-read from
    # disk on each iteration.
    with open(answer_path, "w", encoding="utf-8") as f:
        json.dump(datapoint, f, ensure_ascii=False, indent=2)
# Format conversion
def write_custom_json(data, filename):
    """Write *data* as JSON with every step on a single line.

    Only the ``index``, ``instruction`` and ``steps`` keys of each item are
    written. Every value goes through ``json.dumps`` so that quotes,
    backslashes and non-integer ids or indexes are encoded correctly and the
    file is always valid JSON.

    Args:
        data: Sequence of dicts with the keys ``index``, ``instruction`` and
            ``steps``; each step is a dict with ``id``, ``function`` and
            ``inputs``.
        filename: Path of the file to (over)write, encoded as UTF-8.
    """
    def format_step(step):
        step_id = json.dumps(step["id"], ensure_ascii=False)
        function = json.dumps(step["function"], ensure_ascii=False)
        inputs = json.dumps(step["inputs"], ensure_ascii=False)
        return f'{{"id": {step_id}, "function": {function}, "inputs": {inputs}}}'

    with open(filename, "w", encoding="utf-8") as f:
        f.write("[\n")
        for i, item in enumerate(data):
            f.write(" {\n")
            index = json.dumps(item["index"], ensure_ascii=False)
            f.write(f' "index": {index},\n')
            instruction = json.dumps(item["instruction"], ensure_ascii=False)
            f.write(f' "instruction": {instruction},\n')
            f.write(' "steps": [\n')
            step_lines = [f" {format_step(step)}" for step in item["steps"]]
            f.write(",\n".join(step_lines))
            f.write("\n ]\n")
            # Every item except the last is followed by a comma.
            f.write(" }" + (",\n" if i < len(data) - 1 else "\n"))
        f.write("]\n")
# Reload the saved answers and rewrite the same file in the compact
# one-step-per-line layout produced by write_custom_json.
with open(answer_path, encoding="utf-8") as f:
    data = json.load(f)
write_custom_json(data, answer_path)