SpatialWebAgent committed on
Commit
b247a03
·
verified ·
1 Parent(s): 843b338

Upload 7 files

Browse files
Files changed (5) hide show
  1. app.py +419 -11
  2. geo_level1.py +185 -0
  3. geoutil.py +262 -0
  4. llm_ent_extract.py +144 -0
  5. regex_spatial.py +72 -0
app.py CHANGED
@@ -1,35 +1,440 @@
1
  import gradio as gr
2
  import requests
3
  import json
 
4
  from shapely.geometry import Polygon, MultiPoint, mapping
5
  import re
6
  import geopandas as gpd
 
7
  from openai import OpenAI
8
  import numpy as np
9
  import os
10
 
 
 
 
 
11
 
12
- def get_coords(ase):
13
- request_url = 'https://nominatim.openstreetmap.org/search.php?q='+ase+'&polygon_geojson=1&accept-language=en&format=jsonv2'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  headers = {
15
  "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15"
16
  }
17
  page = requests.get(request_url, headers=headers, verify=False)
18
- # json_content = json.loads(page.content)
 
 
 
 
 
 
19
 
20
- # all_coordinates = json_content[0]['geojson']['coordinates'][0]
21
- # centroid = (float(json_content[0]['lon']), float(json_content[0]['lat']))
22
 
 
23
 
24
- return page.content
25
- # return all_coordinates, centroid
26
 
27
- def process_api(input_text):
28
- # return {"result": get_coords(input_text)}
29
- return {"result": cont}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
 
 
 
 
 
 
31
 
 
 
 
 
 
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  request_url = 'https://nominatim.openstreetmap.org/search.php?q=Glebe&polygon_geojson=1&accept-language=en&format=jsonv2'
35
  headers = {
@@ -46,4 +451,7 @@ gr.Interface(
46
  outputs="json",
47
  title="Backend API",
48
  allow_flagging="never"
49
- ).launch()
 
 
 
 
1
  import gradio as gr
2
  import requests
3
  import json
4
+ import geoutil
5
  from shapely.geometry import Polygon, MultiPoint, mapping
6
  import re
7
  import geopandas as gpd
8
+ import geo_level1
9
  from openai import OpenAI
10
  import numpy as np
11
  import os
12
 
13
+ api_key = os.getenv('api_key')
14
+ client = OpenAI(
15
+ api_key=api_key
16
+ )
17
 
18
+ model = "gpt-4o"
19
+
20
# Accepted spellings for each compass direction. The single-word compound
# forms ("northeast", ...) are included so that these lists agree with the
# ones in geo_level1 and with the direction words the LLM prompt asks for.
north = ["north", "N'", "North", "NORTH"]
south = ["south", "S'", "South", "SOUTH"]
east = ["east", "E'", "East", "EAST"]
west = ["west", "W'", "West", "WEST"]
northeast = ["north-east", "NE'", "north east", "NORTH-EAST", "North East", "NORTH EAST", "northeast"]
southeast = ["south-east", "SE'", "south east", "SOUTH-EAST", "South East", "SOUTH EAST", "southeast"]
northwest = ["north-west", "NW'", "north west", "NORTH-WEST", "North West", "NORTH WEST", "northwest"]
southwest = ["south-west", "SW'", "south west", "SOUTH-WEST", "South West", "SOUTH WEST", "southwest"]
# Words that designate the central part of a place.
center = ["center", "central", "downtown", "midtown"]
29
+
30
+
31
+
32
+
33
def to_standard_2d_list(data):
    """Normalise nested coordinate data into a flat ``[[x, y], ...]`` list.

    Args:
        data: A single ``[x, y]`` pair, a list of points, or a deeper nesting
            of points (for example a list of rings).

    Returns:
        A list of two-element lists.

    Raises:
        ValueError: If the number of scalar values is not a multiple of two.
    """
    arr = np.array(data)

    # Points that carry a third component (x, y, angle) must have it dropped
    # per row; flattening them first would re-pair the values into nonsense
    # points such as [angle, x].
    if arr.ndim >= 2 and arr.shape[-1] == 3:
        arr = arr[..., :2]

    # Collapse every nesting level, then regroup the scalars in pairs.
    flat = arr.flatten()
    if flat.size % 2 != 0:
        raise ValueError("元素个数不是2的倍数,不能 reshape 成 [N, 2] 格式")

    return flat.reshape(-1, 2).tolist()
42
+
43
+
44
def get_geojson(ent, arr, centroid):
    """Wrap one ring of coordinates in a single-feature FeatureCollection.

    Args:
        ent: Value stored as the feature ``id``.
        arr: Ring of coordinates; it becomes the only ring of the polygon.
        centroid: Value stored under ``properties['centroid']``.

    Returns:
        A dict shaped like a GeoJSON FeatureCollection holding one Polygon.
    """
    feature = {
        'type': 'Feature',
        'id': ent,
        'properties': {'centroid': centroid},
        'geometry': {
            'type': 'Polygon',
            # A polygon is a list of rings; ``arr`` is the single ring.
            'coordinates': [arr],
        },
    }
    return {'type': 'FeatureCollection', 'features': [feature]}
62
+
63
+
64
def get_coordinates(ent):
    """Geocode ``ent`` with Nominatim and annotate each boundary point with a bearing.

    NOTE(review): a later ``get_coordinates`` definition in this file replaces
    this one at import time, so this version is currently unreachable.

    Args:
        ent: Free-text place name.

    Returns:
        ``(points, centroid)`` where ``points`` is the first ring of the top
        search hit with the bearing from the centroid appended to every point,
        and ``centroid`` is ``(lon, lat)`` as floats.

    Raises:
        requests.HTTPError: If Nominatim answers with an error status.
        ValueError: If the search returns no result.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15"
    }
    # Let requests build the query string so that spaces and punctuation in
    # the place name are percent-encoded.
    # NOTE(review): verify=False disables TLS certificate validation; it is
    # kept from the original code -- confirm whether it is still required.
    page = requests.get(
        'https://nominatim.openstreetmap.org/search.php',
        params={
            'q': ent,
            'polygon_geojson': 1,
            'accept-language': 'en',
            'format': 'jsonv2',
        },
        headers=headers,
        verify=False,
        timeout=30,
    )
    page.raise_for_status()

    json_content = json.loads(page.content)
    if not json_content:
        raise ValueError(f"Nominatim returned no result for {ent!r}")

    top = json_content[0]
    # Assumes the top hit is a Polygon, so index 0 is its outer ring.
    all_coordinates = top['geojson']['coordinates'][0]
    centroid = (float(top['lon']), float(top['lat']))

    for p in all_coordinates:
        # calculate_bearing only accepts tuples, hence the conversion.
        p.append(geoutil.calculate_bearing(centroid, (p[0], p[1])))

    return all_coordinates, centroid
81
 
82
def get_coordinates(location):
    """Geocode ``location`` with Nominatim.

    Args:
        location: Free-text place name.

    Returns:
        ``(coordinates, centroid)``. ``coordinates`` is the outer ring for a
        Polygon, the outer ring of the member with the most vertices for a
        MultiPolygon, and a single ``[lon, lat]`` pair for a Point or any
        other geometry type. ``centroid`` is ``(lon, lat)`` as floats.

    Raises:
        requests.HTTPError: If Nominatim answers with an error status.
        ValueError: If the search returns no result.
    """
    # Passing the query through ``params`` percent-encodes the place name.
    # NOTE(review): verify=False disables TLS certificate validation; it is
    # kept from the original code -- confirm whether it is still required.
    response = requests.get(
        'https://nominatim.openstreetmap.org/search.php',
        params={
            'q': location,
            'polygon_geojson': 1,
            'accept-language': 'en',
            'format': 'jsonv2',
        },
        headers={"User-Agent": "Mozilla/5.0"},
        verify=False,
        timeout=30,
    )
    print(response.url)
    response.raise_for_status()

    json_content = json.loads(response.content)
    if not json_content:
        raise ValueError(f"Nominatim returned no result for {location!r}")

    top = json_content[0]
    geometry = top['geojson']
    kind = geometry['type']
    centroid = (float(top['lon']), float(top['lat']))

    if kind == 'Polygon':
        coordinates = geometry['coordinates'][0]
    elif kind == 'Point':
        coordinates = geometry['coordinates']
    elif kind == 'MultiPolygon':
        # Use the member polygon with the most detailed outer ring.
        biggest = max(geometry['coordinates'], key=lambda rings: len(rings[0]))
        coordinates = biggest[0]
    else:
        # Unsupported geometry (e.g. LineString): previously this left
        # ``coordinates`` unbound; fall back to the reported centre point.
        print(kind)
        coordinates = [centroid[0], centroid[1]]

    return (coordinates, centroid)
98
+
99
+
100
+ # level3
101
def get_directional_coordinates_by_angle(coordinates, centroid, direction, minimum, maximum):
    """Keep the points whose bearing from ``centroid`` lies in an angular sector.

    Args:
        coordinates: Iterable of points; each is passed as-is to
            ``geoutil.calculate_bearing`` together with ``centroid``.
        centroid: Reference point for the bearing computation.
        direction: Direction word the sector belongs to.
        minimum: Lower bound of the sector, in degrees.
        maximum: Upper bound of the sector, in degrees.

    Returns:
        A list of ``(p[0], p[1], bearing)`` tuples for the retained points.
    """
    # A sector whose lower bound exceeds its upper bound crosses the 0/360
    # seam (the "east" sector is 337..22), so either bound may match.
    wraps = direction in geo_level1.east or minimum > maximum

    direction_coordinates = []
    for p in coordinates:
        angle = geoutil.calculate_bearing(centroid, p)
        # calculate_bearing answers with an out-of-range value (400) for
        # input it rejects; such points belong to no sector.
        if not 0 <= angle < 360:
            continue
        if wraps:
            inside = angle >= minimum or angle <= maximum
        else:
            inside = minimum <= angle <= maximum
        if inside:
            direction_coordinates.append((p[0], p[1], angle))

    return direction_coordinates
121
def get_level3(level3):
    """Split a distance expression such as ``"2.5 km"`` into number and unit.

    Args:
        level3: Text containing a number and an alphabetic unit.

    Returns:
        ``(digits, unit)`` as strings: the first number found (a decimal
        fraction is kept) and the first run of ASCII letters.

    Raises:
        ValueError: If the text holds no number or no unit.
    """
    # Accept "4", "2.5" and ".5"; a digits-only pattern would cut "2.5" to "2".
    number = re.search(r'[0-9]+(?:\.[0-9]+)?|\.[0-9]+', level3)
    unit = re.search(r'[A-Za-z]+', level3)
    if number is None or unit is None:
        raise ValueError(f"cannot read a distance and unit from {level3!r}")
    return number.group(), unit.group()
125
+
126
def get_direction_coordinates(coordinates, centroid, level1):
    """Restrict ``coordinates`` to the angular sector named by ``level1``.

    When ``geo_level1.get_min_max`` has no sector for ``level1`` the
    coordinates are returned unchanged.
    """
    sector = geo_level1.get_min_max(level1)
    if sector is None:
        return coordinates
    lower, upper = sector[0], sector[1]
    return get_directional_coordinates_by_angle(coordinates, centroid, level1, lower, upper)
132
def sort_west(poly1, poly2, centroid):
    """Collect the points of two geometries whose bearing lies in 157..202 degrees.

    Args:
        poly1: First geometry collection; ``mapping`` must expose its rings
            under ``features[0].geometry.coordinates``.
        poly2: Second geometry collection, same shape as ``poly1``.
        centroid: Reference point for the bearing computation.

    Returns:
        ``(p[0], p[1], bearing)`` tuples, all matches from ``poly1`` first and
        then those from ``poly2``. No sorting is applied.
    """
    ring_sets = [
        mapping(geom)["features"][0]["geometry"]["coordinates"]
        for geom in (poly1, poly2)
    ]
    # Each ring is walked in reverse order before its points are chained.
    point_runs = [
        [pt for ring in rings for pt in ring[::-1]]
        for rings in ring_sets
    ]

    picked = []
    for run in point_runs:
        for pt in run:
            bearing = geoutil.calculate_bearing(centroid, pt)
            if 157 <= bearing <= 202:
                picked.append((pt[0], pt[1], bearing))
    return picked
156
+
157
+
158
def _unit_offset(level1):
    """Return the (east, north) unit vector for a direction word, (0, 0) if unknown."""
    if level1 is None:
        return 0.0, 0.0
    diag = float(np.sqrt(0.5))
    table = (
        (geo_level1.east, (1.0, 0.0)),
        (geo_level1.west, (-1.0, 0.0)),
        (geo_level1.north, (0.0, 1.0)),
        (geo_level1.south, (0.0, -1.0)),
        (geo_level1.northeast, (diag, diag)),
        (geo_level1.northwest, (-diag, diag)),
        (geo_level1.southeast, (diag, -diag)),
        (geo_level1.southwest, (-diag, -diag)),
    )
    for words, vector in table:
        if level1 in words:
            return vector
    return 0.0, 0.0


def _shifted_circle(center, kms, level1, r_km=1):
    """Build a 100-point circle of radius ``r_km`` around ``center`` moved ``kms`` towards ``level1``."""
    # Kilometres per degree; the longitude factor shrinks with latitude.
    lat_km = 111.32
    lon_km = 111.32 * np.cos(np.radians(center[1]))

    east, north = _unit_offset(level1)
    new_center = (center[0] + east * kms / lon_km, center[1] + north * kms / lat_km)

    circle_points = []
    for theta in np.linspace(0, 360, num=100):
        theta_rad = np.radians(theta)
        d_lat = (np.sin(theta_rad) * r_km) / lat_km
        d_lon = (np.cos(theta_rad) * r_km) / lon_km
        circle_points.append((new_center[0] + d_lon, new_center[1] + d_lat))

    # The reported centre is the centroid of the generated points.
    center_point = MultiPoint(circle_points).centroid
    return circle_points, (center_point.x, center_point.y)


def _directional_ring(coords0, center, kms, level1):
    """Build the band around polygon ``coords0`` and clip it to ``level1``."""
    polygon1 = gpd.GeoSeries(Polygon(coords0))

    # The band is the area between two buffers of the source polygon; the
    # buffer widths are in degrees and scale with the requested distance.
    poly2 = polygon1.buffer(0.0095 * kms, join_style=2)
    poly3 = polygon1.buffer(0.013 * kms, join_style=2)
    poly = poly3.difference(poly2)

    coord = []
    for ring in mapping(poly)["features"][0]["geometry"]["coordinates"]:
        coord.extend(list(ring[::-1]))

    # Clip to the requested direction; "west" uses its own selection routine.
    if level1 is not None:
        coord = get_direction_coordinates(coord, center, level1)
        if level1 in geo_level1.west:
            coord = sort_west(poly3, poly2, center)

    if coord:
        center_point = MultiPoint(coord).centroid
        return coord, (center_point.x, center_point.y)
    return coord, center


def get_level3_coordinates(coordinates, level_3, level1):
    """Locate the area lying a given distance from a place in a given direction.

    Args:
        coordinates: ``(points, center)`` pair describing the reference place.
        level_3: Distance text with unit, e.g. ``"4 km"``.
        level1: Direction word, or ``None`` for no direction.

    Returns:
        ``(points, center)`` of the resulting area. If the reference has fewer
        than three points (or ``points`` is not a list) the result is a circle
        of 1 km radius around the shifted centre; otherwise it is the part of
        a band around the polygon that faces ``level1``.
    """
    distance, unit = get_level3(level_3)
    kms = geoutil.get_kilometers(distance, unit)

    coords0, center = coordinates

    # Too few points to form a polygon: treat the reference as a point.
    # Diagonal directions are honoured here as well; previously they left the
    # circle at the original location.
    if not isinstance(coords0, list) or len(coords0) < 3:
        return _shifted_circle(center, kms, level1)

    return _directional_ring(coords0, center, kms, level1)
234
+ # level 3 end
235
+
236
+ # between
237
def get_between_coordinates(coordinates1, coordinates2):
    """Build a circular area centred midway between two places.

    An input with fewer than three points counts as a point with area 0. When
    both inputs are points the circle radius defaults to 2 km; otherwise the
    radius is derived from the mean of the two polygon areas.

    :param coordinates1: ``(points, center)`` of the first place
    :param coordinates2: ``(points, center)`` of the second place
    :return: ``(circle_points, midpoint)`` with 100 circle points
    """
    ring_a, centre_a = coordinates1
    ring_b, centre_b = coordinates2

    def _area_of(ring):
        # Only a list with at least three points can form a polygon.
        if isinstance(ring, list) and len(ring) >= 3:
            return Polygon(ring).area
        return 0

    area_a = _area_of(ring_a)
    area_b = _area_of(ring_b)

    # Circle centre: the midpoint of the two centres.
    midpoint = (
        (centre_a[0] + centre_b[0]) / 2,
        (centre_a[1] + centre_b[1]) / 2
    )

    if area_a == 0 and area_b == 0:
        r_km = 2
    else:
        avg_area = (area_a + area_b) / 2
        # Radius of the equal-area circle, converted from degrees to km.
        r_km = np.sqrt(avg_area / np.pi) * 111.32

    # Kilometres per degree of latitude and of longitude at the midpoint.
    lat_km = 111.32
    lon_km = 111.32 * np.cos(np.radians(midpoint[1]))

    def _point_at(theta):
        theta_rad = np.radians(theta)
        d_lat = (np.sin(theta_rad) * r_km) / lat_km
        d_lon = (np.cos(theta_rad) * r_km) / lon_km
        return (midpoint[0] + d_lon, midpoint[1] + d_lat)

    circle_points = [_point_at(theta) for theta in np.linspace(0, 360, num=100)]
    return circle_points, midpoint
292
+ # between end
293
+
294
+
295
def llmapi(text):
    """Ask the chat model for a positioning plan (Chinese prompt) and parse it.

    NOTE(review): a later ``llmapi`` definition in this file replaces this one
    at import time, so this version is currently unreachable.

    Args:
        text: Natural-language description of a location.

    Returns:
        The decoded JSON found between ``<<<JSON>>>`` and ``<<<END>>>``.

    Raises:
        ValueError: If the reply has no such delimited block.
    """
    system_prompt = (
        "你是一个资深的地理学家,你的任务是通过给定的一段自然语言,来选择正确的定位函数顺序以及他们的输入。\n"
        "你能选择的定位函数有:\n"
        "1. 相对定位(Relative Positioning):输入为地点坐标,方位,距离。输出为距离‘距离’输入的地点坐标的‘方位’的坐标。\n"
        "2. 中间定位(Between Positioning):输入为两个地点的坐标,输出为两个地点坐标的中点。\n"
        "请先进行思维链(CoT)推理,并最终用 JSON 格式输出你的答案,用 `<<<JSON>>>` 和 `<<<END>>>` 包裹起来。\n"
        "请确保所有输入仅包含:地点名称(字符串)、索引(整数)、方位(字符串,必须是英文)或距离(字符串,带单位),不允许返回诸如 'Chatswood 南4 km的坐标' 这样的内容。\n"
        "每个步骤编号都有 id 记录,然后如果某个输入是之前步骤的输出,那么输入对应步骤的 id。\n"
        "所有方向必须使用英文(如 south, west, northeast, etc.)。\n"
        "示例输出:\n"
        "<<<JSON>>>\n"
        "[{\"id\": 1, \"function\": \"Relative\", \"inputs\": [\"Chatswood\", \"south\", \"4 km\"]},"
        "{\"id\": 2, \"function\": \"Relative\", \"inputs\": [\"North Sydney\", \"west\", \"2 km\"]},"
        "{\"id\": 3, \"function\": \"Between\", \"inputs\": [1, 2]},"
        "{\"id\": 4, \"function\": \"Relative\", \"inputs\": [3, \"southwest\", \"5 km\"]}]\n"
        "<<<END>>>")

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )

    # The message content may be None; treat that like a reply without JSON.
    result = chat_completion.choices[0].message.content or ""
    # Allow any whitespace (or none) around the payload: models do not always
    # put the markers on lines of their own.
    json_match = re.search(r'<<<JSON>>>\s*(.*?)\s*<<<END>>>', result, re.DOTALL)

    if json_match is None:
        raise ValueError("LLM 输出未包含预期的 JSON 格式数据。")
    return json.loads(json_match.group(1))
331
def llmapi(text):
    """Ask the chat model for a positioning plan and parse the JSON it returns.

    Args:
        text: Natural-language description of a location.

    Returns:
        The decoded JSON found between ``<<<JSON>>>`` and ``<<<END>>>``; the
        prompt asks for a list of steps with ``id``, ``function`` and
        ``inputs`` keys.

    Raises:
        ValueError: If the reply has no such delimited block.
    """
    system_prompt = (
        "You are an experienced geographer. Your task is to determine the correct sequence of positioning functions and their inputs based on a given piece of natural language.\n"
        "The positioning functions you can choose from are:\n"
        "1. Relative Positioning: Inputs is (location coordinate or location name, direction, and distance). Outputs the coordinates that are in the given 'direction' and 'distance' from the input location.\n"
        "2. Between Positioning: Inputs is (location 1 coordinates or location 1 name, location 2 coordinates or location 2 name). Outputs the midpoint coordinate between the two locations.\n"
        # The trailing newline keeps this sentence from fusing with the next.
        "You can only use the given functions, and the inputs to the functions must obey the above properties. The given functions can be combined to solve complex situations.\n"
        "First, perform chain-of-thought (CoT) reasoning, and finally output your answer in JSON format, wrapped between `<<<JSON>>>` and `<<<END>>>`.\n"
        "Make sure all inputs only include: location names (strings), step indices (integers), directions (strings, must be in English), or distances (strings with units). Do not return expressions like 'the coordinate 4 km south of Chatswood'.\n"
        "Each step must have an 'id'. If the input of a step is the output of a previous step, use that step’s 'id' as the input.\n"
        "All directions must be in English (e.g., south, west, northeast, etc.).\n"
        "Example output:\n"
        "<<<JSON>>>\n"
        "[{\"id\": 1, \"function\": \"Relative\", \"inputs\": [\"Chatswood\", \"south\", \"4 km\"]},"
        "{\"id\": 2, \"function\": \"Relative\", \"inputs\": [\"North Sydney\", \"west\", \"2 km\"]},"
        "{\"id\": 3, \"function\": \"Between\", \"inputs\": [1, 2]},"
        "{\"id\": 4, \"function\": \"Relative\", \"inputs\": [3, \"southwest\", \"5 km\"]}]\n"
        "<<<END>>>")

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )

    # The message content may be None; treat that like a reply without JSON.
    result = chat_completion.choices[0].message.content or ""
    print(result)
    # Allow any whitespace (or none) around the payload: models do not always
    # put the markers on lines of their own.
    json_match = re.search(r'<<<JSON>>>\s*(.*?)\s*<<<END>>>', result, re.DOTALL)

    if json_match is None:
        raise ValueError("LLM 输出未包含预期的 JSON 格式数据。")
    return json.loads(json_match.group(1))
368
+
369
+
370
+
371
+
372
+
373
def execute_steps(steps):
    """Run a plan of positioning steps and collect the result of each one.

    Args:
        steps: Iterable of dicts with ``id``, ``function`` ("Relative" or
            "Between") and ``inputs``. An integer input refers to the result
            of the earlier step with that id; a string is used literally.

    Returns:
        Dict mapping each step id to its ``(points, center)`` result.

    Raises:
        KeyError: If a step refers to an id that has not been computed.
        ValueError: If a step names an unsupported function.
    """

    def _as_location(value):
        # A place name is geocoded first; the point list is then normalised
        # to [[x, y], ...] while the centre is kept as it is.
        # NOTE(review): the mangled source does not show whether the original
        # normalised earlier step results too; this version normalises both.
        if isinstance(value, str):
            value = get_coordinates(value)
        return [to_standard_2d_list(value[0])] + list(value[1:])

    data = {}

    for step in steps:
        step_id = step['id']
        function = step['function']

        resolved_inputs = [
            data[inp] if isinstance(inp, int) else inp
            for inp in step['inputs']
        ]

        if function == "Relative":
            location, direction, distance = resolved_inputs
            # The resolved place is used as the origin. A hard-coded test
            # coordinate used to overwrite it here, which made every relative
            # step start from the same fixed point.
            data[step_id] = get_level3_coordinates(_as_location(location), distance, direction)

        elif function == "Between":
            location1, location2 = resolved_inputs
            location1 = _as_location(location1)
            location2 = _as_location(location2)
            data[step_id] = get_between_coordinates(location1, location2)

        else:
            # Fail here rather than leave a gap that a later step trips over.
            raise ValueError(f"unsupported positioning function: {function!r}")

    return data
421
+
422
+
423
def process_api(input_text):
    """Turn a natural-language location description into a GeoJSON polygon.

    The text is converted into positioning steps by ``llmapi``, the steps are
    run by ``execute_steps``, and the result of the step with the highest id
    is wrapped by ``get_geojson`` (with ``None`` as the feature id).
    """
    outputs = execute_steps(llmapi(input_text))
    final = outputs[max(outputs)]
    return get_geojson(None, final[0], final[1])
438
 
439
  request_url = 'https://nominatim.openstreetmap.org/search.php?q=Glebe&polygon_geojson=1&accept-language=en&format=jsonv2'
440
  headers = {
 
451
  outputs="json",
452
  title="Backend API",
453
  allow_flagging="never"
454
+ ).launch(debug=True)
455
+
456
+
457
+
geo_level1.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Tue Aug 2 12:38:31 2022
5
+
6
+ @author: syed
7
+ """
8
+ import regex_spatial
9
+ import geoutil
10
+
11
+
12
+ north = ["north", "N'", "North", "NORTH"]
13
+ south = ["south", "S'", "South", "SOUTH"]
14
+ east = ["east", "E'", "East", "EAST"]
15
+ west = ["west", "W'", "West", "WEST"]
16
+ northeast = ["north-east", "NE'", "north east", "NORTH-EAST", "North East", "NORTH EAST", 'northeast']
17
+ southeast = ["south-east", "SE'", "south east", "SOUTH-EAST", "South East", "SOUTH EAST", 'southeast']
18
+ northwest = ["north-west", "NW'", "north west", "NORTH-WEST", "North West", "NORTH WEST", 'northwest']
19
+ southwest = ["south-west", "SW'", "south west", "SOUTH-WEST", "South West", "SOUTH WEST", 'southwest']
20
+ center = ["center","central", "downtown","midtown"]
21
def get_min_max(direction):
    """Return the ``(minimum, maximum)`` angle sector for a direction word.

    NOTE(review): a later ``get_min_max`` definition in this file replaces
    this one at import time, so this version is currently unreachable.

    Returns ``None`` when ``direction`` is not one of the alternatives of
    ``regex_spatial.get_directional_regex()`` or matches no word list.
    """
    known = regex_spatial.get_directional_regex().split("|")
    if direction not in known:
        return None

    sectors = (
        (east, (337, 22)),
        (northeast, (22, 67)),
        (north, (67, 112)),
        (northwest, (112, 157)),
        (west, (157, 202)),
        (southwest, (202, 247)),
        (south, (247, 292)),
        (southeast, (292, 337)),
    )
    for words, bounds in sectors:
        if direction in words:
            return bounds
    return None
43
+
44
+
45
def get_min_max(direction):
    """Return the ``(minimum, maximum)`` angle sector for a direction word.

    Returns ``None`` when ``direction`` is not one of the alternatives of
    ``regex_spatial.get_directional_regex()`` or matches no word list. The
    "east" sector (337, 22) is the only one that crosses the 0/360 seam.
    """
    known = regex_spatial.get_directional_regex().split("|")
    if direction not in known:
        return None

    sectors = (
        (east, (337, 22)),
        (northeast, (292, 337)),
        (north, (247, 292)),
        (northwest, (202, 247)),
        (west, (157, 202)),
        (southwest, (112, 157)),
        (south, (67, 112)),
        (southeast, (22, 67)),
    )
    for words, bounds in sectors:
        if direction in words:
            return bounds
    return None
67
+
68
+ # def get_min_max(direction):
69
+ # regex = regex_spatial.get_directional_regex()
70
+ # direction_list = regex.split("|")
71
+ # if direction in direction_list:
72
+ # if direction in north:
73
+ # return (337, 22)
74
+ # if direction in northeast:
75
+ # return (22, 67)
76
+ # if direction in east:
77
+ # return (67, 112)
78
+ # if direction in southeast:
79
+ # return (112, 157)
80
+ # if direction in south:
81
+ # return (157, 202)
82
+ # if direction in southwest:
83
+ # return (202, 247)
84
+ # if direction in west:
85
+ # return (247, 292)
86
+ # if direction in northwest:
87
+ # return (292, 337)
88
+ #
89
+ # return None
90
+
91
def get_directional_coordinates_by_angle(coordinates, direction, minimum, maximum):
    """Keep the points whose stored angle (``p[2]``) lies in an angular sector.

    Args:
        coordinates: Iterable of points whose third element is an angle.
        direction: Direction word the sector belongs to.
        minimum: Lower bound of the sector, in degrees.
        maximum: Upper bound of the sector, in degrees.

    Returns:
        A list with the retained points, in their original order.
    """
    # A sector whose lower bound exceeds its upper bound crosses the 0/360
    # seam (the "east" sector is 337..22), so either bound may match.
    wraps = direction in east or minimum > maximum
    if wraps:
        return [p for p in coordinates if p[2] >= minimum or p[2] <= maximum]
    return [p for p in coordinates if minimum <= p[2] <= maximum]
102
+
103
def get_directional_coordinates(coordinates, direction, centroid , minimum, maximum, is_midmid):
    """Select the points of one sector and add two inner points towards the centroid.

    Returns:
        ``(points, midmid1, midmid2)`` where the two extra points come from
        ``geoutil.get_midmid_point`` applied to the first and last selected
        point. For "west" they are inserted after the points with the largest
        and smallest angle; for any other direction they are appended.
    """
    selected = get_directional_coordinates_by_angle(coordinates, direction, minimum, maximum)
    print(selected, 'ddddd')
    midmid1, midmid2 = geoutil.get_midmid_point(centroid, selected[0], selected[-1], is_midmid)

    if direction not in west:
        selected.append(midmid2)
        selected.append(midmid1)
        return selected, midmid1, midmid2

    angles = [p[2] for p in selected]
    maxi = max(angles)
    mini = min(angles)
    index_mini = 0
    index_maxi = 0
    # The last occurrence wins when an extreme angle appears more than once.
    for idx, angle in enumerate(angles):
        if angle == mini:
            index_mini = idx
        if angle == maxi:
            index_maxi = idx

    selected.insert(index_maxi + 1, midmid2)
    selected.insert(index_mini + 1, midmid1)
    print(index_maxi+1, midmid2, 'imim')
    print(index_mini+1, midmid1, 'imim')
    return selected, midmid1, midmid2
127
+
128
def get_level1_coordinates(coordinates, centroid, direction, is_midmid):
    """Reduce a place outline to the part named by ``direction``.

    Returns:
        ``(points, centroid, mid1, mid2)``. For a compass direction the points
        are the matching sector plus two inner points. For a "center" word
        they come from ``get_central`` and the mids are ``None``. For any
        other word the input is returned unchanged with ``None`` mids.
    """
    min_max = get_min_max(direction)

    if min_max is None:
        if direction.lower() in center:
            central = get_central(coordinates, centroid, direction, is_midmid)
            return central, centroid, None, None
        return coordinates, centroid, None, None

    coordinates, mid1, mid2 = get_directional_coordinates(
        coordinates, direction, centroid, min_max[0], min_max[1], is_midmid)
    print(coordinates, 'min_max')
    print("Level 1 Coordinates///")
    for idx, p in enumerate(coordinates):
        print(idx, p)
    return coordinates, centroid, mid1, mid2
144
+
145
+
146
def get_central(coordinates, centroid, direction, is_midmid):
    """Build the outline of the central area from the eight compass sectors.

    For every sector the first and last matching points are pulled towards
    the centroid with ``geoutil.get_midmid_point``. The sixteen resulting
    points are returned starting at east and then going through north-east,
    north, north-west, west, south-west, south and south-east.
    """
    # Sectors are evaluated in this order and assembled in a different one.
    evaluation_order = ("north", "north east", "east", "south east",
                        "south", "south west", "west", "north west")
    output_order = ("east", "north east", "north", "north west",
                    "west", "south west", "south", "south east")

    inner_points = {}
    for name in evaluation_order:
        bounds = get_min_max(name)
        sector = get_directional_coordinates_by_angle(coordinates, name, bounds[0], bounds[1])
        first_mid, last_mid = geoutil.get_midmid_point(centroid, sector[0], sector[-1], is_midmid)
        inner_points[name] = (first_mid, last_mid)

    central_coordinates = []
    for name in output_order:
        central_coordinates.extend(inner_points[name])
    return central_coordinates
184
+
185
+
geoutil.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Tue Aug 2 12:45:21 2022
5
+
6
+ @author: syed
7
+ """
8
+
9
+ import math
10
+ import re
11
+ import regex_spatial
12
+ import quantities as pq
13
+ from math import radians, cos, sin, asin, sqrt
14
+ import quantities as pq
15
+
16
+
17
+
18
def get_kilometers(d, unit):
    """Convert the distance ``d`` expressed in ``unit`` to kilometres.

    Args:
        d: Distance as a number or numeric string.
        unit: Unit name understood by ``quantities.CompoundUnit``.

    Returns:
        The magnitude of the distance after conversion to kilometres.
    """
    distance = float(d) * pq.CompoundUnit(unit)
    # Assigning ``units`` rescales the quantity in place.
    distance.units = pq.km
    return distance.magnitude
22
+
23
def ConvertToRadian(input):
    """Convert an angle from degrees to radians."""
    scaled = input * math.pi
    return scaled / 180
25
+
26
def get_level1(ent):
    """Return the first match of the level-1 pattern in ``ent``, or ``None``."""
    found = re.search(regex_spatial.get_level1_regex(), ent)
    return None if found is None else found.group()
31
def get_level2(ent):
    """Return the first match of the level-2 pattern in ``ent``, or ``None``."""
    found = re.search(regex_spatial.get_level2_regex(), ent)
    return None if found is None else found.group()
36
def get_level3(ent):
    """Return the first match of the level-3 pattern in ``ent``, or ``None``."""
    found = re.search(regex_spatial.get_level3_regex(), ent)
    return None if found is None else found.group()
41
+
42
def get_ase(ent):
    """Return the part of ``ent`` after its last underscore (all of it if none)."""
    return ent.rpartition("_")[2]
45
+
46
def get_ent(ent):
    """Split an entity string into ``(place, level1, level2, level3)``."""
    place = get_ase(ent)
    levels = [extract(ent) for extract in (get_level1, get_level2, get_level3)]
    return (place, *levels)
48
+
49
def get_centroid(coordinates, centroid, mini, maxi):
    """Return the midpoint between ``centroid`` and the point nearest the sector's mean angle.

    Args:
        coordinates: Points whose third element is an angle.
        centroid: Reference point; its first two elements are used.
        mini: Lower bound of the sector.
        maxi: Upper bound of the sector.
    """
    average = (mini + maxi)/2
    gaps = [abs(p[2] - average) for p in coordinates]
    # The first point with the smallest gap wins on ties.
    nearest = coordinates[gaps.index(min(gaps))]
    return midpoint(centroid[0], centroid[1], nearest[0], nearest[1], average)
59
+
60
+
61
def calculateArea(coordinates):
    """Estimate the area enclosed by a ring of ``(lon, lat)`` points on a sphere.

    Consecutive points are paired without wrapping around, so the ring is
    expected to repeat its first point at the end.

    Args:
        coordinates: Sequence of points; index 0 is used as longitude and
            index 1 as latitude, both in degrees.

    Returns:
        The absolute accumulated value, rounded to two decimals, plus 2. Fewer
        than three points yield 2.

    NOTE(review): the usual form of this formula halves the sum and has no
    constant offset, so the value is about twice the true area in km^2 plus 2.
    That scaling is left untouched -- confirm whether it is intended.
    """
    area = 0
    if len(coordinates) > 2:
        for p1, p2 in zip(coordinates, coordinates[1:]):
            # Both sine terms take a latitude (index 1); the second one used
            # to read the longitude of p2 by mistake.
            area += math.radians(p2[0] - p1[0]) * (
                2 + math.sin(math.radians(p1[1])) + math.sin(math.radians(p2[1])))

        # Earth radius in metres, squared, then m^2 -> km^2.
        area = area * 6378137 * 6378137 / 1000000

    area = abs(round(area, 2)) + 2

    return area
76
+
77
def get_midmid_point(centroid, point1, point2, is_midmid):
    """Pull two points towards ``centroid`` by halving, once or twice.

    Returns:
        The quarter-way pair ``(midmid1, midmid2)`` when ``is_midmid`` is
        true, otherwise the half-way pair ``(mid1, mid2)``. All four
        midpoints are computed in either case.
    """
    cx, cy = centroid[0], centroid[1]

    mid1 = midpoint(cx, cy, point1[0], point1[1], point1[2])
    mid2 = midpoint(cx, cy, point2[0], point2[1], point2[2])
    midmid1 = midpoint(cx, cy, mid1[0], mid1[1], mid1[2])
    midmid2 = midpoint(cx, cy, mid2[0], mid2[1], mid2[2])

    return (midmid1, midmid2) if is_midmid else (mid1, mid2)
94
+
95
def getPointByDistanceAngle(lat, ln, angle, distanceInKm):
    """Project a point ``distanceInKm`` away from ``(lat, ln)`` along a bearing.

    NOTE(review): a later five-argument definition with the same name in this
    file replaces this one at import time.

    Args:
        lat: Start latitude in degrees.
        ln: Start longitude in degrees.
        angle: Bearing in degrees.
        distanceInKm: Distance to travel, in kilometres.

    Returns:
        ``[lon, lat, angle]`` of the destination, in degrees.
    """
    R = 6378.1  # Earth radius in km
    brng = angle * math.pi /180
    ratio = distanceInKm/R  # angular distance travelled

    lat1 = math.radians(lat)
    lon1 = math.radians(ln)

    lat2 = math.asin(math.sin(lat1)*math.cos(ratio) +
                     math.cos(lat1)*math.sin(ratio)*math.cos(brng))
    lon2 = lon1 + math.atan2(math.sin(brng)*math.sin(ratio)*math.cos(lat1),
                             math.cos(ratio)-math.sin(lat1)*math.sin(lat2))

    return [math.degrees(lon2), math.degrees(lat2), angle]
117
+
118
def midpoint(x1, y1, x2, y2, angle):
    """Return the spherical midpoint of two points as ``[latitude, longitude, angle]``.

    NOTE(review): a later ``midpoint`` definition in this file replaces this
    one at import time.

    ``x1``/``x2`` are read as latitudes and ``y1``/``y2`` as longitudes, in
    degrees; ``angle`` is passed through unchanged. Results are rounded to
    eight decimals.
    """
    latA, lonA = math.radians(x1), math.radians(y1)
    latB, lonB = math.radians(x2), math.radians(y2)

    dLon = lonB - lonA
    Bx = math.cos(latB) * math.cos(dLon)
    By = math.cos(latB) * math.sin(dLon)
    base = math.cos(latA) + Bx

    latC = math.atan2(math.sin(latA) + math.sin(latB),
                      math.sqrt(base * base + By * By))
    lonC = lonA + math.atan2(By, base)
    # Bring the longitude back into the -180..180 degree range.
    lonC = (lonC + 3 * math.pi) % (2 * math.pi) - math.pi

    return [round(math.degrees(latC), 8), round(math.degrees(lonC), 8), angle]
137
+
138
def midpoint(y1, x1, y2, x2, angle):
    """Return the spherical midpoint of two points as ``[longitude, latitude, angle]``.

    ``y1``/``y2`` are read as longitudes and ``x1``/``x2`` as latitudes, in
    degrees; ``angle`` is passed through unchanged. Results are rounded to
    eight decimals and also printed with a ``'midmid'`` tag.
    """
    lonA, latA = math.radians(y1), math.radians(x1)
    lonB, latB = math.radians(y2), math.radians(x2)

    dLon = lonB - lonA
    Bx = math.cos(latB) * math.cos(dLon)
    By = math.cos(latB) * math.sin(dLon)
    base = math.cos(latA) + Bx

    latC = math.atan2(math.sin(latA) + math.sin(latB),
                      math.sqrt(base * base + By * By))
    lonC = lonA + math.atan2(By, base)
    # Bring the longitude back into the -180..180 degree range.
    lonC = (lonC + 3 * math.pi) % (2 * math.pi) - math.pi

    latitude = round(math.degrees(latC), 8)
    longitude = round(math.degrees(lonC), 8)
    print([longitude, latitude, angle], 'midmid')
    return [longitude, latitude, angle]
160
+
161
def calculate_bearing(pointA, pointB):
    """Return the initial compass bearing from ``pointA`` to ``pointB`` in degrees.

    Index 0 of each point is read as latitude and index 1 as longitude.
    NOTE(review): verify that callers pass ``(lat, lon)``; a ``(lon, lat)``
    pair would swap the axes of the computed bearing.

    Args:
        pointA: Start point, a tuple.
        pointB: End point, a tuple of two floats.

    Returns:
        A bearing in the range 0 <= b < 360, or the sentinel ``400`` when
        either argument is not a tuple or ``pointB`` does not hold two floats.
    """
    if not isinstance(pointA, tuple) or not isinstance(pointB, tuple):
        return 400
    # Both components of pointB are validated; the second test used to
    # repeat the check on index 0 and never looked at index 1.
    if not isinstance(pointB[0], float) or not isinstance(pointB[1], float):
        return 400

    lat1 = math.radians(pointA[0])
    lat2 = math.radians(pointB[0])

    diffLong = math.radians(pointB[1] - pointA[1])

    x = math.sin(diffLong) * math.cos(lat2)
    y = math.cos(lat1) * math.sin(lat2) - (math.sin(lat1)
            * math.cos(lat2) * math.cos(diffLong))

    # atan2 yields -180..180 degrees; shift into the 0..360 compass range.
    initial_bearing = math.degrees(math.atan2(x, y))
    compass_bearing = (initial_bearing + 360) % 360

    return compass_bearing
184
+
185
def getPointByDistanceAngle(lat, ln, angle, distance, unit):
    """Project a point a given distance away from ``(lat, ln)`` along a bearing.

    Args:
        lat: Start latitude in degrees.
        ln: Start longitude in degrees.
        angle: Bearing in degrees (anything ``float`` accepts).
        distance: Distance to travel, expressed in ``unit``.
        unit: Unit name accepted by ``get_kilometers``.

    Returns:
        ``(lon, lat, angle)`` with both coordinates rounded to eight decimals.
    """
    R = 6378.1  # Earth radius in km
    brng = float(angle) * math.pi /180
    ratio = get_kilometers(distance, unit)/R  # angular distance travelled

    lat1 = math.radians(lat)
    lon1 = math.radians(ln)

    lat2 = math.asin(math.sin(lat1)*math.cos(ratio) +
                     math.cos(lat1)*math.sin(ratio)*math.cos(brng))
    lon2 = lon1 + math.atan2(math.sin(brng)*math.sin(ratio)*math.cos(lat1),
                             math.cos(ratio)-math.sin(lat1)*math.sin(lat2))

    lat2 = math.degrees(lat2)
    lon2 = math.degrees(lon2)
    return (round(lon2,8), round(lat2,8), angle)
206
+
207
+
208
def calculatePointByDistance(lat, ln, angle, distance, unit):
    """Offset ``(lat, ln)`` by a scaled distance along ``angle`` using flat trigonometry.

    NOTE(review): ``angle`` is passed to ``cos``/``sin`` unconverted, so it is
    treated as radians here; the degree conversion is disabled in the source.
    Confirm which unit callers supply.

    Returns:
        ``(lon, lat, angle)`` with both coordinates rounded to eight decimals.
    """
    coff = 100/(6378*1.56)
    step = get_kilometers(distance, unit) * coff

    lat_new = lat + (step * math.cos(angle))
    ln_new = ln + (step * math.sin(angle))

    return (round(ln_new,8), round(lat_new,8), angle)
220
+
221
+
222
+
223
def pointByAngle(lat, ln, angle, distance, unit):
    """Project a point a given distance away from ``(lat, ln)`` along a bearing.

    Args:
        lat: Start latitude in degrees.
        ln: Start longitude in degrees.
        angle: Bearing in degrees.
        distance: Distance to travel, expressed in ``unit``.
        unit: Unit name accepted by ``get_kilometers``.

    Returns:
        ``(lon, lat, angle)`` of the destination, unrounded, in degrees.
    """
    R = 6378.1  # Earth radius in km
    brng = angle * math.pi /180
    ratio = get_kilometers(distance, unit)/R  # angular distance travelled

    lat1 = math.radians(lat)
    lon1 = math.radians(ln)

    lat2 = math.asin(math.sin(lat1)*math.cos(ratio) +
                     math.cos(lat1)*math.sin(ratio)*math.cos(brng))
    lon2 = lon1 + math.atan2(math.sin(brng)*math.sin(ratio)*math.cos(lat1),
                             math.cos(ratio)-math.sin(lat1)*math.sin(lat2))

    return (math.degrees(lon2), math.degrees(lat2), angle)
245
+
246
+
247
def getPointByDistance(lat, ln, angle, distance, unit):
    """Shift ``(lat, ln)`` by the same distance along both axes.

    ``angle`` does not influence the shift; it is only returned.

    Returns:
        ``(new_lat, new_long, angle)`` with both coordinates rounded to eight
        decimals.
    """
    # 111.32 km per degree of latitude; longitude degrees shrink with cos(lat).
    coef = get_kilometers(distance, unit) / 111.32
    shifted_lat = lat + coef
    shifted_long = ln + coef / math.cos(lat * 0.01745)
    return (round(shifted_lat,8), round(shifted_long,8), angle)
253
+
254
def haversine(lon1, lat1, lon2, lat2):
    """Return the great-circle distance in kilometres between two points.

    Args:
        lon1: Longitude of the first point, in degrees.
        lat1: Latitude of the first point, in degrees.
        lon2: Longitude of the second point, in degrees.
        lat2: Latitude of the second point, in degrees.

    Returns:
        The distance on a sphere of radius 6371 km.
    """
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # Rounding can push ``a`` marginally outside 0..1 for near-antipodal
    # points, which would make sqrt/asin raise a domain error.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    km = 6371 * c
    return km
262
+
llm_ent_extract.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+ import re
3
+ import os
4
+
5
+
6
+
7
+
8
# Chat-completion model used by every request in this module.
# (A preceding "gpt-3.5-turbo" assignment was overwritten immediately and
# never took effect, so only the effective value is kept.)
# NOTE(review): the functions in this module call a module-level `client`
# that is not defined in the visible part of the file -- confirm where it
# is created before these functions are used.
model = "gpt-4o"
10
+
11
def extract_GPE(text):
    """
    Ask the chat model to list the geopolitical entities mentioned in a text.

    :param text: Free text, sent to the model as the user message.
    :return: The message content of the first completion choice. The system
        prompt requests the form '[###A###, ###B###]'.
    """
    system_prompt = '''You are a professional geographer. Your task is to extract all geopolitical entities from a given text. Geopolitical entities can include countries, regions, cities, autonomous regions, or other administrative divisions. For each geopolitical entity, wrap the name in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"China and India are two of the most populous countries in Asia."\n\nExpected Output:\n[###China###, ###India###]'''

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    # `client` and `model` are module-level names defined outside this function.
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )

    return chat_completion.choices[0].message.content
28
+
29
+
30
def extract_LOC(text):
    """
    Ask the chat model to list the non-political location entities in a text.

    :param text: Free text, sent to the model as the user message.
    :return: The message content of the first completion choice. The system
        prompt requests the form '[###A###, ###B###]'.
    """
    # The worked example previously listed the two countries as the expected
    # output, contradicting the instruction that countries are excluded. The
    # expected output now shows the only non-political location in the sentence.
    system_prompt = '''You are a professional geographer. Your task is to extract all location entities (LOC) from a given text. Location entities can include physical locations such as landmarks, geographical features, mountains, rivers, oceans, and places, but do not include political or administrative divisions such as countries or cities (these are considered geopolitical entities). For each location entity, wrap the name in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"China and India are two of the most populous countries in Asia."\n\nExpected Output:\n[###Asia###]'''

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    # `client` and `model` are module-level names defined outside this function.
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )

    return chat_completion.choices[0].message.content
47
+
48
+
49
def extract_RSE_1(text):
    """
    Ask the chat model to list the directional / central-area keywords in a text.

    :param text: Free text, sent to the model as the user message.
    :return: The message content of the first completion choice. The system
        prompt requests the form '[###A###, ###B###]'.
    """
    system_prompt = '''You are a professional geographer. Your task is to extract all spatial entities (directional keywords) from a given text. Spatial entities can include directional keywords such as north, south, east, west, and more specific terms like northeast, northwest, southeast, southwest, as well as terms indicating locations like center, central, downtown, and midtown. For each spatial entity, wrap the name in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"The hotel is located in the downtown area of New York, just south of Central Park, with a beautiful view of the southeast corner."\n\nExpected Output:\n[###downtown###, ###south###, ###southeast###]'''

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    # `client` and `model` are module-level names defined outside this function.
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )

    return chat_completion.choices[0].message.content
66
+
67
+
68
def extract_RSE_2(text):
    """
    Ask the chat model to list the fuzzy proximity keywords in a text.

    :param text: Free text, sent to the model as the user message.
    :return: The message content of the first completion choice. The system
        prompt requests the form '[###A###, ###B###]'.
    """
    system_prompt = '''You are a professional geographer. Your task is to extract all fuzzy spatial entities (keywords) from a given text. Fuzzy spatial keywords can include terms like nearby, near, vicinity, close, beside, next, adjacent, immediate, border, surrounding, neighbourhood, proximity, territory, locality, and similar terms. For each fuzzy spatial keyword, wrap the name in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"The park is located nearby the lake, with several cafes close to the walking paths, and a small garden adjacent to the main entrance."\n\nExpected Output:\n[###nearby###, ###close###, ###adjacent###]'''

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    # `client` and `model` are module-level names defined outside this function.
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )

    return chat_completion.choices[0].message.content
85
+
86
+
87
def extract_RSE_3(text):
    """
    Ask the chat model to list the distance expressions (number plus unit) in a text.

    :param text: Free text, sent to the model as the user message.
    :return: The message content of the first completion choice. The system
        prompt requests the form '[###A###, ###B###]'.
    """
    system_prompt = '''You are a professional geographer. Your task is to extract all fuzzy distance keywords from a given text. Fuzzy distance keywords include numeric values followed by distance units such as kilometer, mile, meter, foot, inch, centimeter, and other related units. The distance units can be in different formats, such as km, m, mi, ft, yd, cm, mm, or even in full words like kilometer, mile, or inch. For each fuzzy distance keyword, wrap the entire expression (number and unit) in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"The park is located 3 km away from the city center, while the nearest supermarket is only 500 meters from here, and the lake is about 1 mile further down the road."\n\nExpected Output:\n[###3 km###, ###500 meters###, ###1 mile###]'''

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    # `client` and `model` are module-level names defined outside this function.
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )

    return chat_completion.choices[0].message.content
104
+
105
+
106
def extract(respond, entity_type):
    """
    Extract the entities wrapped in ### from the input string and map each of
    them to the given entity type.

    :param respond: String containing the entities, e.g.
        '[###2 miles###, ###200 meters###, ###5 kilometers###]'.
        ``None`` or an empty string yields an empty dict.
    :param entity_type: Type (string) assigned to every extracted entity.
    :return: A dict whose keys are the extracted entities (surrounding
        whitespace stripped) and whose values are all ``entity_type``.
        Repeated entities collapse into a single key.
    """
    # A missing model reply would otherwise make re.findall raise TypeError.
    if not respond:
        return {}

    # Non-greedy match so each ###...### pair is captured separately.
    extracted_entities = re.findall(r'###(.*?)###', respond)

    return {entity.strip(): entity_type for entity in extracted_entities}
121
+
122
+
123
def llmapi(text):
    """
    Send ``text`` to the chat model with a placeholder system prompt.

    :param text: Free text, sent to the model as the user message.
    :return: The message content of the first completion choice.
    """
    # Placeholder prompt (reads "please fill in the required prompt").
    system_prompt = '请你填入需要的提示'

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    # `client` and `model` are module-level names defined outside this function.
    response = client.chat.completions.create(messages=conversation, model=model)
    return response.choices[0].message.content
138
+
139
+
140
+ # print(extract('[###2 miles###, ###200 meters###, ###5 kilometers###]', 'rse'))
141
+ # print(extract(extract_GPE('Between Burwood and Glebe.'), 'gpe'))
142
+
143
+
144
+
regex_spatial.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Tue Jul 26 14:57:18 2022
5
+
6
+ @author: syed
7
+ """
8
+
9
+ from quantities import units as u
10
+ from quantities import Quantity
11
+
12
# Regex quantifier suffixes appended when the patterns in this module are
# assembled: "+" is one-or-more, "*" is zero-or-more.
one_plus, zero_plus = "+", "*"
14
+
15
+
16
def get_quantities_regex():
    """Return a bracketed pattern fragment built from the length-unit names.

    Collects every attribute of ``u.length`` that is a ``Quantity`` and whose
    name contains no underscore, joins the names with ``|`` and wraps the
    result in square brackets.

    NOTE(review): in regex syntax square brackets form a character class, so
    the result matches any single character occurring in the unit names (or a
    literal ``|``) rather than one whole unit name -- confirm this is intended.
    """
    unit_names = []
    for attr in dir(u.length):
        if not isinstance(getattr(u.length, attr), Quantity):
            continue
        if "_" in attr:
            continue
        unit_names.append(attr)

    return "[" + "|".join(unit_names) + "]"
24
def get_number_regex():
    """Return the pattern fragment matching a single ASCII digit."""
    return "[0-9]"
27
def get_space_regex():
    """Return the pattern fragment matching a single whitespace character."""
    # Raw string: "\s" in a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions. The value
    # (backslash followed by "s") is unchanged.
    return r"\s"
30
+
31
def get_directional_regex():
    """Return the ``|``-separated alternation of directional keywords.

    Compound directions come first, then the apostrophe-suffixed
    abbreviations, then the four plain cardinals, so that in an alternation
    the longer forms are tried before the bare cardinals they contain.
    """
    compound = (
        "north-east", "north-west", "south-east", "south-west",
        "north east", "north west", "south east", "south west",
        "northeast", "northwest", "southeast", "southwest",
    )
    abbreviations = ("N'", "S'", "E'", "W'", "NE'", "NW'", "SE'", "SW'")
    cardinals = ("north", "south", "east", "west")
    return "|".join(compound + abbreviations + cardinals)
36
+
37
def get_center_regex():
    """Return the ``|``-separated alternation of central-area keywords."""
    return "|".join(("center", "central", "downtown", "midtown"))
40
+
41
def get_near_regex():
    """Return the ``|``-separated alternation of proximity keywords."""
    near_terms = (
        "nearby", "near", "vicinity", "close", "beside",
        "next", "adjacent", "immediate", "border",
    )
    return "|".join(near_terms)
44
+
45
def get_surrounding_regex():
    """Return the ``|``-separated alternation of surrounding-area keywords."""
    # "neighbourhood" was previously misspelled "neigbourhood", which could
    # never match the correctly spelled word in input text.
    surrounding_kwds = "surrounding|neighbourhood|proximity|territory|locality"
    return surrounding_kwds
48
def get_level1_regex():
    """Return the case-insensitive level-1 pattern: one capturing group over
    the directional and central-area keywords."""
    alternatives = "|".join((get_directional_regex(), get_center_regex()))
    return "(?i)({})".format(alternatives)
51
+
52
def get_level2_regex():
    """Return the case-insensitive level-2 pattern: one capturing group over
    the proximity and surrounding-area keywords."""
    alternatives = "|".join((get_near_regex(), get_surrounding_regex()))
    return "(?i)({})".format(alternatives)
55
+
56
def get_level3_regex():
    """Return the case-insensitive level-3 pattern for "<digits> <unit>" text.

    The pattern is one capturing group made of: one or more digits, zero or
    more whitespace characters, then one or more repetitions of the fragment
    returned by ``get_quantities_regex``.
    """
    pieces = [
        "(?i)(",
        get_number_regex(), one_plus,
        get_space_regex(), zero_plus,
        get_quantities_regex(), one_plus,
        ")",
    ]
    return "".join(pieces)
59
+
60
+
61
+
62
def get_keywords():
    """Return every spatial keyword plus the separators ",", "and" and ".".

    The list holds, in order: directional keywords, proximity keywords,
    surrounding-area keywords, central-area keywords, then the three
    separator tokens.
    """
    # Start from the directional list; a previous empty-list initialisation
    # was overwritten immediately and has been removed.
    keywords = get_directional_regex().split("|")
    for keyword_source in (get_near_regex, get_surrounding_regex, get_center_regex):
        keywords.extend(keyword_source().split("|"))
    keywords.extend([",", "and", "."])
    return keywords