Spaces:

SpatialWebAgent
/

SpatialParseback

Build error

SpatialParseback / utils /llm_ent_extract.py

Shunfeng Zheng

Upload 83 files

4c425e5 verified 24 days ago

7.98 kB

	from openai import OpenAI
	import re
	from utils.config import api_key

	# Module-wide OpenAI client; the key is read from utils.config.
	client = OpenAI(api_key=api_key)

	# Chat model passed to every completion request in this module.
	# ("gpt-3.5-turbo" was previously assigned here and immediately overwritten.)
	model = "gpt-4o"

	def extract_GPE(text):
	    """Ask the chat model to list the geopolitical entities found in *text*.

	    The system prompt instructs the model to answer in the form
	    ``[###ENTITY1###, ###ENTITY2###]``.

	    :param text: The user text to analyse; sent verbatim as the user message.
	    :return: The message content of the first completion choice, unparsed.
	        The model is only asked (not forced) to follow the ``###`` format.
	    """
	    system_prompt = (
	        'You are a professional geographer. '
	        'Your task is to extract all geopolitical entities from a given text. '
	        'Geopolitical entities can include countries, regions, cities, autonomous regions, or other administrative divisions. '
	        'For each geopolitical entity, wrap the name in a unique character sequence, such as [###ENTITY###]. '
	        'If there are multiple entities, output them in the following format:\n'
	        '[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\n'
	        'Here is an example:\n'
	        ' Example:\n\n'
	        'Text:\n'
	        '"China and India are two of the most populous countries in Asia."\n\n'
	        'Expected Output:\n'
	        '[###China###, ###India###]'
	    )

	    messages = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": text},
	    ]

	    chat_completion = client.chat.completions.create(
	        messages=messages,
	        model=model,
	    )

	    return chat_completion.choices[0].message.content


	def extract_LOC(text):
	    """Ask the chat model to list the physical location entities (LOC) in *text*.

	    The system prompt asks for landmarks and geographical features only, and
	    explicitly excludes countries, cities and other administrative divisions.
	    The few-shot example therefore shows a text that mixes both kinds and an
	    expected output containing only the physical locations.

	    :param text: The user text to analyse; sent verbatim as the user message.
	    :return: The message content of the first completion choice, unparsed.
	        The model is only asked (not forced) to follow the ``###`` format.
	    """
	    system_prompt = (
	        'You are a professional geographer. '
	        'Your task is to extract all location entities (LOC) from a given text. '
	        'Location entities can include physical locations such as landmarks, geographical features, mountains, rivers, oceans, and places, but do not include political or administrative divisions such as countries or cities (these are considered geopolitical entities). '
	        'For each location entity, wrap the name in a unique character sequence, such as [###ENTITY###]. '
	        'If there are multiple entities, output them in the following format:\n'
	        '[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\n'
	        'Here is an example:\n'
	        ' Example:\n\n'
	        'Text:\n'
	        # The example must agree with the instructions above: administrative
	        # names (Arizona, Colorado, Wyoming) are deliberately left out.
	        '"The Grand Canyon is one of the most spectacular natural wonders in the world, located in the state of Arizona. Nearby, the Colorado River flows through the canyon, carving its way through the rugged terrain. In the north, the Rocky Mountains stretch across several states, including Colorado and Wyoming."\n\n'
	        'Expected Output:\n'
	        '[###Grand Canyon###, ###Colorado River###, ###Rocky Mountains###]'
	    )

	    messages = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": text},
	    ]

	    chat_completion = client.chat.completions.create(
	        messages=messages,
	        model=model,
	    )

	    return chat_completion.choices[0].message.content


	def extract_RSE_1(text):
	    """Ask the chat model to list directional spatial keywords found in *text*.

	    The system prompt targets words such as north, southeast, central or
	    downtown and requests the answer as ``[###ENTITY1###, ###ENTITY2###]``.

	    :param text: The user text to analyse; sent verbatim as the user message.
	    :return: The message content of the first completion choice, unparsed.
	        The model is only asked (not forced) to follow the ``###`` format.
	    """
	    system_prompt = (
	        'You are a professional geographer. '
	        'Your task is to extract all spatial entities (directional keywords) from a given text. '
	        'Spatial entities can include directional keywords such as north, south, east, west, and more specific terms like northeast, northwest, southeast, southwest, as well as terms indicating locations like center, central, downtown, and midtown. '
	        'For each spatial entity, wrap the name in a unique character sequence, such as [###ENTITY###]. '
	        'If there are multiple entities, output them in the following format:\n'
	        '[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\n'
	        'Here is an example:\n'
	        ' Example:\n\n'
	        'Text:\n'
	        '"The hotel is located in the downtown area of New York, just south of Central Park, with a beautiful view of the southeast corner."\n\n'
	        'Expected Output:\n'
	        '[###downtown###, ###south###, ###southeast###]'
	    )

	    messages = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": text},
	    ]

	    chat_completion = client.chat.completions.create(
	        messages=messages,
	        model=model,
	    )

	    return chat_completion.choices[0].message.content


	def extract_RSE_2(text):
	    """Ask the chat model to list fuzzy proximity keywords found in *text*.

	    The system prompt targets words such as nearby, close, beside or adjacent
	    and requests the answer as ``[###ENTITY1###, ###ENTITY2###]``.

	    :param text: The user text to analyse; sent verbatim as the user message.
	    :return: The message content of the first completion choice, unparsed.
	        The model is only asked (not forced) to follow the ``###`` format.
	    """
	    system_prompt = (
	        'You are a professional geographer. '
	        'Your task is to extract all fuzzy spatial entities (keywords) from a given text. '
	        'Fuzzy spatial keywords can include terms like nearby, near, vicinity, close, beside, next, adjacent, immediate, border, surrounding, neighbourhood, proximity, territory, locality, and similar terms. '
	        'For each fuzzy spatial keyword, wrap the name in a unique character sequence, such as [###ENTITY###]. '
	        'If there are multiple entities, output them in the following format:\n'
	        '[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\n'
	        'Here is an example:\n'
	        ' Example:\n\n'
	        'Text:\n'
	        '"The park is located nearby the lake, with several cafes close to the walking paths, and a small garden adjacent to the main entrance."\n\n'
	        'Expected Output:\n'
	        '[###nearby###, ###close###, ###adjacent###]'
	    )

	    messages = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": text},
	    ]

	    chat_completion = client.chat.completions.create(
	        messages=messages,
	        model=model,
	    )

	    return chat_completion.choices[0].message.content


	def extract_RSE_3(text):
	    """Ask the chat model to list distance expressions found in *text*.

	    The system prompt targets a number followed by a distance unit
	    (e.g. ``3 km``, ``500 meters``) and requests the answer as
	    ``[###ENTITY1###, ###ENTITY2###]``.

	    :param text: The user text to analyse; sent verbatim as the user message.
	    :return: The message content of the first completion choice, unparsed.
	        The model is only asked (not forced) to follow the ``###`` format.
	    """
	    system_prompt = (
	        'You are a professional geographer. '
	        'Your task is to extract all fuzzy distance keywords from a given text. '
	        'Fuzzy distance keywords include numeric values followed by distance units such as kilometer, mile, meter, foot, inch, centimeter, and other related units. '
	        'The distance units can be in different formats, such as km, m, mi, ft, yd, cm, mm, or even in full words like kilometer, mile, or inch. '
	        'For each fuzzy distance keyword, wrap the entire expression (number and unit) in a unique character sequence, such as [###ENTITY###]. '
	        'If there are multiple entities, output them in the following format:\n'
	        '[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\n'
	        'Here is an example:\n'
	        ' Example:\n\n'
	        'Text:\n'
	        '"The park is located 3 km away from the city center, while the nearest supermarket is only 500 meters from here, and the lake is about 1 mile further down the road."\n\n'
	        'Expected Output:\n'
	        '[###3 km###, ###500 meters###, ###1 mile###]'
	    )

	    messages = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": text},
	    ]

	    chat_completion = client.chat.completions.create(
	        messages=messages,
	        model=model,
	    )

	    return chat_completion.choices[0].message.content


	def extract(respond, entity_type):
	    """Extract the entities wrapped in ``###`` from *respond* and tag them.

	    :param respond: String containing the entities, e.g.
	        ``'[###2 miles###, ###200 meters###, ###5 kilometers###]'``.
	        ``None`` or an empty string yields an empty dict instead of raising.
	    :param entity_type: The type (string) assigned to every extracted entity.
	    :return: A dict whose keys are the extracted entities (surrounding
	        whitespace stripped) and whose values are all ``entity_type``.
	        Duplicate entities collapse into one key.
	    """
	    # A missing/empty response has nothing to parse; re.findall would raise
	    # TypeError on None.
	    if not respond:
	        return {}

	    # Capture whatever sits between a pair of ### markers (non-greedy, so
	    # neighbouring entities are matched separately).
	    extracted_entities = re.findall(r'###(.*?)###', respond)

	    # Map every entity to the same type, skipping captures that are blank
	    # after stripping (e.g. '######'), since they name no entity.
	    entity_dict = {}
	    for raw_entity in extracted_entities:
	        entity = raw_entity.strip()
	        if entity:
	            entity_dict[entity] = entity_type

	    return entity_dict


	def llmapi(text):
	    """Send *text* to the chat model under a placeholder system prompt.

	    The system prompt is a Chinese placeholder string (roughly "please fill
	    in the prompt you need"); it is sent to the API as-is.

	    :param text: The user message content.
	    :return: The message content of the first completion choice.
	    """
	    response = client.chat.completions.create(
	        model=model,
	        messages=[
	            {"role": "system", "content": '请你填入需要的提示'},
	            {"role": "user", "content": text},
	        ],
	    )
	    first_choice = response.choices[0]
	    return first_choice.message.content


	# print(extract('[###2 miles###, ###200 meters###, ###5 kilometers###]', 'rse'))
	# print(extract(extract_GPE('Between Burwood and Glebe.'), 'gpe'))