Spaces:
Runtime error
Runtime error
File size: 7,975 Bytes
17e77ea |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
from openai import OpenAI
import re
from utils.config import api_key
# Shared OpenAI client used for every chat-completion request in this module.
# The key is read from utils.config.
client = OpenAI(api_key=api_key)

# Chat model sent with every request. A previous `model = "gpt-3.5-turbo"`
# assignment was immediately overwritten by this one, so it has been dropped.
model = "gpt-4o"
def extract_GPE(text):
    """Ask the chat model to list the geopolitical entities found in *text*.

    :param text: the passage to analyse; sent verbatim as the user message.
    :return: the content of the first completion choice. The prompt asks for
        the form ``[###ENTITY1###, ###ENTITY2###]``, but the model's reply is
        returned unparsed and unvalidated.
    """
    system_prompt = '''You are a professional geographer. Your task is to extract all geopolitical entities from a given text. Geopolitical entities can include countries, regions, cities, autonomous regions, or other administrative divisions. For each geopolitical entity, wrap the name in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"China and India are two of the most populous countries in Asia."\n\nExpected Output:\n[###China###, ###India###]'''
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )
    return chat_completion.choices[0].message.content
def extract_LOC(text):
    """Ask the chat model to list the physical location entities in *text*.

    The prompt targets non-political locations (landmarks, rivers, mountains,
    oceans, ...) and tells the model to leave out countries and cities.

    :param text: the passage to analyse; sent verbatim as the user message.
    :return: the content of the first completion choice. The prompt asks for
        the form ``[###ENTITY1###, ###ENTITY2###]``, but the model's reply is
        returned unparsed and unvalidated.
    """
    # The few-shot example must agree with the instructions: it deliberately
    # contains a country (Egypt) that is NOT part of the expected output, so
    # the model sees that geopolitical entities are excluded.
    system_prompt = '''You are a professional geographer. Your task is to extract all location entities (LOC) from a given text. Location entities can include physical locations such as landmarks, geographical features, mountains, rivers, oceans, and places, but do not include political or administrative divisions such as countries or cities (these are considered geopolitical entities). For each location entity, wrap the name in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"The Nile River flows through Egypt before emptying into the Mediterranean Sea."\n\nExpected Output:\n[###Nile River###, ###Mediterranean Sea###]'''
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )
    return chat_completion.choices[0].message.content
def extract_RSE_1(text):
    """Ask the chat model to list directional spatial keywords in *text*.

    The prompt covers compass directions (north, southeast, ...) and
    position words such as center, central, downtown and midtown.

    :param text: the passage to analyse; sent verbatim as the user message.
    :return: the content of the first completion choice. The prompt asks for
        the form ``[###ENTITY1###, ###ENTITY2###]``, but the model's reply is
        returned unparsed and unvalidated.
    """
    system_prompt = '''You are a professional geographer. Your task is to extract all spatial entities (directional keywords) from a given text. Spatial entities can include directional keywords such as north, south, east, west, and more specific terms like northeast, northwest, southeast, southwest, as well as terms indicating locations like center, central, downtown, and midtown. For each spatial entity, wrap the name in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"The hotel is located in the downtown area of New York, just south of Central Park, with a beautiful view of the southeast corner."\n\nExpected Output:\n[###downtown###, ###south###, ###southeast###]'''
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )
    return chat_completion.choices[0].message.content
def extract_RSE_2(text):
    """Ask the chat model to list fuzzy proximity keywords in *text*.

    The prompt covers words such as nearby, near, vicinity, close, beside,
    next, adjacent and similar terms.

    :param text: the passage to analyse; sent verbatim as the user message.
    :return: the content of the first completion choice. The prompt asks for
        the form ``[###ENTITY1###, ###ENTITY2###]``, but the model's reply is
        returned unparsed and unvalidated.
    """
    system_prompt = '''You are a professional geographer. Your task is to extract all fuzzy spatial entities (keywords) from a given text. Fuzzy spatial keywords can include terms like nearby, near, vicinity, close, beside, next, adjacent, immediate, border, surrounding, neighbourhood, proximity, territory, locality, and similar terms. For each fuzzy spatial keyword, wrap the name in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"The park is located nearby the lake, with several cafes close to the walking paths, and a small garden adjacent to the main entrance."\n\nExpected Output:\n[###nearby###, ###close###, ###adjacent###]'''
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )
    return chat_completion.choices[0].message.content
def extract_RSE_3(text):
    """Ask the chat model to list distance expressions found in *text*.

    The prompt targets a numeric value followed by a distance unit
    (e.g. "3 km", "500 meters", "1 mile"), wrapped as a single entity.

    :param text: the passage to analyse; sent verbatim as the user message.
    :return: the content of the first completion choice. The prompt asks for
        the form ``[###ENTITY1###, ###ENTITY2###]``, but the model's reply is
        returned unparsed and unvalidated.
    """
    system_prompt = '''You are a professional geographer. Your task is to extract all fuzzy distance keywords from a given text. Fuzzy distance keywords include numeric values followed by distance units such as kilometer, mile, meter, foot, inch, centimeter, and other related units. The distance units can be in different formats, such as km, m, mi, ft, yd, cm, mm, or even in full words like kilometer, mile, or inch. For each fuzzy distance keyword, wrap the entire expression (number and unit) in a unique character sequence, such as [###ENTITY###]. If there are multiple entities, output them in the following format:\n[###ENTITY1###, ###ENTITY2###, ###ENTITY3###]\nHere is an example:\n Example:\n\nText:\n"The park is located 3 km away from the city center, while the nearest supermarket is only 500 meters from here, and the lake is about 1 mile further down the road."\n\nExpected Output:\n[###3 km###, ###500 meters###, ###1 mile###]'''
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )
    return chat_completion.choices[0].message.content
def extract(respond, entity_type):
    """
    Extract the entities wrapped in ``###`` markers from a string and map
    each of them to the given entity type.

    :param respond: string containing wrapped entities, e.g.
        '[###2 miles###, ###200 meters###, ###5 kilometers###]'. ``None`` or
        an empty string yields an empty dict instead of raising.
    :param entity_type: type label (string) assigned to every extracted entity
    :return: dict whose keys are the extracted entities (surrounding
        whitespace stripped) and whose values are all ``entity_type``
    """
    # A chat completion's message content may be None, so guard before
    # handing the value to the regex engine.
    if not respond:
        return {}

    entity_dict = {}
    # Non-greedy match so each ###...### pair is captured separately.
    for raw_entity in re.findall(r'###(.*?)###', respond):
        entity = raw_entity.strip()
        # Skip blank matches such as '######' -- an empty name is not an entity.
        if entity:
            entity_dict[entity] = entity_type
    return entity_dict
def llmapi(text, system_prompt='请你填入需要的提示'):
    """Send *text* to the chat model under a caller-supplied system prompt.

    :param text: the user message to send.
    :param system_prompt: system instruction for the model. The default is the
        original placeholder string (Chinese for "please fill in the prompt
        you need"), kept so existing ``llmapi(text)`` calls behave as before.
    :return: the content of the first completion choice, returned as-is.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )
    return chat_completion.choices[0].message.content
# print(extract('[###2 miles###, ###200 meters###, ###5 kilometers###]', 'rse'))
# print(extract(extract_GPE('Between Burwood and Glebe.'), 'gpe'))
|