# Deployment smoke test: make sure Streamlit can be installed and imported.
# (Original author's note on this bootstrap: "this will not fail".)
#
# NOTE(review): this file was recovered from a whitespace-collapsed copy.
# Only the bootstrap below is live; the application code further down was
# already commented out and is kept disabled here.
import importlib.util
import subprocess
import sys

# Install Streamlit only when it is missing, so repeated runs skip pip.
if importlib.util.find_spec("streamlit") is None:
    # Invoke pip through the running interpreter so the package is installed
    # into this environment rather than whichever `pip` is first on PATH.
    # check=True surfaces a failed install here instead of at the import.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "streamlit"],
        check=True,
    )

import streamlit

# ---------------------------------------------------------------------------
# Disabled application code (kept for reference).
#
# NOTE(review): indentation below is reconstructed from syntax. The HTML that
# was passed to HTML_WRAPPER, st.markdown(...) and the table link cells looks
# like it was stripped during extraction (the f-strings no longer reference
# their url variables) -- restore it from version control before re-enabling.
# NOTE(review): set_header() and set_side_menu() are each defined twice; if
# this code is re-enabled the later definitions replace the earlier ones.
# ---------------------------------------------------------------------------
# import subprocess
# import importlib.util
# import os
#
# # Run the install only when geospacy is not installed yet (avoids reinstalling).
# if importlib.util.find_spec("geospacy") is None:
#     subprocess.run(
#         ["pip", "install", "--no-deps", "-r", "requirements_geospacy.txt"],
#         check=True
#     )
#
# import streamlit as st
# from spacy import displacy
# import spacy
# import geospacy
# from PIL import Image
# import base64
# import sys
# import pandas as pd
# import en_core_web_md
# from spacy.tokens import Span, Doc, Token
# from utils import geoutil
# import llm_coding
# import urllib.parse
#
# colors = {'GPE': "#43c6fc", "LOC": "#fd9720", "RSE":"#a6e22d"}
# options = {"ents": ['GPE', 'LOC', "RSE"], "colors": colors}
# HTML_WRAPPER = """
#
# {}
#
# """
#
# model = ""
# gpe_selected = "GPE"
# loc_selected = "LOC"
# rse_selected = "RSE"
# types = ""
# #BASE_URL = "http://localhost:8080/"
# BASE_URL = ""
#
#
# def set_header():
#     LOGO_IMAGE = "tetis-1.png"
#     st.markdown(
#         """
#
#         """,
#         unsafe_allow_html=True
#     )
#     st.markdown(
#         f"""
#
#         GeOspaCy
#
#         """,
#         unsafe_allow_html=True
#     )
#
#
# def set_side_menu():
#     global gpe_selected, loc_selected, rse_selected, model, types
#     types =""
#     params = st.experimental_get_query_params()
#     # params = st.query_params
#     # print(params, 777)
#     st.sidebar.markdown("## Spacy Model")
#     st.sidebar.markdown("You can **select** the values of the *spacy model* from Dropdown.")
#     models = ['en_core_web_sm', 'en_core_web_md', 'en_core_web_lg', 'en_core_web_trf']
#     if "model" in params:
#         default_ix = models.index(params["model"][0])
#     else:
#         default_ix = models.index('en_core_web_sm')
#     model = st.sidebar.selectbox('Spacy Model',models, index=default_ix)
#
#     st.sidebar.markdown("## Spatial Entity Labels")
#     st.sidebar.markdown("**Mark** the Spatial Entities you want to extract?")
#     tpes = ""
#     if "type" in params:
#         tpes = params['type'][0]
#
#     if "g" in tpes:
#         gpe = st.sidebar.checkbox('GPE', value = True)
#     else:
#         gpe = st.sidebar.checkbox('GPE')
#
#     if "l" in tpes:
#         loc = st.sidebar.checkbox('LOC', value = True)
#     else:
#         loc = st.sidebar.checkbox('LOC')
#
#     if "r" in tpes:
#         rse = st.sidebar.checkbox('RSE', value = True)
#     else:
#         rse = st.sidebar.checkbox('RSE')
#
#     if(gpe):
#         gpe_selected ="GPE"
#         types+="g"
#
#     if(loc):
#         loc_selected ="LOC"
#         types+="l"
#
#     if(rse):
#         rse_selected ="RSE"
#         types+="r"
#
#
# def set_input():
#     params = st.experimental_get_query_params()
#     # params = st.query_params
#     if "text" not in params:
#         text = st.text_area("Input unstructured text:", "")
#     else:
#         text = st.text_area("Enter the text to extract {Spatial Entities}", params["text"][0])
#
#     if(st.button("Extract")):
#         # return 'France has detected a highly pathogenic strain of bird flu in a pet shop near Paris, days after an identical outbreak in one of Corsica’s main cities.'
#         # NOTE(review): the first return below is a hard-coded debug value;
#         # it makes the following returns (including `return text`) unreachable.
#         return 'I would like to know where is the area between Burwood and Glebe. Pyrmont.'
#         return '5 km east of Burwood. 3 km south of Glebe. Between Pyrmont and Glebe.'
#         # return 'Between Burwood and Pyrmont.'
#         # return 'Between Burwood and Glebe.'
#         # return 'Between Burwood and Darling Harbour.'
#         # return 'Between China and USA.'
#         # return 'The Burwood city.'
#         # text = "New York is north of Washington. Between Burwood and Pyrmont city."
#         return text
#
#
# def set_selected_entities(doc):
#     global gpe_selected, loc_selected, rse_selected, model
#     ents = [ent for ent in doc.ents if ent.label_ == gpe_selected or ent.label_ == loc_selected or ent.label_ == rse_selected]
#     doc.ents = ents
#     return doc
#
#
# def extract_spatial_entities(text):
#     # nlp = en_core_web_md.load()
#     # nlp = spacy.load("en_core_web_md")
#     # nlp.add_pipe("spatial_pipeline", after="ner")
#     # doc = nlp(text)
#     # doc = set_selected_entities(doc)
#     # html = displacy.render(doc, style="ent", options=options)
#     # html = html.replace("\n", "")
#     # st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
#     # show_spatial_ent_table(doc, text)
#
#     nlp = spacy.load("en_core_web_md") #####
#     nlp.add_pipe("spatial_pipeline", after="ner")
#     doc = nlp(text)
#
#     # Sentence-by-sentence processing
#     sent_ents = []
#     sent_texts = []
#     sent_rse_id = []
#     offset = 0  # tracks the current token offset
#     sent_start_positions = [0]  # records sentence information
#     doc_copy = doc.copy()  # used for displaying the equation composition
#     for sent in doc.sents:
#         sent_doc = nlp(sent.text)  # process one sentence at a time
#         sent_doc = set_selected_entities(sent_doc)  # entities are filtered here
#         sent_texts.append(sent_doc.text)
#         for ent in sent_doc.ents:
#             sent_rse_id.append(ent._.rse_id)
#         # **Shift each entity's indices so they match the full text**
#         for ent in sent_doc.ents:
#             new_ent = Span(doc, ent.start + offset, ent.end + offset, label=ent.label_)
#             sent_ents.append(new_ent)
#
#         offset += len(sent)  # update the offset
#         sent_start_positions.append(sent_start_positions[-1] + len(sent))  # record the sentence start
#
#     # **Create the new Doc**
#     final_doc = Doc(nlp.vocab, words=[token.text for token in doc], spaces=[token.whitespace_ for token in doc])
#     for i in sent_start_positions:  # manually mark sentence start points
#         if i < len(final_doc):
#             final_doc[i].is_sent_start = True
#     # **Set the entities**
#     final_doc.set_ents(sent_ents)
#     for i in range(len(sent_rse_id)):
#         final_doc.ents[i]._.rse_id = sent_rse_id[i]
#     print(doc.ents[0].sent, '原始')
#     doc = final_doc
#     print(doc.ents[0].sent, '新')
#     # Sentence-by-sentence processing finished
#
#     # doc = set_selected_entities(doc)
#     doc.to_disk("saved_doc.spacy")
#     html = displacy.render(doc,style="ent", options = options)
#     html = html.replace("\n","")
#     st.write(HTML_WRAPPER.format(html),unsafe_allow_html=True)
#     show_spatial_ent_table(doc, text)
#     st.markdown("123123")
#     show_sentence_selector_table(doc_copy)
#
#
# def show_sentence_selector_table(doc_copy):
#     st.markdown("**______________________________________________________________________________________**")
#     st.markdown("**Sentence Selector for Geographic Composition**")
#
#     # Extract the sentences
#     sentences = list(doc_copy.sents)
#
#     # Build the table data
#     rows = []
#     for idx, sent in enumerate(sentences):
#         sentence_text = sent.text.strip()
#         # Build the jump link (targets Tagger)
#         url = BASE_URL + "Tagger?mode=geocombo&text=" + urllib.parse.quote(sentence_text)
#         new_row = {
#             'Sr.': idx + 1,
#             'sentence': sentence_text,
#             'Select': f'Select this sentence'
#         }
#         rows.append(new_row)
#
#     # Convert to a DataFrame and render as HTML
#     df = pd.DataFrame(rows)
#     st.write(df.to_html(escape=False, index=False), unsafe_allow_html=True)
#
#
# def show_spatial_ent_table(doc, text):
#     global types
#     if len(doc.ents) > 0:
#         st.markdown("**______________________________________________________________________________________**")
#         st.markdown("**Spatial Entities List**")
#
#         # Initialise an empty DataFrame
#         df = pd.DataFrame(columns=['Sr.', 'entity', 'label', 'Map', 'GEOJson'])
#         rows = []  # holds every row
#         for ent in doc.ents:
#             url_map = BASE_URL + "Tagger?map=true&type=" + types + "&model=" + model + "&text=" + text + "&entity=" + ent._.rse_id
#             print(url_map, 'uuurrr')
#             print(ent._.rse_id, 'pppp')
#             url_json = BASE_URL + "Tagger?geojson=true&type=" + types + "&model=" + model + "&text=" + text + "&entity=" + ent._.rse_id
#
#             # Create the new row
#             new_row = {
#                 'Sr.': len(rows) + 1,
#                 'entity': ent.text,
#                 'label': ent.label_,
#                 'Map': f'View',
#                 'GEOJson': f'View'
#             }
#             rows.append(new_row)  # add the new row to the list
#
#         # Convert all rows to a DataFrame
#         df = pd.DataFrame(rows)
#
#         # Display the HTML table with Streamlit
#         st.write(df.to_html(escape=False, index=False), unsafe_allow_html=True)
#
#         # params = st.experimental_get_query_params()
#         # params = st.query_params
#         # ase, level_1, level_2, level_3 = geoutil.get_ent(params["entity"][0])
#         # print(geoutil.get_ent(params), 'ppppp')
#
#
# def set_header():
#     # tetis Geospacy LOGO
#     LOGO_IMAGE = "title.jpg"
#     st.markdown(
#         """
#
#         """,
#         unsafe_allow_html=True
#     )
#     st.markdown(
#         f"""
#
#         SpatialParse
#
#         """,
#         unsafe_allow_html=True
#     )
#
#
# def set_side_menu():
#     global gpe_selected, loc_selected, rse_selected, model, types
#     types = ""
#     params = st.experimental_get_query_params()
#
#     st.sidebar.markdown("## Deployment Method")
#     st.sidebar.markdown("You can select the deployment method for the model.")
#     deployment_options = ["API", "Local deployment"]
#     use_local_model = st.sidebar.radio("Choose deployment method:", deployment_options, index=0) == "Local deployment"
#     if use_local_model:
#         local_model_path = st.sidebar.text_input("Enter local model path:", "")
#
#     st.sidebar.markdown("## LLM Model")
#     st.sidebar.markdown("You can **select** different *LLM model* powered by API.")
#     models = ['Llama-3-8B', 'Mistral-7B-0.3', 'Gemma-2-10B', 'GPT-4o', 'Gemini Pro', 'Deepseek-R1', 'en_core_web_sm', 'en_core_web_md', 'en_core_web_lg', 'en_core_web_trf']
#     if "model" in params:
#         default_ix = models.index(params["model"][0])
#     else:
#         default_ix = models.index('GPT-4o')
#     model = st.sidebar.selectbox('LLM Model', models, index=default_ix)
#
#     st.sidebar.markdown("## Spatial Entity Labels")
#     st.sidebar.markdown("Please **Mark** the Spatial Entities you want to extract.")
#     tpes = ""
#     if "type" in params:
#         tpes = params['type'][0]
#
#     st.sidebar.markdown("### Absolute Spatial Entity:")
#     if "g" in tpes:
#         gpe = st.sidebar.checkbox('GPE', value=True)
#     else:
#         gpe = st.sidebar.checkbox('GPE')
#
#     if "l" in tpes:
#         loc = st.sidebar.checkbox('LOC', value=True)
#     else:
#         loc = st.sidebar.checkbox('LOC')
#
#     st.sidebar.markdown("### Relative Spatial Entity:")
#     if "r" in tpes:
#         rse = st.sidebar.checkbox('RSE', value=True)
#     else:
#         rse = st.sidebar.checkbox('RSE')
#
#     if (gpe):
#         gpe_selected = "GPE"
#         types += "g"
#
#     if (loc):
#         loc_selected = "LOC"
#         types += "l"
#
#     if (rse):
#         rse_selected = "RSE"
#         types += "r"
#
#
# def main():
#     global gpe_selected, loc_selected, rse_selected, model
#     #print(displacy.templates.TPL_ENT)
#     set_header()
#     set_side_menu()
#     text = set_input()
#     if(text is not None):
#         extract_spatial_entities(text)
#     elif "text" in st.session_state:
#         text = st.session_state.text
#         extract_spatial_entities(text)
#
#
# if __name__ == '__main__':
#     main()