File size: 1,705 Bytes
d2fca5c 58dd7a5 d2fca5c 58dd7a5 d2fca5c 58dd7a5 d2fca5c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification
import gradio as gr
# Load the tokenizer and the model a single time at import; every request
# handled afterwards reuses these module-level objects.
model_name = "zekun-li/geolm-base-toponym-recognition"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Keep the model on the CPU (pass "cuda" instead when a GPU is available) and
# switch it to evaluation mode, since it is only used for inference here.
model = AutoModelForTokenClassification.from_pretrained(model_name).to("cpu").eval()
def _merge_token_spans(label_ids, offsets, id2label=None):
    """Collapse per-token predictions into character-level toponym spans.

    Args:
        label_ids: Predicted label id for every token, as plain ints.
        offsets: ``(start, end)`` character offsets for every token, aligned
            with ``label_ids``.
        id2label: Optional mapping from label id to label name. When the
            names follow the BIO convention, a ``B-`` label that begins a new
            word starts a new span instead of extending the previous one.

    Returns:
        A list of ``{"start", "end", "entity"}`` dicts, one per merged span,
        in text order.
    """
    names = id2label or {}
    spans = []
    last_idx = None  # index of the most recent token that was kept
    # zip() stops at the shorter sequence, so no explicit bounds check is needed.
    for idx, (label_id, (start, end)) in enumerate(zip(label_ids, offsets)):
        # Label 0 is treated as "not a toponym"; zero-width offsets (e.g. the
        # tokenizer's special tokens) cover no characters of the input.
        if label_id == 0 or end <= start:
            continue

        follows_previous = last_idx is not None and idx == last_idx + 1
        # No character gap means this token continues the same word
        # (a sub-word piece), so it always belongs to the current span.
        same_word = follows_previous and start == spans[-1]["end"]
        starts_new = str(names.get(label_id, "")).startswith("B-")

        if follows_previous and (same_word or not starts_new):
            spans[-1]["end"] = end
        else:
            spans.append({"start": start, "end": end, "entity": "Toponym"})
        last_idx = idx
    return spans


def get_toponym_entities(text):
    """Detect place names in ``text`` for display in a highlighted-text view.

    The text is tokenized (truncated to 512 tokens, so anything beyond that
    limit is not annotated), classified token by token, and consecutive
    toponym tokens are merged into a single character span so that one place
    name yields one highlight rather than one per sub-word piece.

    Args:
        text: The input string. Empty, whitespace-only or ``None`` input
            short-circuits without running the model.

    Returns:
        A dict ``{"text": <input text>, "entities": [...]}`` where each entity
        is ``{"start": int, "end": int, "entity": "Toponym"}`` with character
        offsets into ``text``.
    """
    if not text or not text.strip():
        return {"text": text or "", "entities": []}

    encoded = tokenizer(
        text,
        return_offsets_mapping=True,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    # The offsets are only needed for span building; the model does not accept them.
    offsets = encoded.pop("offset_mapping")[0].tolist()

    # Run on whichever device the model lives on, so moving the model to a
    # GPU does not require touching this function.
    model_inputs = {name: tensor.to(model.device) for name, tensor in encoded.items()}

    with torch.no_grad():
        logits = model(**model_inputs).logits
    label_ids = logits.argmax(dim=-1)[0].tolist()

    id2label = getattr(model.config, "id2label", None)
    return {
        "text": text,
        "entities": _merge_token_spans(label_ids, offsets, id2label),
    }
# Assemble the Gradio UI: a multi-line text box feeds the recognizer and the
# result is rendered as highlighted text.
text_input = gr.Textbox(lines=10, placeholder="Enter text with place names...")
highlighted_output = gr.HighlightedText()
example_texts = [
    ["Minneapolis, officially the City of Minneapolis, is a city in Minnesota."],
    ["Los Angeles is the most populous city in California."],
]

demo = gr.Interface(
    fn=get_toponym_entities,
    inputs=text_input,
    outputs=highlighted_output,
    title="π Toponym Recognition with GeoLM",
    description="Enter a paragraph and detect highlighted place names using the zekun-li/geolm-base-toponym-recognition model.",
    examples=example_texts,
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|