Spaces:

zekun-li
/

geolm-base-toponym-recognition-demo

Sleeping

App Files Files Community

zekun-li commited on 13 days ago

Commit

d2fca5c

verified ·

1 Parent(s): 5b91a05

Create app.py

Browse files

Files changed (1) hide show

app.py +51 -0

app.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import torch
+from transformers import AutoTokenizer, AutoModelForTokenClassification
+import gradio as gr
+# Load model and tokenizer once
+model_name = "zekun-li/geolm-base-toponym-recognition"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForTokenClassification.from_pretrained(model_name)
+model.to("cpu")  # Use "cuda" if you have GPU
+model.eval()
+# Extract token spans labeled as toponyms
+def get_toponym_entities(text):
+    inputs = tokenizer(
+        text,
+        return_offsets_mapping=True,
+        return_tensors="pt",
+        truncation=True,
+        max_length=512,
+    )
+    offset_mapping = inputs.pop("offset_mapping")[0]
+    input_ids = inputs["input_ids"]
+    with torch.no_grad():
+        outputs = model(**inputs)
+        predictions = torch.argmax(outputs.logits, dim=2)[0]
+    entities = []
+    for idx, label_id in enumerate(predictions):
+        if label_id != 0 and idx < len(offset_mapping):
+            start, end = offset_mapping[idx].tolist()
+            if end > start:
+                entities.append((start, end, "Toponym"))
+    return {"text": text, "entities": entities}
+# Launch Gradio app
+demo = gr.Interface(
+    fn=get_toponym_entities,
+    inputs=gr.Textbox(lines=10, placeholder="Enter text with place names..."),
+    outputs=gr.HighlightedText(),
+    title="🌍 Toponym Recognition with GeoLM",
+    description="Enter a paragraph and detect highlighted place names using the zekun-li/geolm-base-toponym-recognition model.",
+    examples=[
+        ["Minneapolis, officially the City of Minneapolis, is a city in Minnesota."],
+        ["Los Angeles is the most populous city in California."],
+    ],
+)
+if __name__ == "__main__":
+    demo.launch()