zekun-li committed on
Commit
d2fca5c
·
verified ·
1 Parent(s): 5b91a05

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -0
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
3
+ import gradio as gr
4
+
5
# Load the tokenizer and model a single time at import so that every request
# served by the app reuses the same instances.
model_name = "zekun-li/geolm-base-toponym-recognition"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Keep the model on the CPU and switch it to evaluation mode for inference.
# NOTE: moving it to "cuda" also requires moving the tokenized inputs to the
# same device before calling the model.
model = AutoModelForTokenClassification.from_pretrained(model_name).to("cpu").eval()
11
+
12
def _merge_touching_spans(spans):
    """Merge character spans that overlap or touch into single spans.

    Args:
        spans: Iterable of ``(start, end)`` pairs sorted by ``start``.

    Returns:
        A list of ``(start, end)`` tuples in which no two spans overlap or
        share a boundary.
    """
    merged = []
    for start, end in spans:
        if merged and start <= merged[-1][1]:
            # Continuation of the previous span (e.g. the next word piece of
            # the same word): extend it instead of starting a new one.
            merged[-1][1] = max(merged[-1][1], end)
        else:
            merged.append([start, end])
    return [(start, end) for start, end in merged]


def get_toponym_entities(text):
    """Find toponym character spans in ``text`` for ``gr.HighlightedText``.

    The text is tokenized (truncated to 512 tokens), classified per token by
    the module-level ``model``, and every token whose predicted label id is
    non-zero is reported as a toponym. Spans of tokens that touch each other
    are merged so a word split into several pieces yields one highlight.

    Args:
        text: Raw input string to analyse.

    Returns:
        A dict with the keys ``"text"`` (the input, unchanged) and
        ``"entities"``: a list of dicts with the keys ``"entity"``,
        ``"start"`` and ``"end"``, where ``start``/``end`` are character
        offsets into ``text``. This is the dictionary layout that
        ``gr.HighlightedText`` accepts; plain tuples are rejected by it.
    """
    inputs = tokenizer(
        text,
        return_offsets_mapping=True,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    # The offsets are only needed to map tokens back to characters; they must
    # be removed before the remaining inputs are forwarded to the model.
    offsets = inputs.pop("offset_mapping")[0].tolist()

    with torch.no_grad():
        outputs = model(**inputs)
    # Convert to plain ints so the comparisons below are ordinary Python ones.
    label_ids = torch.argmax(outputs.logits, dim=2)[0].tolist()

    # Label id 0 is treated as "not a toponym" (assumes id 0 is the model's
    # outside label - confirm against model.config.id2label). Tokens with an
    # empty span (typically special tokens) carry no text and are skipped.
    spans = [
        (start, end)
        for label_id, (start, end) in zip(label_ids, offsets)
        if label_id != 0 and end > start
    ]

    entities = [
        {"entity": "Toponym", "start": start, "end": end}
        for start, end in _merge_touching_spans(spans)
    ]
    return {"text": text, "entities": entities}
36
+
37
# Build the Gradio interface: a multi-line text box feeds
# get_toponym_entities, and its result is rendered as highlighted text.
demo = gr.Interface(
    title="🌍 Toponym Recognition with GeoLM",
    description=(
        "Enter a paragraph and detect highlighted place names using the "
        "zekun-li/geolm-base-toponym-recognition model."
    ),
    fn=get_toponym_entities,
    inputs=gr.Textbox(placeholder="Enter text with place names...", lines=10),
    outputs=gr.HighlightedText(),
    # Each example is a one-element list: one value per input component.
    examples=[
        [sentence]
        for sentence in (
            "Minneapolis, officially the City of Minneapolis, is a city in Minnesota.",
            "Los Angeles is the most populous city in California.",
        )
    ],
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()