Spaces:
Sleeping
Sleeping
Nick Sorros
committed on
Commit
·
fd5a1b3
1
Parent(s):
4709571
Update tagged grants
Browse files
- tag.py +8 -4
- tagged_grants.jsonl +0 -0
tag.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import json
|
2 |
|
3 |
from transformers import AutoModel, AutoTokenizer
|
|
|
4 |
import srsly
|
5 |
import typer
|
6 |
|
@@ -22,11 +23,14 @@ def tag(data_path, tagged_data_path, sample_size: int = 10):
|
|
22 |
)
|
23 |
|
24 |
texts = [grant["title_and_description"] for grant in data]
|
25 |
-
|
26 |
-
|
27 |
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
30 |
|
31 |
srsly.write_jsonl(tagged_data_path, data)
|
32 |
|
|
|
1 |
import json
|
2 |
|
3 |
from transformers import AutoModel, AutoTokenizer
|
4 |
+
from tqdm import tqdm
|
5 |
import srsly
|
6 |
import typer
|
7 |
|
|
|
23 |
)
|
24 |
|
25 |
texts = [grant["title_and_description"] for grant in data]
|
26 |
+
for batch_index in tqdm(range(0, len(texts), 10)):
|
27 |
+
batch_texts = texts[batch_index:batch_index+10]
|
28 |
|
29 |
+
inputs = tokenizer(batch_texts, padding="max_length")
|
30 |
+
labels = model(**inputs, return_labels=True)
|
31 |
+
|
32 |
+
for i, tags in enumerate(labels):
|
33 |
+
data[i]["tags"] = tags
|
34 |
|
35 |
srsly.write_jsonl(tagged_data_path, data)
|
36 |
|
tagged_grants.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|