|
from label_studio_sdk import Client |
|
|
|
import os

# Label Studio connection settings. Each value may be overridden through the
# environment (LABEL_STUDIO_URL / LABEL_STUDIO_API_KEY); the literals are only
# fallbacks, so the script behaves exactly as before when nothing is set.
# `or` (rather than a .get() default) also covers a variable set to "".
LABEL_STUDIO_URL = os.environ.get('LABEL_STUDIO_URL') or 'http://localhost:8080'

# SECURITY: an access token is committed here in plain text. Rotate it and
# remove this fallback once LABEL_STUDIO_API_KEY is supplied by the environment.
API_KEY = (
    os.environ.get('LABEL_STUDIO_API_KEY')
    or 'aad38f54021d443b17395123304a7c01001b55af'
)

# SDK client bound to the configured server and token.
ls = Client(url=LABEL_STUDIO_URL, api_key=API_KEY)

# Print whatever the SDK's connection check returns before doing any work.
print(ls.check_connection())
|
|
|
|
|
from datasets import load_dataset |
|
from tqdm import tqdm |
|
|
|
|
|
# "train" split of the "English (EN)" configuration of MultiCoNER v2.
dataset = load_dataset("MultiCoNER/multiconer_v2", "English (EN)")["train"]

# Entity type names of interest. A record is kept when at least one of its
# NER tags contains one of these names as a substring.
medical_labels = [
    "Medication/Vaccine",
    "MedicalProcedure",
    "AnatomicalStructure",
    "Symptom",
    "Disease",
]

# Records that carry at least one medical tag, each extended with a "text"
# field holding its tokens joined by single spaces.
medical_dataset = []

for record in tqdm(dataset):
    mentions_medical = any(
        label in tag
        for tag in record["ner_tags"]
        for label in medical_labels
    )
    if not mentions_medical:
        continue

    # The record itself is updated in place and then collected.
    record["text"] = " ".join(record["tokens"])
    medical_dataset.append(record)
|
|
|
# Labeling interface: one <Label> per medical entity type, applied to the
# text object that is filled from each task's "text" field ("$text").
ner_label_config = '''
    <View>
        <Labels name="label" toName="text">
            <Label value="Medication/Vaccine" background="red"/>
            <Label value="MedicalProcedure" background="blue"/>
            <Label value="AnatomicalStructure" background="orange"/>
            <Label value="Symptom" background="green"/>
            <Label value="Disease" background="purple"/>
        </Labels>

        <Text name="text" value="$text"/>
    </View>
    '''

# Create the project on the connected Label Studio instance.
project = ls.start_project(
    title='Medical NER with GLiNER',
    label_config=ner_label_config,
)

# Upload the filtered records as the project's tasks.
project.import_tasks(medical_dataset)