manaviel85370
add pages and all
da88570
from label_studio_sdk import Client
import os

# Connection settings for the Label Studio instance. Both values can be
# overridden through environment variables so credentials do not have to be
# edited into (or committed with) this script; the literals are kept only as
# backward-compatible defaults for when the variables are unset.
# NOTE(review): the default API key below is present in source control and
# should be treated as exposed -- rotate it and supply the replacement via
# the LABEL_STUDIO_API_KEY environment variable.
LABEL_STUDIO_URL = os.environ.get('LABEL_STUDIO_URL', 'http://localhost:8080')
API_KEY = os.environ.get('LABEL_STUDIO_API_KEY', 'aad38f54021d443b17395123304a7c01001b55af')

# Client used by the rest of the script to talk to Label Studio.
ls = Client(url=LABEL_STUDIO_URL, api_key=API_KEY)
# Print the connection-check result up front so a wrong URL or key is visible
# before any data is loaded.
print(ls.check_connection())
# Load and preprocess sample data
from datasets import load_dataset
from tqdm import tqdm
# We don't need a ton of data, so we'll only look at the training set for now
dataset = load_dataset("MultiCoNER/multiconer_v2", "English (EN)")["train"]
medical_labels = ["Medication/Vaccine", "MedicalProcedure", "AnatomicalStructure", "Symptom", "Disease"]

# Keep only the samples in which at least one NER tag contains one of the
# medical label names (substring match against each tag).
medical_dataset = []
for item in tqdm(dataset):
    has_medical = any(label in tag for tag in item["ner_tags"] for label in medical_labels)
    if not has_medical:
        continue
    # Store the tokens joined by single spaces under "text", so the sample
    # carries its content as one string in addition to the token list.
    item["text"] = " ".join(item["tokens"])
    medical_dataset.append(item)
# Labeling interface for the project: one <Labels> control with the five
# medical entity labels, attached to a <Text> object that displays each
# task's "text" field.
medical_ner_label_config = '''
<View>
<Labels name="label" toName="text">
<Label value="Medication/Vaccine" background="red"/>
<Label value="MedicalProcedure" background="blue"/>
<Label value="AnatomicalStructure" background="orange"/>
<Label value="Symptom" background="green"/>
<Label value="Disease" background="purple"/>
</Labels>
<Text name="text" value="$text"/>
</View>
'''

# Create the project with that interface, then load the filtered samples
# into it as tasks.
project = ls.start_project(title='Medical NER with GLiNER', label_config=medical_ner_label_config)
project.import_tasks(medical_dataset)