georad committed on
Commit f525638 · verified · 1 Parent(s): b3d5435

Upload 19 files

README.md CHANGED
@@ -1,13 +1,2 @@
- ---
- title: MediNER
- emoji: 🌍
- colorFrom: green
- colorTo: gray
- sdk: streamlit
- sdk_version: 1.43.1
- app_file: app.py
- pinned: false
- short_description: Performs named entity recognition for medical entities.
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # medNER_V2
+ This app performs Named Entity Recognition of medical entities.

checkpoint-92/config.json ADDED
@@ -0,0 +1,195 @@
+ {
+   "_name_or_path": "dslim/distilbert-NER",
+   "activation": "gelu",
+   "architectures": [
+     "DistilBertForTokenClassification"
+   ],
+   "attention_dropout": 0.1,
+   "dim": 768,
+   "dropout": 0.1,
+   "hidden_dim": 3072,
+   "id2label": {
+     "0": "O",
+     "1": "B-ACTIVITY",
+     "2": "I-ACTIVITY",
+     "3": "I-ADMINISTRATION",
+     "4": "B-ADMINISTRATION",
+     "5": "B-AGE",
+     "6": "I-AGE",
+     "7": "I-AREA",
+     "8": "B-AREA",
+     "9": "B-BIOLOGICAL_ATTRIBUTE",
+     "10": "I-BIOLOGICAL_ATTRIBUTE",
+     "11": "I-BIOLOGICAL_STRUCTURE",
+     "12": "B-BIOLOGICAL_STRUCTURE",
+     "13": "B-CLINICAL_EVENT",
+     "14": "I-CLINICAL_EVENT",
+     "15": "B-COLOR",
+     "16": "I-COLOR",
+     "17": "I-COREFERENCE",
+     "18": "B-COREFERENCE",
+     "19": "B-DATE",
+     "20": "I-DATE",
+     "21": "I-DETAILED_DESCRIPTION",
+     "22": "B-DETAILED_DESCRIPTION",
+     "23": "I-DIAGNOSTIC_PROCEDURE",
+     "24": "B-DIAGNOSTIC_PROCEDURE",
+     "25": "I-DISEASE_DISORDER",
+     "26": "B-DISEASE_DISORDER",
+     "27": "B-DISTANCE",
+     "28": "I-DISTANCE",
+     "29": "B-DOSAGE",
+     "30": "I-DOSAGE",
+     "31": "I-DURATION",
+     "32": "B-DURATION",
+     "33": "I-FAMILY_HISTORY",
+     "34": "B-FAMILY_HISTORY",
+     "35": "B-FREQUENCY",
+     "36": "I-FREQUENCY",
+     "37": "I-HEIGHT",
+     "38": "B-HEIGHT",
+     "39": "B-HISTORY",
+     "40": "I-HISTORY",
+     "41": "I-LAB_VALUE",
+     "42": "B-LAB_VALUE",
+     "43": "I-MASS",
+     "44": "B-MASS",
+     "45": "I-MEDICATION",
+     "46": "B-MEDICATION",
+     "47": "I-NONBIOLOGICAL_LOCATION",
+     "48": "B-NONBIOLOGICAL_LOCATION",
+     "49": "I-OCCUPATION",
+     "50": "B-OCCUPATION",
+     "51": "B-OTHER_ENTITY",
+     "52": "I-OTHER_ENTITY",
+     "53": "B-OTHER_EVENT",
+     "54": "I-OTHER_EVENT",
+     "55": "I-OUTCOME",
+     "56": "B-OUTCOME",
+     "57": "I-PERSONAL_BACKGROUND",
+     "58": "B-PERSONAL_BACKGROUND",
+     "59": "B-QUALITATIVE_CONCEPT",
+     "60": "I-QUALITATIVE_CONCEPT",
+     "61": "I-QUANTITATIVE_CONCEPT",
+     "62": "B-QUANTITATIVE_CONCEPT",
+     "63": "B-SEVERITY",
+     "64": "I-SEVERITY",
+     "65": "B-SEX",
+     "66": "I-SEX",
+     "67": "B-SHAPE",
+     "68": "I-SHAPE",
+     "69": "B-SIGN_SYMPTOM",
+     "70": "I-SIGN_SYMPTOM",
+     "71": "B-SUBJECT",
+     "72": "I-SUBJECT",
+     "73": "B-TEXTURE",
+     "74": "I-TEXTURE",
+     "75": "B-THERAPEUTIC_PROCEDURE",
+     "76": "I-THERAPEUTIC_PROCEDURE",
+     "77": "I-TIME",
+     "78": "B-TIME",
+     "79": "B-VOLUME",
+     "80": "I-VOLUME",
+     "81": "I-WEIGHT",
+     "82": "B-WEIGHT"
+   },
+   "initializer_range": 0.02,
+   "label2id": {
+     "B-ACTIVITY": 1,
+     "B-ADMINISTRATION": 4,
+     "B-AGE": 5,
+     "B-AREA": 8,
+     "B-BIOLOGICAL_ATTRIBUTE": 9,
+     "B-BIOLOGICAL_STRUCTURE": 12,
+     "B-CLINICAL_EVENT": 13,
+     "B-COLOR": 15,
+     "B-COREFERENCE": 18,
+     "B-DATE": 19,
+     "B-DETAILED_DESCRIPTION": 22,
+     "B-DIAGNOSTIC_PROCEDURE": 24,
+     "B-DISEASE_DISORDER": 26,
+     "B-DISTANCE": 27,
+     "B-DOSAGE": 29,
+     "B-DURATION": 32,
+     "B-FAMILY_HISTORY": 34,
+     "B-FREQUENCY": 35,
+     "B-HEIGHT": 38,
+     "B-HISTORY": 39,
+     "B-LAB_VALUE": 42,
+     "B-MASS": 44,
+     "B-MEDICATION": 46,
+     "B-NONBIOLOGICAL_LOCATION": 48,
+     "B-OCCUPATION": 50,
+     "B-OTHER_ENTITY": 51,
+     "B-OTHER_EVENT": 53,
+     "B-OUTCOME": 56,
+     "B-PERSONAL_BACKGROUND": 58,
+     "B-QUALITATIVE_CONCEPT": 59,
+     "B-QUANTITATIVE_CONCEPT": 62,
+     "B-SEVERITY": 63,
+     "B-SEX": 65,
+     "B-SHAPE": 67,
+     "B-SIGN_SYMPTOM": 69,
+     "B-SUBJECT": 71,
+     "B-TEXTURE": 73,
+     "B-THERAPEUTIC_PROCEDURE": 75,
+     "B-TIME": 78,
+     "B-VOLUME": 79,
+     "B-WEIGHT": 82,
+     "I-ACTIVITY": 2,
+     "I-ADMINISTRATION": 3,
+     "I-AGE": 6,
+     "I-AREA": 7,
+     "I-BIOLOGICAL_ATTRIBUTE": 10,
+     "I-BIOLOGICAL_STRUCTURE": 11,
+     "I-CLINICAL_EVENT": 14,
+     "I-COLOR": 16,
+     "I-COREFERENCE": 17,
+     "I-DATE": 20,
+     "I-DETAILED_DESCRIPTION": 21,
+     "I-DIAGNOSTIC_PROCEDURE": 23,
+     "I-DISEASE_DISORDER": 25,
+     "I-DISTANCE": 28,
+     "I-DOSAGE": 30,
+     "I-DURATION": 31,
+     "I-FAMILY_HISTORY": 33,
+     "I-FREQUENCY": 36,
+     "I-HEIGHT": 37,
+     "I-HISTORY": 40,
+     "I-LAB_VALUE": 41,
+     "I-MASS": 43,
+     "I-MEDICATION": 45,
+     "I-NONBIOLOGICAL_LOCATION": 47,
+     "I-OCCUPATION": 49,
+     "I-OTHER_ENTITY": 52,
+     "I-OTHER_EVENT": 54,
+     "I-OUTCOME": 55,
+     "I-PERSONAL_BACKGROUND": 57,
+     "I-QUALITATIVE_CONCEPT": 60,
+     "I-QUANTITATIVE_CONCEPT": 61,
+     "I-SEVERITY": 64,
+     "I-SEX": 66,
+     "I-SHAPE": 68,
+     "I-SIGN_SYMPTOM": 70,
+     "I-SUBJECT": 72,
+     "I-TEXTURE": 74,
+     "I-THERAPEUTIC_PROCEDURE": 76,
+     "I-TIME": 77,
+     "I-VOLUME": 80,
+     "I-WEIGHT": 81,
+     "O": 0
+   },
+   "max_position_embeddings": 512,
+   "model_type": "distilbert",
+   "n_heads": 12,
+   "n_layers": 6,
+   "output_past": true,
+   "pad_token_id": 0,
+   "qa_dropout": 0.1,
+   "seq_classif_dropout": 0.2,
+   "sinusoidal_pos_embds": false,
+   "tie_weights_": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.48.3",
+   "vocab_size": 28996
+ }
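
The id2label table above is what maps the checkpoint's 83 output logits back to tag names: 41 entity categories in a BIO scheme plus the O tag. A minimal sketch of inspecting that label space, assuming the checkpoint-92 directory from this commit is available locally:

from transformers import AutoConfig

# Load only the configuration (no weights) to inspect the label space.
config = AutoConfig.from_pretrained("checkpoint-92")
print(config.num_labels)    # 83 = 41 entity types x (B-, I-) + "O"
print(config.id2label[69])  # "B-SIGN_SYMPTOM"
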
checkpoint-92/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:691568d1730026b95fe5a7d1005a616ca010681b8ec88899b99314c510979a77
+ size 261031300
checkpoint-92/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bde04177a1dff0cc8be3fc543d4f5a3b313a0501cff485928d58f0838b00f3da
+ size 522123450
checkpoint-92/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4c96f5a47b163e72e28fbde7afe1320d6bcc042926cd5cb52bdc6f70d90c6d4d
+ size 14244
checkpoint-92/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b2c3a566d1aa24c99821b905999ebcfce96b1faae6ee23e3f85708be8e34a3d
+ size 1064
checkpoint-92/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
+ {
+   "cls_token": {
+     "content": "[CLS]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "[MASK]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "[PAD]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "[SEP]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "[UNK]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
checkpoint-92/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-92/tokenizer_config.json ADDED
@@ -0,0 +1,60 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": false,
+   "extra_special_tokens": {},
+   "mask_token": "[MASK]",
+   "max_length": 512,
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "stride": 0,
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "DistilBertTokenizer",
+   "truncation_side": "right",
+   "truncation_strategy": "longest_first",
+   "unk_token": "[UNK]"
+ }
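
The tokenizer configuration above pins a cased WordPiece tokenizer (do_lower_case: false) with a 512-token limit. A quick sanity check, again assuming checkpoint-92 is a local directory:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("checkpoint-92")
# Cased vocab: casing is preserved, and out-of-vocabulary words split
# into "##"-prefixed subword pieces.
print(tok.tokenize("Patient reports dyspnea"))
print(tok.model_max_length)  # 512
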
checkpoint-92/trainer_state.json ADDED
@@ -0,0 +1,81 @@
+ {
+   "best_metric": 0.76803058385849,
+   "best_model_checkpoint": "/content/drive/MyDrive/Files/checkpoint-92",
+   "epoch": 4.0,
+   "eval_steps": 500,
+   "global_step": 92,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.7553191489361702,
+       "eval_f1": 0.5810435319543069,
+       "eval_loss": 0.9222098588943481,
+       "eval_precision": 0.5859277708592777,
+       "eval_recall": 0.5762400489895897,
+       "eval_runtime": 0.369,
+       "eval_samples_per_second": 54.199,
+       "eval_steps_per_second": 8.13,
+       "step": 23
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.7700754975978037,
+       "eval_f1": 0.6098380690498014,
+       "eval_loss": 0.8292961120605469,
+       "eval_precision": 0.6085365853658536,
+       "eval_recall": 0.6111451316595223,
+       "eval_runtime": 0.3909,
+       "eval_samples_per_second": 51.158,
+       "eval_steps_per_second": 7.674,
+       "step": 46
+     },
+     {
+       "epoch": 3.0,
+       "eval_accuracy": 0.7865477007549759,
+       "eval_f1": 0.6364756623536661,
+       "eval_loss": 0.782767653465271,
+       "eval_precision": 0.6404215747055176,
+       "eval_recall": 0.6325780771586038,
+       "eval_runtime": 0.3839,
+       "eval_samples_per_second": 52.099,
+       "eval_steps_per_second": 7.815,
+       "step": 69
+     },
+     {
+       "epoch": 4.0,
+       "eval_accuracy": 0.789293067947838,
+       "eval_f1": 0.641846153846154,
+       "eval_loss": 0.76803058385849,
+       "eval_precision": 0.645021645021645,
+       "eval_recall": 0.6387017758726271,
+       "eval_runtime": 0.4404,
+       "eval_samples_per_second": 45.415,
+       "eval_steps_per_second": 6.812,
+       "step": 92
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 92,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 4,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 94208005324800.0,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": null
+ }
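
Note that best_metric equals the epoch-4 eval_loss, so the best checkpoint was selected on evaluation loss, which fell every epoch while F1 rose from 0.581 to 0.642. A hedged reconstruction of TrainingArguments consistent with this state file; only the batch size, epoch count, and per-epoch eval/save cadence are recorded here, so the remaining values (notably the learning rate) are assumptions:

from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="/content/drive/MyDrive/Files",
    per_device_train_batch_size=8,      # train_batch_size: 8
    num_train_epochs=4,                 # num_train_epochs: 4
    eval_strategy="epoch",              # one eval per 23-step epoch
    save_strategy="epoch",
    load_best_model_at_end=True,        # best_model_checkpoint is recorded
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    learning_rate=5e-5,                 # assumption: Trainer default, not recorded
)
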
checkpoint-92/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ce032d36c503c8ec4cc7b6976897685f8c25accd569b89148aacf93a4d442372
+ size 5368
checkpoint-92/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
images/info_24dp_1F1F1F_FILL0_wght400_GRAD0_opsz24.png ADDED
images/medical_information_24dp_1F1F1F_FILL0_wght400_GRAD0_opsz24.png ADDED
pages/about.py ADDED
@@ -0,0 +1,13 @@
+ import streamlit as st
+
+ st.title("Info")
+
+ with st.expander("ℹ️ - About this app", expanded=True):
+
+     st.write(
+         """
+     - This app performs named entity recognition for medical entities.
+     - The myDemo model was built from dslim/distilbert-NER (a general-purpose NER model with 66M parameters) on Hugging Face, fine-tuned on singh-aditya/MACCROBAT_biomedical_ner (a dataset annotated with medical entity labels in 41 categories).
+     - The model uses the default pretrained tokenizer of dslim/distilbert-NER.
+         """
+     )
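
As described above, the fine-tuned model keeps the dslim/distilbert-NER encoder but needs a new classification head sized for this label set. A minimal sketch of that head swap (the training loop itself is not part of this commit):

from transformers import AutoModelForTokenClassification

# Start from the general-purpose NER encoder and attach a fresh head for
# the 83 BIO tags (41 entity categories x B-/I- plus "O"). The original
# head has a different output size, hence ignore_mismatched_sizes.
model = AutoModelForTokenClassification.from_pretrained(
    "dslim/distilbert-NER",
    num_labels=83,
    ignore_mismatched_sizes=True,
)
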
pages/home.py ADDED
@@ -0,0 +1,49 @@
+ import streamlit as st
+
+ st.title("📘 Named Entity Recognition")
+
+ st.header("Tags the 41 medical entity types below")
+
+ # Bare string literals render via Streamlit "magic": each one is
+ # displayed on the page as a line of text.
+ 'ACTIVITY'
+ 'ADMINISTRATION'
+ 'AGE'
+ 'AREA'
+ 'BIOLOGICAL_ATTRIBUTE'
+ 'BIOLOGICAL_STRUCTURE'
+ 'CLINICAL_EVENT'
+ 'COLOR'
+ 'COREFERENCE'
+ 'DATE'
+ 'DETAILED_DESCRIPTION'
+ 'DIAGNOSTIC_PROCEDURE'
+ 'DISEASE_DISORDER'
+ 'DISTANCE'
+ 'DOSAGE'
+ 'DURATION'
+ 'FAMILY_HISTORY'
+ 'FREQUENCY'
+ 'HEIGHT'
+ 'HISTORY'
+ 'LAB_VALUE'
+ 'MASS'
+ 'MEDICATION'
+ 'NONBIOLOGICAL_LOCATION'
+ 'OCCUPATION'
+ 'OTHER_ENTITY'
+ 'OUTCOME'
+ 'PERSONAL_BACKGROUND'
+ 'QUALITATIVE_CONCEPT'
+ 'QUANTITATIVE_CONCEPT'
+ 'SEVERITY'
+ 'SEX'
+ 'SHAPE'
+ 'SIGN_SYMPTOM'
+ 'SUBJECT'
+ 'TEXTURE'
+ 'THERAPEUTIC_PROCEDURE'
+ 'TIME'
+ 'VOLUME'
+ 'WEIGHT'
pages/type_text.py ADDED
@@ -0,0 +1,55 @@
+ import streamlit as st
+ import pandas as pd
+ import json
+ from transformers import pipeline
+
+ def on_click():
+     st.session_state.user_input = ""
+
+ @st.cache_data
+ def convert_df(df: pd.DataFrame):
+     return df.to_csv(index=False).encode('utf-8')
+
+ @st.cache_data
+ def convert_json(df: pd.DataFrame):
+     result = df.to_json(orient="index")
+     parsed = json.loads(result)
+     return json.dumps(parsed)
+
+ text_input = st.text_input("Type input text and hit Enter", key="user_input")
+ st.button("Clear text", on_click=on_click)
+
+ # Load the fine-tuned checkpoint and a public model for comparison.
+ my_model_results = pipeline("ner", model="checkpoint-92")
+ HuggingFace_model_results = pipeline("ner", model="blaze999/Medical-NER")
+
+ createNER_button = st.button("Create NER tags")
+
+ col1, col2 = st.columns([1, 1.5])
+ col1.subheader("myDemo Model")
+ col2.subheader("blaze999/Medical-NER")
+
+ dictA = {"word": [], "entity": []}
+ dictB = {"word": [], "entity": []}
+
+ # Run both pipelines only when there is text and the button was clicked.
+ if text_input and createNER_button:
+     with col1:
+         for result in my_model_results(text_input):
+             st.write(result['word'], result['entity'])
+             dictA["word"].append(result['word'])
+             dictA["entity"].append(result['entity'])
+         dfA = pd.DataFrame.from_dict(dictA)
+     with col2:
+         for result in HuggingFace_model_results(text_input):
+             st.write(result['word'], result['entity'])
+             dictB["word"].append(result['word'])
+             dictB["entity"].append(result['entity'])
+         dfB = pd.DataFrame.from_dict(dictB)
+
+     bs, b1, b2, b3, bLast = st.columns([0.75, 1.5, 1.5, 1.5, 0.75])
+     with b1:
+         csvbutton = st.download_button(label="📥 Download .csv", data=convert_df(dfA), file_name="results.csv", mime='text/csv', key='csv_b')
+     with b2:
+         textbutton = st.download_button(label="📥 Download .txt", data=convert_df(dfA), file_name="results.txt", mime='text/plain', key='text_b')
+     with b3:
+         jsonbutton = st.download_button(label="📥 Download .json", data=convert_json(dfA), file_name="results.json", mime='application/json', key='json_b')
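
One caveat on the bare pipeline("ner", ...) calls used here: they emit one row per subword token, so WordPiece fragments show up as separate "##"-prefixed entries. If merged spans are preferred, the pipeline's standard aggregation_strategy option (not used in this commit) groups them:

from transformers import pipeline

# "simple" merges consecutive tokens of the same entity into one span,
# returning an "entity_group" key instead of per-token "entity" labels.
ner = pipeline("ner", model="checkpoint-92", aggregation_strategy="simple")
for ent in ner("45 year old woman with fever and severe headache"):
    print(ent["word"], ent["entity_group"], round(float(ent["score"]), 3))
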
pages/upload_file.py ADDED
@@ -0,0 +1,54 @@
+ import streamlit as st
+ import pandas as pd
+ from io import StringIO
+ import json
+ from transformers import pipeline
+
+ @st.cache_data
+ def convert_df(df: pd.DataFrame):
+     return df.to_csv(index=False).encode('utf-8')
+
+ @st.cache_data
+ def convert_json(df: pd.DataFrame):
+     result = df.to_json(orient="index")
+     parsed = json.loads(result)
+     return json.dumps(parsed)
+
+ uploaded_file = st.file_uploader(label="Upload single text file")
+ if uploaded_file is not None:
+     stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
+     string_data = stringio.read()
+     st.success('Your file input is: ' + string_data, icon="✅")
+
+ # Load the fine-tuned checkpoint and a public model for comparison.
+ my_model_results = pipeline("ner", model="checkpoint-92")
+ HuggingFace_model_results = pipeline("ner", model="blaze999/Medical-NER")
+
+ createNER_button = st.button("Create NER tags")
+
+ col1, col2 = st.columns([1, 1.5])
+ col1.subheader("myDemo Model")
+ col2.subheader("blaze999/Medical-NER")
+
+ if uploaded_file is not None and createNER_button:
+     dict1 = {"word": [], "entity": []}
+     dict2 = {"word": [], "entity": []}
+     with col1:
+         for result in my_model_results(string_data):
+             st.write(result['word'], result['entity'])
+             dict1["word"].append(result['word'])
+             dict1["entity"].append(result['entity'])
+         df1 = pd.DataFrame.from_dict(dict1)
+     with col2:
+         for result in HuggingFace_model_results(string_data):
+             st.write(result['word'], result['entity'])
+             dict2["word"].append(result['word'])
+             dict2["entity"].append(result['entity'])
+         df2 = pd.DataFrame.from_dict(dict2)
+
+     cs, c1, c2, c3, cLast = st.columns([0.75, 1.5, 1.5, 1.5, 0.75])
+     with c1:
+         csvbutton = st.download_button(label="📥 Download .csv", data=convert_df(df1), file_name="results.csv", mime='text/csv', key='csv')
+     with c2:
+         textbutton = st.download_button(label="📥 Download .txt", data=convert_df(df1), file_name="results.txt", mime='text/plain', key='text')
+     with c3:
+         jsonbutton = st.download_button(label="📥 Download .json", data=convert_json(df1), file_name="results.json", mime='application/json', key='json')
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ streamlit==1.41.1
+ pandas==2.2.2
+ torch==2.4.0
+ transformers==4.44.2