paragon-analytics committed on
Commit
b6e3578
·
verified ·
1 Parent(s): 4dc3fee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -132
app.py CHANGED
@@ -1,142 +1,121 @@
1
- import streamlit as st
2
- import gradio as gr
3
- import shap
4
  import numpy as np
5
- import scipy as sp
6
  import torch
7
- import tensorflow as tf
8
- import transformers
9
- from transformers import pipeline
10
- from transformers import RobertaTokenizer, RobertaModel
11
- from transformers import AutoModelForSequenceClassification
12
- from transformers import TFAutoModelForSequenceClassification
13
- from transformers import AutoTokenizer, AutoModelForTokenClassification
14
-
15
- import matplotlib.pyplot as plt
16
- import sys
17
- import csv
18
-
19
- csv.field_size_limit(sys.maxsize)
20
-
21
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
22
-
23
- tokenizer = AutoTokenizer.from_pretrained("paragon-analytics/ADRv1")
24
- model = AutoModelForSequenceClassification.from_pretrained("paragon-analytics/ADRv1").to(device)
25
-
26
- # build a pipeline object to do predictions
27
- pred = transformers.pipeline("text-classification", model=model,
28
- tokenizer=tokenizer, return_all_scores=True)
29
-
30
- explainer = shap.Explainer(pred)
31
-
32
- ##
33
- # classifier = transformers.pipeline("text-classification", model = "cross-encoder/qnli-electra-base")
34
-
35
- # def med_score(x):
36
- # label = x['label']
37
- # score_1 = x['score']
38
- # return round(score_1,3)
39
 
40
- # def sym_score(x):
41
- # label2sym= x['label']
42
- # score_1sym = x['score']
43
- # return round(score_1sym,3)
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  ner_tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
46
  ner_model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- ner_pipe = pipeline("ner", model=ner_model, tokenizer=ner_tokenizer, aggregation_strategy="simple") # pass device=0 if using gpu
49
- #
50
-
51
- def adr_predict(x):
52
- encoded_input = tokenizer(x, return_tensors='pt')
53
- output = model(**encoded_input)
54
- scores = output[0][0].detach().numpy()
55
- scores = tf.nn.softmax(scores)
56
-
57
- shap_values = explainer([str(x).lower()])
58
- # # Find the index of the class you want as the default reference (e.g., 'label_1')
59
- # label_1_index = np.where(np.array(explainer.output_names) == 'label_1')[0][0]
60
-
61
- # # Plot the SHAP values for a specific instance in your dataset (e.g., instance 0)
62
- # shap.plots.text(shap_values[label_1_index][0])
63
-
64
- local_plot = shap.plots.text(shap_values[0], display=False)
65
-
66
- # med = med_score(classifier(x+str(", There is a medication."))[0])
67
- # sym = sym_score(classifier(x+str(", There is a symptom."))[0])
68
-
69
- res = ner_pipe(x)
70
-
71
- entity_colors = {
72
- 'Severity': 'red',
73
- 'Sign_symptom': 'green',
74
- 'Medication': 'lightblue',
75
- 'Age': 'yellow',
76
- 'Sex':'yellow',
77
- 'Diagnostic_procedure':'gray',
78
- 'Biological_structure':'silver'}
79
-
80
- htext = ""
81
- prev_end = 0
82
-
83
- for entity in res:
84
- start = entity['start']
85
- end = entity['end']
86
- word = entity['word'].replace("##", "")
87
- color = entity_colors[entity['entity_group']]
88
-
89
- htext += f"{x[prev_end:start]}<mark style='background-color:{color};'>{word}</mark>"
90
- prev_end = end
91
-
92
- htext += x[prev_end:]
93
-
94
- return {"Severe Reaction": float(scores.numpy()[1]), "Non-severe Reaction": float(scores.numpy()[0])}, local_plot,htext
95
- # ,{"Contains Medication": float(med), "No Medications": float(1-med)} , {"Contains Symptoms": float(sym), "No Symptoms": float(1-sym)}
96
-
97
-
98
- def main(prob1):
99
- text = str(prob1).lower()
100
- obj = adr_predict(text)
101
- return obj[0],obj[1],obj[2]
102
-
103
- title = "Welcome to **ADR Detector** 🪐"
104
- description1 = """This app takes text (up to a few sentences) and predicts to what extent the text describes severe (or non-severe) adverse reaction to medicaitons. Please do NOT use for medical diagnosis."""
105
-
106
- with gr.Blocks(title=title) as demo:
107
- gr.Markdown(f"## {title}")
108
- gr.Markdown(description1)
109
- gr.Markdown("""---""")
110
- prob1 = gr.Textbox(label="Enter Your Text Here:",lines=2, placeholder="Type it here ...")
111
- submit_btn = gr.Button("Analyze")
112
 
113
  with gr.Row():
114
-
115
- with gr.Column(visible=True) as output_col:
116
- label = gr.Label(label = "Predicted Label")
117
-
118
-
119
- with gr.Column(visible=True) as output_col:
120
- local_plot = gr.HTML(label = 'Shap:')
121
- htext = gr.HTML(label="NER")
122
- # med = gr.Label(label = "Contains Medication")
123
- # sym = gr.Label(label = "Contains Symptoms")
124
-
125
- submit_btn.click(
126
- main,
127
- [prob1],
128
- [label
129
- ,local_plot, htext
130
- # , med, sym
131
- ], api_name="adr"
132
  )
133
-
134
- with gr.Row():
135
- gr.Markdown("### Click on any of the examples below to see how it works:")
136
- gr.Examples([["A 35 year-old male had severe headache after taking Aspirin. The lab results were normal."],
137
- ["A 35 year-old female had minor pain in upper abdomen after taking Acetaminophen."]],
138
- [prob1], [label,local_plot, htext
139
- # , med, sym
140
- ], main, cache_examples=True)
141
-
142
- demo.launch()
 
 
 
 
1
  import numpy as np
 
2
  import torch
3
+ import shap
4
+ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
+ import gradio as gr
 
 
 
7
 
8
+ # 1) Device setup
9
+ device = "cuda" if torch.cuda.is_available() else "cpu"
10
+
11
+ # 2) Load ADR classifier
12
+ model_name = "paragon-analytics/ADRv1"
13
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
14
+ model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
15
+
16
+ # 3) Hugging Face text‐classification pipeline with return_all_scores
17
+ pred_pipeline = pipeline(
18
+ "text-classification",
19
+ model=model,
20
+ tokenizer=tokenizer,
21
+ return_all_scores=True,
22
+ device=0 if device == "cuda" else -1
23
+ )
24
+
25
+ # 4) Wrapper: list[str]→np.ndarray of shape (n, n_classes)
26
+ def predict_proba(texts):
27
+ if isinstance(texts, str):
28
+ texts = [texts]
29
+ results = pred_pipeline(texts)
30
+ # results is List[List[{"label":…, "score":…}]]
31
+ probs = np.array([[d["score"] for d in sample] for sample in results])
32
+ return probs
33
+
34
+ # 5) Build SHAP explainer
35
+ masker = shap.maskers.Text(tokenizer) # for text explainability
36
+ # get output names from a dummy call
37
+ example = pred_pipeline(["test"])[0]
38
+ class_labels = [d["label"] for d in example]
39
+ explainer = shap.Explainer(
40
+ predict_proba,
41
+ masker=masker,
42
+ output_names=class_labels
43
+ )
44
+
45
+ # 6) Load biomedical NER pipeline
46
  ner_tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
47
  ner_model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")
48
+ ner_pipe = pipeline(
49
+ "ner",
50
+ model=ner_model,
51
+ tokenizer=ner_tokenizer,
52
+ aggregation_strategy="simple",
53
+ device=0 if device == "cuda" else -1
54
+ )
55
+
56
+ # 7) Single‐text prediction + SHAP + NER
57
+ def adr_predict(text):
58
+ # a) Predict probabilities
59
+ probs = predict_proba(text)[0]
60
+ prob_dict = {label: float(probs[i]) for i, label in enumerate(class_labels)}
61
+
62
+ # b) SHAP explanation (shap.plots.text with display=False returns an HTML string, not a Matplotlib figure)
63
+ shap_values = explainer([text])
64
+ fig = shap.plots.text(shap_values[0], display=False)
65
+
66
+ # c) NER highlighting
67
+ entities = ner_pipe(text)
68
+ colors = {
69
+ "Severity": "red",
70
+ "Sign_symptom": "green",
71
+ "Medication": "lightblue",
72
+ "Age": "yellow",
73
+ "Sex": "yellow",
74
+ "Diagnostic_procedure": "gray",
75
+ "Biological_structure": "silver"
76
+ }
77
+ highlighted = ""
78
+ last_idx = 0
79
+ for ent in entities:
80
+ start, end = ent["start"], ent["end"]
81
+ word = ent["word"].replace("##", "")
82
+ color = colors.get(ent["entity_group"], "lightgray")
83
+ highlighted += (
84
+ text[last_idx:start]
85
+ + f"<mark style='background-color:{color};'>{word}</mark>"
86
+ )
87
+ last_idx = end
88
+ highlighted += text[last_idx:]
89
+
90
+ return prob_dict, fig, highlighted
91
+
92
+ # 8) Gradio UI
93
+ with gr.Blocks() as demo:
94
+ gr.Markdown("## Welcome to **ADR Detector** 🪐")
95
+ gr.Markdown(
96
+ "Predicts the likelihood your text describes a severe vs. non-severe adverse reaction. "
97
+ "_(Not for medical diagnosis.)_"
98
+ )
99
 
100
+ txt = gr.Textbox(label="Enter Your Text Here:", lines=3, placeholder="Type a sentence about a reaction…")
101
+ btn = gr.Button("Analyze")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  with gr.Row():
104
+ lbl = gr.Label(label="Predicted Probabilities")
105
+ shp = gr.Plot(label="SHAP Explanation")
106
+ ner = gr.HTML(label="Biomedical Entities Highlighted")
107
+
108
+ btn.click(fn=adr_predict, inputs=txt, outputs=[lbl, shp, ner])
109
+
110
+ gr.Examples(
111
+ examples=[
112
+ "A 35-year-old male experienced severe headache after taking Aspirin.",
113
+ "A 35-year-old female had minor abdominal pain after Acetaminophen."
114
+ ],
115
+ inputs=txt,
116
+ outputs=[lbl, shp, ner],
117
+ fn=adr_predict,
118
+ cache_examples=True
 
 
 
119
  )
120
+
121
+ demo.launch()