Update app.py
app.py CHANGED
@@ -1,142 +1,121 @@
-import streamlit as st
-import gradio as gr
-import shap
import numpy as np
-import scipy as sp
import torch
-import
-import
-from transformers import pipeline
-from transformers import RobertaTokenizer, RobertaModel
-from transformers import AutoModelForSequenceClassification
-from transformers import TFAutoModelForSequenceClassification
-from transformers import AutoTokenizer, AutoModelForTokenClassification
-
-import matplotlib.pyplot as plt
-import sys
-import csv
-
-csv.field_size_limit(sys.maxsize)
-
-device = "cuda:0" if torch.cuda.is_available() else "cpu"
-
-tokenizer = AutoTokenizer.from_pretrained("paragon-analytics/ADRv1")
-model = AutoModelForSequenceClassification.from_pretrained("paragon-analytics/ADRv1").to(device)
-
-# build a pipeline object to do predictions
-pred = transformers.pipeline("text-classification", model=model,
-                             tokenizer=tokenizer, return_all_scores=True)
-
-explainer = shap.Explainer(pred)
-
-##
-# classifier = transformers.pipeline("text-classification", model = "cross-encoder/qnli-electra-base")
-
-# def med_score(x):
-# label = x['label']
-# score_1 = x['score']
-# return round(score_1,3)

-
-# label2sym= x['label']
-# score_1sym = x['score']
-# return round(score_1sym,3)

ner_tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
ner_model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")

-
-
-
-def adr_predict(x):
-    encoded_input = tokenizer(x, return_tensors='pt')
-    output = model(**encoded_input)
-    scores = output[0][0].detach().numpy()
-    scores = tf.nn.softmax(scores)
-
-    shap_values = explainer([str(x).lower()])
-    # # Find the index of the class you want as the default reference (e.g., 'label_1')
-    # label_1_index = np.where(np.array(explainer.output_names) == 'label_1')[0][0]
-
-    # # Plot the SHAP values for a specific instance in your dataset (e.g., instance 0)
-    # shap.plots.text(shap_values[label_1_index][0])
-
-    local_plot = shap.plots.text(shap_values[0], display=False)
-
-    # med = med_score(classifier(x+str(", There is a medication."))[0])
-    # sym = sym_score(classifier(x+str(", There is a symptom."))[0])
-
-    res = ner_pipe(x)
-
-    entity_colors = {
-        'Severity': 'red',
-        'Sign_symptom': 'green',
-        'Medication': 'lightblue',
-        'Age': 'yellow',
-        'Sex':'yellow',
-        'Diagnostic_procedure':'gray',
-        'Biological_structure':'silver'}
-
-    htext = ""
-    prev_end = 0
-
-    for entity in res:
-        start = entity['start']
-        end = entity['end']
-        word = entity['word'].replace("##", "")
-        color = entity_colors[entity['entity_group']]
-
-        htext += f"{x[prev_end:start]}<mark style='background-color:{color};'>{word}</mark>"
-        prev_end = end
-
-    htext += x[prev_end:]
-
-    return {"Severe Reaction": float(scores.numpy()[1]), "Non-severe Reaction": float(scores.numpy()[0])}, local_plot,htext
-    # ,{"Contains Medication": float(med), "No Medications": float(1-med)} , {"Contains Symptoms": float(sym), "No Symptoms": float(1-sym)}
-
-
-def main(prob1):
-    text = str(prob1).lower()
-    obj = adr_predict(text)
-    return obj[0],obj[1],obj[2]
-
-title = "Welcome to **ADR Detector** 🪐"
-description1 = """This app takes text (up to a few sentences) and predicts to what extent the text describes severe (or non-severe) adverse reaction to medicaitons. Please do NOT use for medical diagnosis."""
-
-with gr.Blocks(title=title) as demo:
-    gr.Markdown(f"## {title}")
-    gr.Markdown(description1)
-    gr.Markdown("""---""")
-    prob1 = gr.Textbox(label="Enter Your Text Here:",lines=2, placeholder="Type it here ...")
-    submit_btn = gr.Button("Analyze")

    with gr.Row():
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        ,local_plot, htext
-        # , med, sym
-        ], api_name="adr"
    )
-
-
-    gr.Markdown("### Click on any of the examples below to see how it works:")
-    gr.Examples([["A 35 year-old male had severe headache after taking Aspirin. The lab results were normal."],
-                 ["A 35 year-old female had minor pain in upper abdomen after taking Acetaminophen."]],
-                [prob1], [label,local_plot, htext
-                # , med, sym
-                ], main, cache_examples=True)
-
-demo.launch()

import numpy as np
import torch
+import shap
+from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification

+import gradio as gr

+# 1) Device setup
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# 2) Load ADR classifier
+model_name = "paragon-analytics/ADRv1"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
+
+# 3) Hugging Face text-classification pipeline with return_all_scores
+pred_pipeline = pipeline(
+    "text-classification",
+    model=model,
+    tokenizer=tokenizer,
+    return_all_scores=True,
+    device=0 if device == "cuda" else -1
+)
+
+# 4) Wrapper: list[str] -> np.ndarray of shape (n, n_classes)
+def predict_proba(texts):
+    if isinstance(texts, str):
+        texts = [texts]
+    results = pred_pipeline(texts)
+    # results is List[List[{"label": …, "score": …}]]
+    probs = np.array([[d["score"] for d in sample] for sample in results])
+    return probs
+
+# 5) Build SHAP explainer
+masker = shap.maskers.Text(tokenizer)  # for text explainability
+# get output names from a dummy call
+example = pred_pipeline(["test"])[0]
+class_labels = [d["label"] for d in example]
+explainer = shap.Explainer(
+    predict_proba,
+    masker=masker,
+    output_names=class_labels
+)
+
+# 6) Load biomedical NER pipeline
ner_tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
ner_model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")
+ner_pipe = pipeline(
+    "ner",
+    model=ner_model,
+    tokenizer=ner_tokenizer,
+    aggregation_strategy="simple",
+    device=0 if device == "cuda" else -1
+)
+
+# 7) Single-text prediction + SHAP + NER
+def adr_predict(text):
+    # a) Predict probabilities
+    probs = predict_proba(text)[0]
+    prob_dict = {label: float(probs[i]) for i, label in enumerate(class_labels)}
+
+    # b) SHAP explanation (returns a Matplotlib figure)
+    shap_values = explainer([text])
+    fig = shap.plots.text(shap_values[0], display=False)
+
+    # c) NER highlighting
+    entities = ner_pipe(text)
+    colors = {
+        "Severity": "red",
+        "Sign_symptom": "green",
+        "Medication": "lightblue",
+        "Age": "yellow",
+        "Sex": "yellow",
+        "Diagnostic_procedure": "gray",
+        "Biological_structure": "silver"
+    }
+    highlighted = ""
+    last_idx = 0
+    for ent in entities:
+        start, end = ent["start"], ent["end"]
+        word = ent["word"].replace("##", "")
+        color = colors.get(ent["entity_group"], "lightgray")
+        highlighted += (
+            text[last_idx:start]
+            + f"<mark style='background-color:{color};'>{word}</mark>"
+        )
+        last_idx = end
+    highlighted += text[last_idx:]
+
+    return prob_dict, fig, highlighted
+
+# 8) Gradio UI
+with gr.Blocks() as demo:
+    gr.Markdown("## Welcome to **ADR Detector** 🪐")
+    gr.Markdown(
+        "Predicts the likelihood your text describes a severe vs. non-severe adverse reaction. "
+        "_(Not for medical diagnosis.)_"
+    )

+    txt = gr.Textbox(label="Enter Your Text Here:", lines=3, placeholder="Type a sentence about a reaction…")
+    btn = gr.Button("Analyze")

    with gr.Row():
+        lbl = gr.Label(label="Predicted Probabilities")
+        shp = gr.Plot(label="SHAP Explanation")
+        ner = gr.HTML(label="Biomedical Entities Highlighted")
+
+    btn.click(fn=adr_predict, inputs=txt, outputs=[lbl, shp, ner])
+
+    gr.Examples(
+        examples=[
+            "A 35-year-old male experienced severe headache after taking Aspirin.",
+            "A 35-year-old female had minor abdominal pain after Acetaminophen."
+        ],
+        inputs=txt,
+        outputs=[lbl, shp, ner],
+        fn=adr_predict,
+        cache_examples=True
    )
+
+demo.launch()
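
A quick way to sanity-check the refactored pieces outside the Gradio UI is a snippet along these lines. It is only a sketch that reuses predict_proba, class_labels, explainer, and ner_pipe exactly as defined in the updated app.py above; the sample sentence is illustrative and not part of the commit, and exact scores depend on the model.

# Minimal sketch: exercise the prediction wrapper, SHAP explainer, and NER pipeline directly.
sample = "A 35-year-old male experienced severe headache after taking Aspirin."

probs = predict_proba([sample])            # ndarray of shape (1, n_classes); each row sums to ~1
print(dict(zip(class_labels, probs[0])))   # label -> probability, in pipeline output order

shap_values = explainer([sample])          # one Explanation object per input text
text_plot = shap.plots.text(shap_values[0], display=False)  # rendered explanation, returned rather than shown

entities = ner_pipe(sample)                # aggregated entities with entity_group/word/start/end keys
print([(e["entity_group"], e["word"]) for e in entities])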
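
One compatibility note on step 3: return_all_scores=True is deprecated in recent transformers releases in favor of top_k. If the Space's logs show that warning, the pipeline could plausibly be constructed as below, reusing model, tokenizer, and device from above. The per-text output should remain a list of {label, score} dicts that predict_proba already handles, but that is an assumption worth verifying against the installed version.

# Sketch of the same pipeline without the deprecated flag (assumes a recent transformers version).
pred_pipeline = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    top_k=None,                          # return a score for every label, like return_all_scores=True
    device=0 if device == "cuda" else -1
)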