Spaces:

UVA-MSBA
/

ADR_Detector

Running

App Files Community

paragon-analytics committed 7 days ago

Commit

d2b9b3e

verified ·

1 Parent(s): 41b32e2

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -51

app.py CHANGED Viewed

@@ -9,15 +9,14 @@ from transformers import (
 )
 import gradio as gr
-# 1) Device setup
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# 2) Load ADR classifier model & tokenizer
 model_name = "paragon-analytics/ADRv1"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
-# 3) Build HF text-classification pipeline
 pred_pipeline = pipeline(
     "text-classification",
     model=model,
@@ -26,40 +25,30 @@ pred_pipeline = pipeline(
     device=0 if device.type == "cuda" else -1
 )
-# 4) Base predict_proba: List[str] → np.ndarray of shape (n_samples, n_classes)
 def predict_proba(texts):
     """Score one or more texts with the ADR text-classification pipeline.

     A bare string is treated as a batch of one. Each pipeline result is
     expected to be a list of ``{"label": ..., "score": ...}`` dicts, one
     list per input; only the scores are kept.

     Args:
         texts: A single string or a list of strings.

     Returns:
         np.ndarray with one row of scores per input text.
     """
     batch = [texts] if isinstance(texts, str) else texts
     score_rows = []
     for sample in pred_pipeline(batch):
         score_rows.append([entry["score"] for entry in sample])
     return np.array(score_rows)
-# 5) SHAP-compatible wrapper: joins token lists back into strings
 def predict_proba_shap(inputs):
     """SHAP-facing wrapper around ``predict_proba``.

     Args:
         inputs: A sequence whose items are either strings or lists of
             token strings.

     Returns:
         Whatever ``predict_proba`` returns for the rebuilt strings.
     """
     rejoined = []
     for item in inputs:
         # Token lists are glued back into a single space-separated string.
         if isinstance(item, list):
             item = " ".join(item)
         rejoined.append(item)
     return predict_proba(rejoined)
-# 6) Instantiate SHAP explainer with a Text masker
 # Text masker built from the classifier's tokenizer.
 masker = shap.maskers.Text(tokenizer)
-# Grab output class labels from a dummy sample
 _example = pred_pipeline(["test"])[0]
 class_labels = [d["label"] for d in _example]
 # The explainer calls the SHAP-compatible predict wrapper and names its
 # outputs after the class labels read above.
 explainer = shap.Explainer(
     predict_proba_shap,
     masker=masker,
     output_names=class_labels
 )
-# 7) Load biomedical NER model & pipeline
-ner_model_name = "d4data/biomedical-ner-all"
-ner_tokenizer = AutoTokenizer.from_pretrained(ner_model_name)
-ner_model = AutoModelForTokenClassification.from_pretrained(ner_model_name).to(device)
 ner_pipe = pipeline(
     "ner",
     model=ner_model,
@@ -68,7 +57,6 @@ ner_pipe = pipeline(
     device=0 if device.type == "cuda" else -1
 )
-# 8) Mapping for entity highlight colors
 ENTITY_COLORS = {
     "Severity": "red",
     "Sign_symptom": "green",
@@ -79,57 +67,49 @@ ENTITY_COLORS = {
     "Biological_structure": "silver"
 }
-# 9) Full predict + explain + NER function
 def adr_predict(text: str):
     """Classify *text*, explain the prediction, and highlight NER entities.

     Args:
         text: Free-text description to analyse.

     Returns:
         A 3-tuple ``(prob_dict, fig, highlighted)``:
         ``prob_dict`` maps each entry of ``class_labels`` to its score as a
         float; ``fig`` is the value returned by
         ``shap.plots.text(..., display=False)``; ``highlighted`` is an HTML
         string in which every entity span reported by ``ner_pipe`` is
         wrapped in a coloured ``<mark>`` element.
     """
     # Function-scope import: the file's import header is not modified.
     from html import escape

     # a) Predict probabilities
     probs = predict_proba([text])[0]
     prob_dict = {label: float(probs[i]) for i, label in enumerate(class_labels)}

     # b) SHAP explanation
     shap_values = explainer([text])
     # NOTE(review): the SHAP docs describe display=False as returning an HTML
     # string rather than a Matplotlib figure -- confirm the Gradio output
     # component bound to `fig` accepts that.
     fig = shap.plots.text(shap_values[0], display=False)

     # c) NER highlighting. The input is user-supplied and ends up in an HTML
     # component, so every piece of it is escaped before being emitted.
     ents = ner_pipe(text)
     parts = []
     last_idx = 0
     for ent in ents:
         start, end = ent["start"], ent["end"]
         color = ENTITY_COLORS.get(ent["entity_group"], "lightgray")
         parts.append(escape(text[last_idx:start]))
         # Show the original slice of the input, not the tokenizer's
         # normalised `word`, so the highlighted text matches what was typed.
         parts.append(
             f"<mark style='background-color:{color};'>{escape(text[start:end])}</mark>"
         )
         last_idx = end
     parts.append(escape(text[last_idx:]))
     highlighted = "".join(parts)

     return prob_dict, fig, highlighted
-# 10) Build Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("## Welcome to **ADR Detector** 🪐")
     gr.Markdown(
-        "Predicts the likelihood your text describes a **severe** vs. **non-severe** adverse reaction.  \n"
         "_(Not for medical or diagnostic use.)_"
     )
     txt = gr.Textbox(
-        label="Enter Your Text Here:",
-        lines=3,
         placeholder="Type a sentence about an adverse reaction…"
     )
     btn = gr.Button("Analyze")
     with gr.Row():
-        label_out = gr.Label(label="Predicted Probabilities")
-        shap_out = gr.Plot(label="SHAP Explanation")
-        ner_out = gr.HTML(label="Biomedical Entities Highlighted")
     btn.click(
         fn=adr_predict,
         inputs=txt,
-        outputs=[label_out, shap_out, ner_out]
     )
     gr.Examples(
@@ -138,9 +118,9 @@ with gr.Blocks() as demo:
             "A 35-year-old female had minor abdominal pain after Acetaminophen."
         ],
         inputs=txt,
-        outputs=[label_out, shap_out, ner_out],
         fn=adr_predict,
-        cache_examples=True
     )
 if __name__ == "__main__":

 )
 import gradio as gr
+# ————————— 1) Device setup —————————
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# ————————— 2) ADR classifier —————————
 model_name = "paragon-analytics/ADRv1"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
 pred_pipeline = pipeline(
     "text-classification",
     model=model,
     device=0 if device.type == "cuda" else -1
 )
 def predict_proba(texts):
     """Return classifier scores for one text or a list of texts.

     A single string is wrapped into a one-element list before it is passed
     to ``pred_pipeline``. Each per-text result is iterated as a sequence of
     dicts and the ``"score"`` value of each dict is collected.

     Args:
         texts: A string or a list of strings.

     Returns:
         np.ndarray built from one list of scores per input text.
     """
     samples = pred_pipeline([texts] if isinstance(texts, str) else texts)
     return np.array([[item["score"] for item in per_text] for per_text in samples])
 def predict_proba_shap(inputs):
     """Adapt SHAP's inputs for ``predict_proba``.

     Items that are lists of tokens are joined with single spaces; items
     that are already strings are passed through unchanged.

     Args:
         inputs: Iterable of strings and/or lists of token strings.

     Returns:
         The result of ``predict_proba`` on the resulting list of strings.
     """
     def _as_text(item):
         return " ".join(item) if isinstance(item, list) else item

     return predict_proba(list(map(_as_text, inputs)))
+# ————————— 3) SHAP explainer —————————
 # Text masker built from the classifier's tokenizer.
 masker = shap.maskers.Text(tokenizer)
 # Run the pipeline once on a dummy sample to read the output class labels.
 _example = pred_pipeline(["test"])[0]
 class_labels = [d["label"] for d in _example]
 # The explainer calls the SHAP-compatible predict wrapper and names its
 # outputs after the class labels read above.
 explainer = shap.Explainer(
     predict_proba_shap,
     masker=masker,
     output_names=class_labels
 )
+# ————————— 4) Biomedical NER —————————
+ner_name = "d4data/biomedical-ner-all"
+ner_tokenizer = AutoTokenizer.from_pretrained(ner_name)
+ner_model = AutoModelForTokenClassification.from_pretrained(ner_name).to(device)
 ner_pipe = pipeline(
     "ner",
     model=ner_model,
     device=0 if device.type == "cuda" else -1
 )
 ENTITY_COLORS = {
     "Severity": "red",
     "Sign_symptom": "green",
     "Biological_structure": "silver"
 }
+# ————————— 5) Prediction + SHAP + NER —————————
 def adr_predict(text: str):
     """Run classification, SHAP explanation, and NER highlighting on *text*.

     Args:
         text: Free-text description to analyse.

     Returns:
         A 3-tuple ``(prob_dict, fig, highlighted)``:
         ``prob_dict`` maps each entry of ``class_labels`` to its score as a
         float; ``fig`` is the value returned by
         ``shap.plots.text(..., display=False)``; ``highlighted`` is an HTML
         string in which every entity span reported by ``ner_pipe`` is
         wrapped in a coloured ``<mark>`` element.
     """
     # Function-scope import: the file's import header is not modified.
     from html import escape

     # Probabilities
     probs = predict_proba([text])[0]
     prob_dict = {cls: float(probs[i]) for i, cls in enumerate(class_labels)}

     # SHAP
     shap_vals = explainer([text])
     # NOTE(review): the SHAP docs describe display=False as returning an HTML
     # string rather than a Matplotlib figure -- confirm the Gradio output
     # component bound to `fig` accepts that.
     fig = shap.plots.text(shap_vals[0], display=False)

     # NER highlight. The input is user-supplied and is rendered as HTML, so
     # both the plain stretches and the entity spans are escaped.
     pieces, last = [], 0
     for ent in ner_pipe(text):
         s, e = ent["start"], ent["end"]
         color = ENTITY_COLORS.get(ent["entity_group"], "lightgray")
         pieces.append(escape(text[last:s]))
         # Emit the original slice of the input instead of the tokenizer's
         # normalised `word`, so the output text matches what was typed.
         pieces.append(
             f"<mark style='background-color:{color};'>{escape(text[s:e])}</mark>"
         )
         last = e
     pieces.append(escape(text[last:]))
     highlighted = "".join(pieces)

     return prob_dict, fig, highlighted
+# ————————— 6) Gradio UI —————————
 with gr.Blocks() as demo:
     gr.Markdown("## Welcome to **ADR Detector** 🪐")
     gr.Markdown(
+        "Predicts how likely your text describes a **severe** vs. **non-severe** adverse reaction.  \n"
         "_(Not for medical or diagnostic use.)_"
     )
     txt = gr.Textbox(
+        label="Enter Your Text Here:", lines=3,
         placeholder="Type a sentence about an adverse reaction…"
     )
     btn = gr.Button("Analyze")
     with gr.Row():
+        out_prob = gr.Label(label="Predicted Probabilities")
+        out_shap = gr.Plot(label="SHAP Explanation")
+        out_ner  = gr.HTML(label="Biomedical Entities Highlighted")
     btn.click(
         fn=adr_predict,
         inputs=txt,
+        outputs=[out_prob, out_shap, out_ner]
     )
     gr.Examples(
             "A 35-year-old female had minor abdominal pain after Acetaminophen."
         ],
         inputs=txt,
+        outputs=[out_prob, out_shap, out_ner],
         fn=adr_predict,
+        cache_examples=False   # ← disable startup caching here
     )
 if __name__ == "__main__":