File size: 1,528 Bytes
ba12ca2
e3f02cb
 
ba12ca2
e3f02cb
ba12ca2
e3f02cb
 
 
 
 
 
 
 
 
 
 
ba12ca2
 
 
e3f02cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
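# Gradio demo: classify a clinical note into a pulmonary condition using a fine-tuned sequence-classification model.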

import torch
import gradio as gr
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from scipy.special import softmax

# Location handed to both from_pretrained() calls below.
model_path = "trained_clinicalbert"

# Use CUDA when torch reports it as available; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Inference only: switch to eval mode, then move the model to the chosen device.
# Both calls return the module itself, so `model` still names the same object.
model = model.eval().to(device)

# Class labels, indexed by the model's output position.
# Defined by hand: the order must match the label encoding used in training.
classes = [
    "Asthma",
    "COPD",
    "Lung Cancer",
    "Other Pulmonary",
    "Pleural Effusion",
    "Pneumonia",
    "Tuberculosis",
]

# Prediction function
def predict_clinical_note(note):
    """Classify a clinical note and report the predicted class with its confidence.

    Args:
        note: Free-text clinical note. ``None``, an empty string, or a
            whitespace-only string is treated as "no input" and is not sent
            to the model.

    Returns:
        ``"<class> (Confidence: <p>)"`` where ``<class>`` is the entry of
        ``classes`` at the arg-max position of the model's logits and ``<p>``
        is that class's softmax probability formatted to two decimals; or the
        prompt ``"Please enter a clinical note."`` when the note is blank.
    """
    # Guard clause: without it a blank note would still be tokenized and
    # classified, showing a label and confidence for what is effectively no input.
    if note is None or not note.strip():
        return "Please enter a clinical note."

    # Tokenize as a single-item batch, truncated/padded to exactly 512 tokens.
    inputs = tokenizer(note, return_tensors="pt", truncation=True, padding="max_length", max_length=512)
    # Input tensors must live on the same device as the model.
    inputs = {key: val.to(device) for key, val in inputs.items()}

    # No gradients are needed for inference.
    with torch.no_grad():
        outputs = model(**inputs)

    # Convert logits to probabilities along axis 1, then read the single row.
    probs = softmax(outputs.logits.cpu().numpy(), axis=1)
    pred_idx = int(probs.argmax(axis=1)[0])
    pred_class = classes[pred_idx]
    confidence = float(probs[0][pred_idx])
    return f"{pred_class} (Confidence: {confidence:.2f})"

# Gradio UI: a multi-line textbox feeds predict_clinical_note, and the
# returned string is shown as plain text.
iface = gr.Interface(
    title="Pulmonary Disease Classifier",
    description="Enter a clinical note to predict pulmonary condition (e.g., COPD, Pneumonia, Tuberculosis...)",
    fn=predict_clinical_note,
    inputs=gr.Textbox(placeholder="Paste clinical note here...", lines=6),
    outputs="text",
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()