Spaces:

flax-community
/

medclip-demo

Runtime error

File size: 2,666 Bytes

import os
import sys
import jax
import streamlit as st
import transformers
from huggingface_hub import snapshot_download
from transformers import AutoTokenizer
import torch
from torchvision.io import ImageReadMode, read_image


LOCAL_PATH = snapshot_download("flax-community/medclip")
sys.path.append(LOCAL_PATH)

from src.modeling_medclip import FlaxMedCLIP
from run_medclip import Transform


def prepare_image(image_path, model):
    """Load an image file and turn it into a one-image NumPy batch.

    Args:
        image_path: Path of the image file to read.
        model: Model whose ``config.vision_config.image_size`` is handed to
            the project's ``Transform`` preprocessing module.

    Returns:
        A NumPy array containing a batch of exactly one preprocessed image,
        with its axes reordered by ``permute(0, 2, 3, 1)``.
    """
    # Always decode to RGB, whatever channel layout the file itself uses.
    raw_pixels = read_image(image_path, mode=ImageReadMode.RGB)

    # Build the preprocessing module and compile it with TorchScript.
    target_size = model.config.vision_config.image_size
    scripted_transform = torch.jit.script(Transform(target_size))
    transformed = scripted_transform(raw_pixels)

    # Add a leading batch axis, then move axis 1 to the end
    # (presumably channels-first -> channels-last; confirm against Transform).
    batch = torch.stack([transformed])
    reordered = batch.permute(0, 2, 3, 1)
    return reordered.numpy()

def prepare_text(text, tokenizer):
    """Tokenize ``text`` and return the tokenizer's output as NumPy tensors.

    Args:
        text: The text to encode.
        tokenizer: Callable accepting the text and a ``return_tensors``
            keyword argument.

    Returns:
        Whatever ``tokenizer`` returns when called with ``return_tensors="np"``.
    """
    encoded = tokenizer(text, return_tensors="np")
    return encoded

def save_file_to_disk(uplaoded_file):
    """Write an uploaded file's contents to ``/tmp`` and return the path.

    The function does not delete the file; the caller is responsible for
    removing it once it is no longer needed.

    Args:
        uplaoded_file: Object exposing a ``name`` attribute and a
            ``getbuffer()`` method returning a bytes-like object. The
            (misspelled) parameter name is kept for backward compatibility.

    Returns:
        The path of the file that was written under ``/tmp``.
    """
    # Use only the final path component of the client-supplied name so a
    # name such as "../x" or "a/b.png" cannot escape the temp directory.
    safe_name = os.path.basename(uplaoded_file.name)
    temp_file = os.path.join("/tmp", safe_name)
    with open(temp_file, "wb") as f:
        # Read from the argument itself rather than from a same-looking
        # module-level global, so the function works for any caller.
        f.write(uplaoded_file.getbuffer())
    return temp_file
@st.cache(
    show_spinner=False,
    # Hash the tokenizer and model by object identity instead of by content.
    hash_funcs={
        FlaxMedCLIP: id,
        transformers.models.bert.tokenization_bert_fast.BertTokenizerFast: id,
    },
)
def load_tokenizer_and_model():
    """Load the SciBERT tokenizer and the MedCLIP model.

    The result is memoized by ``st.cache``.

    Returns:
        A ``(tokenizer, model)`` tuple: the tokenizer loaded from
        ``allenai/scibert_scivocab_uncased`` and the ``FlaxMedCLIP`` model
        loaded from ``LOCAL_PATH``.
    """
    scibert_tokenizer = AutoTokenizer.from_pretrained(
        "allenai/scibert_scivocab_uncased"
    )
    medclip = FlaxMedCLIP.from_pretrained(LOCAL_PATH)
    return scibert_tokenizer, medclip

def run_inference(image_path, text, model, tokenizer):
    """Score one image/text pair with the model.

    Args:
        image_path: Path of the image file to score.
        text: Text to score the image against.
        model: Callable model; it is invoked with the token ids, the pixel
            values, the attention mask, ``train=False`` and
            ``return_dict=True``.
        tokenizer: Tokenizer forwarded to ``prepare_text``.

    Returns:
        The ``[0][0]`` element of the sigmoid of the model's
        ``logits_per_image`` output.
    """
    image_batch = prepare_image(image_path, model)
    encoded_text = prepare_text(text, tokenizer)

    outputs = model(
        encoded_text["input_ids"],
        image_batch,
        attention_mask=encoded_text["attention_mask"],
        train=False,
        return_dict=True,
    )

    # Squash the logits into (0, 1) and pick the first entry.
    image_text_logits = outputs["logits_per_image"]
    probabilities = jax.nn.sigmoid(image_text_logits)
    return probabilities[0][0]

# Obtain the tokenizer and model through the cached loader.
tokenizer, model = load_tokenizer_and_model()

# Page layout: title, image uploader, and free-text diagnosis field.
st.title("Diagnosis Scoring")
uploaded_file = st.file_uploader("Choose an chest x-ray...", type=["png", "jpg"])
text_input = st.text_input("Type the doctor diagnosis")

# Score only once both an image and a non-empty diagnosis are available.
if uploaded_file is not None and text_input:
    local_image_path = None
    try:
        # The image is written to disk because inference reads it by path.
        local_image_path = save_file_to_disk(uploaded_file)
        raw_score = run_inference(local_image_path, text_input, model, tokenizer)
        score = raw_score.tolist()

        image_options = {
            "caption": text_input,
            "width": None,
            "use_column_width": None,
            "clamp": False,
            "channels": "RGB",
            "output_format": "auto",
        }
        st.image(uploaded_file, **image_options)
        st.write(f"## Score: {score:.2f}")
    finally:
        # Remove the temporary copy whether or not scoring succeeded.
        if local_image_path:
            os.remove(local_image_path)