"""Streamlit app that extracts text from an uploaded lab report and analyzes it.

Flow: extract text (PDF, image or plain text) -> zero-shot check that the
content is a lab report -> sentiment -> summary -> Hindi/Urdu translation of
the summary.
"""

import io

import streamlit as st
from transformers import pipeline
import pdfplumber
from PIL import Image
import easyocr


@st.cache_resource
def initialize_models():
    """Build every Hugging Face pipeline used by the app.

    Wrapped in ``st.cache_resource`` so the pipelines are created once and
    reused instead of being rebuilt on each Streamlit rerun.

    Returns:
        dict: ``report_check_model``, ``sentiment_model`` and
        ``summarize_model`` map to pipelines; ``translation_model`` maps to a
        dict of translation pipelines keyed by ``"en"``, ``"hi"`` and ``"ur"``.
    """
    return {
        "report_check_model": pipeline(
            "zero-shot-classification", model="facebook/bart-large-mnli"
        ),
        "sentiment_model": pipeline("sentiment-analysis"),
        "summarize_model": pipeline(
            "summarization", model="facebook/bart-large-cnn"
        ),
        "translation_model": {
            # NOTE(review): the "en" pipeline is not used anywhere in this
            # file; it is kept so the returned mapping keeps its shape.
            "en": pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en"),
            "hi": pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi"),
            "ur": pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur"),
        },
    }


@st.cache_resource
def _get_ocr_reader():
    """Return a shared EasyOCR reader for English text.

    Cached because constructing a reader loads the OCR models, which is far
    too slow to repeat for every uploaded image.
    """
    return easyocr.Reader(["en"])  # Add more languages if needed


def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: Path or file-like object accepted by ``pdfplumber.open``.

    Returns:
        str: Page texts joined with newlines, stripped of surrounding
        whitespace. Empty when no page yields any text.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # A page without extractable text (e.g. a scanned image) may yield
        # None, so fall back to "" instead of failing on concatenation.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    # Join with a newline so the last word of one page is not glued to the
    # first word of the next.
    return "\n".join(page_texts).strip()


def extract_text_from_image(image_file):
    """Extract text from an image using EasyOCR.

    Args:
        image_file: Path or file-like object accepted by ``PIL.Image.open``.

    Returns:
        str: Recognized text fragments joined by single spaces, stripped.
    """
    # EasyOCR documents file paths, URLs, bytes and numpy arrays as inputs,
    # not PIL images, so normalize the upload to RGB and hand over PNG bytes.
    with Image.open(image_file) as image:
        buffer = io.BytesIO()
        image.convert("RGB").save(buffer, format="PNG")
    # `detail=0` returns only the text
    fragments = _get_ocr_reader().readtext(buffer.getvalue(), detail=0)
    return " ".join(fragments).strip()


def preprocess_text(text, max_length=1024):
    """Truncate ``text`` to at most ``max_length`` characters for model input.

    Args:
        text: Text to shorten.
        max_length: Maximum number of characters to keep.

    Returns:
        str: ``text`` unchanged when it is short enough, otherwise its first
        ``max_length`` characters.
    """
    # Slicing already returns the whole string when it is shorter than the
    # limit, so no explicit length check is needed.
    return text[:max_length]


def is_lab_report(text, model):
    """Decide whether ``text`` looks like a lab report.

    Args:
        text: Text to classify.
        model: Zero-shot classification pipeline.

    Returns:
        bool: True when the first label in the pipeline result is
        ``"lab report"``.
    """
    result = model(text, candidate_labels=["lab report", "not lab report"])
    return result["labels"][0] == "lab report"


def analyze_sentiment(text, sentiment_model):
    """Classify the sentiment of ``text``.

    Args:
        text: Text to analyze.
        sentiment_model: Sentiment-analysis pipeline.

    Returns:
        tuple: ``("Positive" | "Negative", score)`` where ``score`` is the
        score reported by the pipeline for its predicted label.
    """
    # The character cap applied upstream does not bound the token count, so
    # ask the tokenizer to truncate to the model's limit as well.
    result = sentiment_model(text, truncation=True)[0]
    sentiment = "Positive" if result["label"] == "POSITIVE" else "Negative"
    return sentiment, result["score"]


def summarize_content(text, summarize_model):
    """Summarize ``text`` with deterministic decoding.

    Args:
        text: Text to summarize.
        summarize_model: Summarization pipeline.

    Returns:
        str: The generated summary text.
    """
    summary = summarize_model(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        truncation=True,  # guard against inputs longer than the model accepts
    )
    return summary[0]["summary_text"]


def translate_content(text, translation_models):
    """Translate ``text`` into Hindi and Urdu.

    Args:
        text: Text to translate; returned unchanged as the English entry.
        translation_models: Mapping with ``"hi"`` and ``"ur"`` translation
            pipelines.

    Returns:
        dict: Keys ``"English"``, ``"Hindi"`` and ``"Urdu"`` mapped to text.
    """
    return {
        "English": text,
        "Hindi": translation_models["hi"](text)[0]["translation_text"],
        "Urdu": translation_models["ur"](text)[0]["translation_text"],
    }


def main():
    """Run the Streamlit UI: upload, extract, validate, analyze, translate."""
    st.title("Lab Test Analyzer")
    models = initialize_models()

    uploaded_file = st.file_uploader(
        "Upload a Lab Report (PDF, Image, or Text)",
        type=["pdf", "png", "jpg", "jpeg", "txt"],
    )
    if not uploaded_file:
        return

    file_type = uploaded_file.name.split(".")[-1].lower()

    if file_type == "pdf":
        st.write("Processing PDF file...")
        extracted_text = extract_text_from_pdf(uploaded_file)
    elif file_type in ["png", "jpg", "jpeg"]:
        st.write("Processing Image file...")
        extracted_text = extract_text_from_image(uploaded_file)
    elif file_type == "txt":
        st.write("Processing Text file...")
        # Replace undecodable bytes rather than crashing on files that are
        # not valid UTF-8.
        extracted_text = uploaded_file.read().decode("utf-8", errors="replace")
    else:
        st.error("Unsupported file type.")
        return

    # Whitespace-only content is treated the same as no content at all.
    if not extracted_text.strip():
        st.error("Could not extract text from the uploaded file.")
        return

    st.subheader("Extracted Content")
    st.text_area("Extracted Text", extracted_text, height=200)

    # Preprocess text
    preprocessed_text = preprocess_text(extracted_text)

    # Check if it's a lab report
    if not is_lab_report(preprocessed_text, models["report_check_model"]):
        st.error("The uploaded file does not appear to be a lab report.")
        return

    st.success("The uploaded file is a valid lab report.")

    # Sentiment Analysis
    sentiment, confidence = analyze_sentiment(
        preprocessed_text, models["sentiment_model"]
    )
    st.subheader("Sentiment Analysis")
    st.write(f"**Sentiment**: {sentiment} (Confidence: {confidence:.2f})")

    # Summarization
    summary = summarize_content(preprocessed_text, models["summarize_model"])
    st.subheader("Summary")
    st.text_area("Summary", summary, height=150)

    # Translation (of the summary, not of the full extracted text)
    translations = translate_content(summary, models["translation_model"])
    st.subheader("Translations")
    st.write("**English**: ", translations["English"])
    st.write("**Hindi**: ", translations["Hindi"])
    st.write("**Urdu**: ", translations["Urdu"])


if __name__ == "__main__":
    main()