# Document AI Assistant (Streamlit app).
#
# Renders a three-column page:
#   * left   - the document page image whose URL was last cited by the assistant
#   * center - a chat with an OpenAI Assistant (Assistants API threads/runs)
#   * right  - the pre-computed summary and FAQ for the page currently shown
#
# Requires the OPENAI_API_KEY and ASSISTANT_ID environment variables.
import io
import json
import os
import re
import time
from typing import Optional

import requests
import streamlit as st
from openai import OpenAI
from PIL import Image

# ------------------ Constants ------------------
STRUCTURED_DATA_PATH = (
    "51940670-Manual-of-Surgical-Pathology-Third-Edition_1_structured_output.json"
)
# Page whose summary/FAQ is shown before the assistant has cited any page image.
DEFAULT_PAGE_NUMBER = 151
# Page images the assistant may cite in its answers.
IMAGE_URL_PATTERN = re.compile(
    r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png'
)
PAGE_NUMBER_PATTERN = re.compile(r'page_(\d+)')
IMAGE_TIMEOUT_SECONDS = 15
RUN_POLL_INTERVAL_SECONDS = 1
RUN_TIMEOUT_SECONDS = 180
# Run states from which "completed" can no longer be reached without action
# this app never takes (it does not submit tool outputs).
UNRECOVERABLE_RUN_STATUSES = frozenset(
    {"failed", "cancelled", "expired", "incomplete", "requires_action"}
)

# ------------------ App Configuration ------------------
st.set_page_config(page_title="Document AI Assistant", layout="wide")
st.title("📄 Document AI Assistant")
st.caption("Chat with an AI Assistant on your medical/pathology documents")


# ------------------ Helpers ------------------
@st.cache_data(show_spinner=False)
def load_structured_data(path: str) -> list:
    """Load the per-page summary/FAQ entries (a list of dicts) from *path*.

    Cached so the file is parsed once instead of on every Streamlit rerun.

    Raises:
        OSError: the file cannot be opened.
        json.JSONDecodeError: the file is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def fetch_image(url: str) -> Image.Image:
    """Download *url* and return it as a PIL image.

    The body is read fully into memory (instead of handing PIL the raw
    socket stream) so that HTTP errors are detected and content-encoding is
    decoded before PIL parses the bytes.

    Raises:
        requests.RequestException: network failure, timeout or HTTP error.
        OSError: the payload is not a readable image.
    """
    response = requests.get(url, timeout=IMAGE_TIMEOUT_SECONDS)
    response.raise_for_status()
    return Image.open(io.BytesIO(response.content))


def wait_for_run(client: OpenAI, thread_id: str, run_id: str) -> None:
    """Block until the run completes.

    Raises:
        RuntimeError: the run ended in a state other than "completed".
        TimeoutError: the run did not finish within RUN_TIMEOUT_SECONDS.
    """
    deadline = time.monotonic() + RUN_TIMEOUT_SECONDS
    while True:
        run_status = client.beta.threads.runs.retrieve(
            thread_id=thread_id, run_id=run_id
        )
        if run_status.status == "completed":
            return
        if run_status.status in UNRECOVERABLE_RUN_STATUSES:
            raise RuntimeError(
                f"Assistant run ended with status '{run_status.status}'."
            )
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Assistant did not respond within {RUN_TIMEOUT_SECONDS} seconds."
            )
        time.sleep(RUN_POLL_INTERVAL_SECONDS)


def latest_assistant_text(client: OpenAI, thread_id: str, run_id: str) -> Optional[str]:
    """Return the text of the newest assistant message produced by *run_id*.

    Messages are requested newest-first and scanned in that order, so the
    reply to the current prompt is returned rather than the first reply ever
    given in the thread. Non-text content parts (e.g. image files) are
    skipped. Returns None when the run produced no text.
    """
    messages = client.beta.threads.messages.list(thread_id=thread_id, order="desc")
    for message in messages.data:
        if message.role != "assistant":
            continue
        # Ignore replies that belong to earlier runs in the same thread.
        if getattr(message, "run_id", run_id) != run_id:
            continue
        text_parts = [
            part.text.value
            for part in message.content
            if getattr(part, "type", None) == "text"
        ]
        if text_parts:
            return "\n\n".join(text_parts)
    return None


# ------------------ Load API Key and Assistant ID ------------------
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
ASSISTANT_ID = os.environ.get("ASSISTANT_ID")

if not OPENAI_API_KEY or not ASSISTANT_ID:
    st.error("Missing secrets. Please ensure both OPENAI_API_KEY and ASSISTANT_ID are set in your Hugging Face Space secrets.")
    st.stop()

client = OpenAI(api_key=OPENAI_API_KEY)

# ------------------ Session State Initialization ------------------
if "messages" not in st.session_state:
    st.session_state.messages = []
if "thread_id" not in st.session_state:
    st.session_state.thread_id = None
if "image_url" not in st.session_state:
    st.session_state.image_url = None
if "image_updated" not in st.session_state:
    st.session_state.image_updated = False

# ------------------ Sidebar Controls ------------------
st.sidebar.header("🔧 Settings")
if st.sidebar.button("🔄 Clear Chat"):
    st.session_state.messages = []
    st.session_state.thread_id = None
    st.session_state.image_url = None
    st.session_state.image_updated = False
    st.rerun()

show_image = st.sidebar.checkbox("📖 Show Document Image", value=True)

# ------------------ Load Structured Summary/FAQ ------------------
try:
    structured_data = load_structured_data(STRUCTURED_DATA_PATH)  # list of dicts, not a dict
except (OSError, json.JSONDecodeError) as e:
    # The chat is still usable without summaries, so degrade instead of crashing.
    st.warning(f"⚠️ Could not load summary data: {e}")
    structured_data = []

# ------------------ Three-Column Layout ------------------
left, center, right = st.columns([1, 2, 1])

# ------------------ Left Column: Document Image ------------------
with left:
    st.subheader("📄 Document Image")
    if show_image and st.session_state.image_url:
        try:
            image = fetch_image(st.session_state.image_url)
            st.image(image, caption="📑 Extracted Page", use_container_width=True)
            st.session_state.image_updated = False
        except Exception:
            # Best-effort UI boundary: a broken image must not take down the chat.
            st.warning("⚠️ Could not load image.")

# ------------------ Center Column: Chat UI ------------------
with center:
    st.subheader("💬 Document AI Assistant")

    # Replay the conversation so far (Streamlit re-executes the script on every event).
    for message in st.session_state.messages:
        st.chat_message(message["role"]).write(message["content"])

    if prompt := st.chat_input("Type your question about the document..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        st.chat_message("user").write(prompt)

        needs_rerun = False
        try:
            # One Assistants thread per browser session, created lazily.
            if st.session_state.thread_id is None:
                thread = client.beta.threads.create()
                st.session_state.thread_id = thread.id

            thread_id = st.session_state.thread_id

            client.beta.threads.messages.create(
                thread_id=thread_id,
                role="user",
                content=prompt,
            )
            run = client.beta.threads.runs.create(
                thread_id=thread_id,
                assistant_id=ASSISTANT_ID,
            )

            with st.spinner("Assistant is thinking..."):
                wait_for_run(client, thread_id, run.id)

            assistant_message = latest_assistant_text(client, thread_id, run.id)

            if assistant_message is None:
                st.warning("⚠️ The assistant did not return a text response.")
            else:
                st.chat_message("assistant").write(assistant_message)
                st.session_state.messages.append(
                    {"role": "assistant", "content": assistant_message}
                )

                # Extract GitHub image URL
                image_match = IMAGE_URL_PATTERN.search(assistant_message)
                if image_match:
                    st.session_state.image_url = image_match.group(0)
                    st.session_state.image_updated = True
                    needs_rerun = True
        except Exception as e:
            st.error(f"❌ Error: {str(e)}")

        # The left column was already rendered with the previous URL, so a
        # rerun is needed to show the new page. It is triggered outside the
        # try block so the rerun signal can never be swallowed by the handler.
        if needs_rerun:
            st.rerun()

# ------------------ Right Column: Summary and FAQ ------------------
with right:
    st.subheader("📌 Summary")

    # Parse page number from image URL if available
    page_number = DEFAULT_PAGE_NUMBER
    if st.session_state.image_url:
        match = PAGE_NUMBER_PATTERN.search(st.session_state.image_url)
        if match:
            page_number = int(match.group(1))

    # Get entry from structured data
    page_entry = next(
        (entry for entry in structured_data if entry.get("page") == page_number),
        None,
    )

    if page_entry:
        summary_text = page_entry.get("summary", "No summary available.")
        faq_list = page_entry.get("faqs", [])
    else:
        summary_text = "No summary available."
        faq_list = []

    st.markdown(summary_text)

    st.subheader("❓ Auto-Generated FAQ")
    if faq_list:
        for faq in faq_list:
            st.markdown(f"**Q:** {faq.get('question', '')}\n\n**A:** {faq.get('answer', '')}")
    else:
        st.info("No FAQs available for this page.")