# Hugging Face Spaces page header captured with the source (not part of the app):
# "Spaces: Sleeping Sleeping"
import streamlit as st | |
import os | |
import time | |
import re | |
import json | |
import requests | |
from PIL import Image | |
from openai import OpenAI | |
from io import BytesIO | |
# ------------------ App Configuration ------------------
# Page settings are kept in one mapping and splatted into set_page_config;
# the heading and caption are rendered right after.
_PAGE_SETTINGS = {
    "page_title": "Document AI Assistant",
    "layout": "wide",
}

st.set_page_config(**_PAGE_SETTINGS)
st.title("π Document AI Assistant")
st.caption("Chat with an AI Assistant on your medical/pathology documents")
# ------------------ Load API Key and Assistant ID ------------------
# Both values are read from environment variables. If either one is missing
# or empty, an error is shown and the script is halted before the OpenAI
# client is created.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ASSISTANT_ID = os.getenv("ASSISTANT_ID")

if not (OPENAI_API_KEY and ASSISTANT_ID):
    st.error("Missing secrets. Please ensure both OPENAI_API_KEY and ASSISTANT_ID are set in your Hugging Face Space secrets.")
    st.stop()

client = OpenAI(api_key=OPENAI_API_KEY)
# ------------------ Load Structured JSON ------------------
# Per-page structured data (summaries / FAQs) used by the right-hand column.
STRUCTURED_JSON_PATH = "51940670-Manual-of-Surgical-Pathology-Third-Edition_1_structured_output.json"

try:
    # Decode explicitly as UTF-8 so the result does not depend on the host's
    # locale default encoding.
    with open(STRUCTURED_JSON_PATH, "r", encoding="utf-8") as f:
        structured_data = json.load(f)
except (OSError, ValueError) as e:
    # OSError: file missing or unreadable.
    # ValueError: malformed JSON (json.JSONDecodeError) or bad bytes
    # (UnicodeDecodeError); both are ValueError subclasses.
    st.error(f"β Failed to load structured summary file: {e}")
    st.stop()
# ------------------ Session State Initialization ------------------
# Seed each session key with its starting value, but only when the key is
# not already present, so values from earlier reruns are kept.
_SESSION_DEFAULTS = {
    "messages": [],
    "thread_id": None,
    "image_url": None,
    "image_updated": False,
}

for _key, _initial in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _initial
# ------------------ Sidebar Controls ------------------
with st.sidebar:
    st.header("π§ Settings")

    # "Clear Chat" resets the conversation, the thread and the image state,
    # then reruns the script.
    if st.button("π Clear Chat"):
        for _key, _reset_value in (
            ("messages", []),
            ("thread_id", None),
            ("image_url", None),
            ("image_updated", False),
        ):
            st.session_state[_key] = _reset_value
        st.rerun()

    # Toggle read by the left column to decide whether to render the image.
    show_image = st.checkbox("π Show Document Image", value=True)
# ------------------ Layout ------------------
# Three columns with relative widths 1 : 2 : 1 (image, chat, summary/FAQ).
left, center, right = st.columns([1, 2, 1])

# ------------------ Left Column: Document Image ------------------
# Downloads and shows the image stored in session state, if any.
with left:
    st.subheader("π Document Image")
    if show_image and st.session_state.image_url:
        try:
            # A timeout keeps a stalled download from hanging the script run.
            response = requests.get(st.session_state.image_url, timeout=10)
            # Fail on 4xx/5xx instead of handing an error page to PIL.
            response.raise_for_status()
            # Decode from the fully downloaded body rather than the raw
            # socket stream, which bypasses content decoding.
            image = Image.open(BytesIO(response.content))
            st.image(image, caption="π Extracted Page", use_container_width=True)
            st.session_state.image_updated = False
        except Exception:
            # Best effort at the UI boundary: any download or decode problem
            # becomes a warning rather than crashing the page.
            st.warning("β οΈ Could not load image.")
# ------------------ Center Column: Chat UI ------------------
# Run statuses from which this app can never reach "completed":
# "requires_action" is included because the app does not submit tool outputs.
_RUN_DEAD_END_STATUSES = frozenset(
    {"failed", "cancelled", "expired", "incomplete", "requires_action"}
)
# Upper bound (seconds) on how long one answer may be polled for.
_RUN_TIMEOUT_SECONDS = 120
# Page-image links the assistant may include in its answer.
_IMAGE_URL_PATTERN = re.compile(
    r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png'
)

with center:
    st.subheader("π¬ Document AI Assistant")

    # Replay the conversation kept in session state.
    for message in st.session_state.messages:
        role, content = message["role"], message["content"]
        st.chat_message(role).write(content)

    if prompt := st.chat_input("Type your question about the document..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        st.chat_message("user").write(prompt)
        try:
            # Create the thread lazily on the first question and reuse it.
            if st.session_state.thread_id is None:
                thread = client.beta.threads.create()
                st.session_state.thread_id = thread.id
            thread_id = st.session_state.thread_id

            client.beta.threads.messages.create(
                thread_id=thread_id,
                role="user",
                content=prompt,
            )
            run = client.beta.threads.runs.create(
                thread_id=thread_id,
                assistant_id=ASSISTANT_ID,
            )

            with st.spinner("Assistant is thinking..."):
                deadline = time.monotonic() + _RUN_TIMEOUT_SECONDS
                while True:
                    run_status = client.beta.threads.runs.retrieve(
                        thread_id=thread_id,
                        run_id=run.id,
                    )
                    if run_status.status == "completed":
                        break
                    # Stop polling when the run can no longer complete;
                    # the original loop would spin forever here.
                    if run_status.status in _RUN_DEAD_END_STATUSES:
                        raise RuntimeError(
                            f"Assistant run ended with status '{run_status.status}'."
                        )
                    if time.monotonic() >= deadline:
                        raise TimeoutError(
                            f"Assistant did not respond within {_RUN_TIMEOUT_SECONDS} seconds."
                        )
                    time.sleep(1)

            # Request newest-first explicitly and take the first assistant
            # message. Walking the list in reverse would return the oldest
            # reply in the thread from the second question onward.
            messages = client.beta.threads.messages.list(
                thread_id=thread_id,
                order="desc",
            )
            assistant_message = None
            for thread_message in messages.data:
                if thread_message.role != "assistant":
                    continue
                # Keep only text parts; a reply may also carry non-text parts.
                text_parts = [
                    part.text.value
                    for part in thread_message.content
                    if getattr(part, "type", None) == "text"
                ]
                if text_parts:
                    assistant_message = "\n\n".join(text_parts)
                break

            if assistant_message is None:
                raise RuntimeError("Assistant returned no text response.")

            st.chat_message("assistant").write(assistant_message)
            st.session_state.messages.append({"role": "assistant", "content": assistant_message})

            # Extract GitHub image URL
            image_match = _IMAGE_URL_PATTERN.search(assistant_message)
            if image_match:
                st.session_state.image_url = image_match.group(0)
                st.session_state.image_updated = True
                # Rerun so the left column, drawn earlier in this pass,
                # picks up the new image URL.
                st.rerun()
        except Exception as e:
            st.error(f"β Error: {str(e)}")
# ------------------ Right Column: Structured Summary + FAQ (Button-based) ------------------
# Looks up the structured-data entry for the page shown in the image column
# and renders its summary and/or FAQs when the matching button is clicked.
with right:
    st.subheader("π Summary & FAQ (from Structured Data)")
    col1, col2 = st.columns(2)
    show_summary = col1.button("π Load Summary")
    show_faq = col2.button("β Load FAQ")

    # Resolve the current page entry from the image URL, e.g. ".../012.png" -> 12.
    # NOTE(review): only three-digit file names are matched; confirm that
    # this covers every page image.
    page_entry = None
    if st.session_state.image_url:
        match = re.search(r'/(\d{3})\.png', st.session_state.image_url)
        if match:
            page_number = int(match.group(1))
            page_entry = next(
                (
                    entry
                    for entry in structured_data
                    # Skip anything that is not a mapping instead of
                    # crashing on `.get`.
                    if isinstance(entry, dict)
                    and entry.get("page_number") == page_number
                ),
                None,
            )

    # Display Summary
    if show_summary:
        st.subheader("π Summary")
        if page_entry is None:
            # The button was clicked but no page is resolved; say so rather
            # than repeating the "click the button" prompt.
            st.info("No summary available for this page.")
        else:
            # `or` also covers a summary stored as null or an empty string.
            st.markdown(page_entry.get("summary") or "No summary available.")

    # Display FAQs
    if show_faq:
        st.subheader("β Auto-Generated FAQ")
        faq_list = []
        if page_entry is not None:
            faq_list = page_entry.get("faqs") or page_entry.get("questions") or []
        if faq_list:
            for faq in faq_list:
                if isinstance(faq, dict):
                    st.markdown(f"**Q:** {faq.get('question', '')}\n\n**A:** {faq.get('answer', '')}")
                else:
                    st.markdown(f"**Q:** {faq}")
        else:
            st.info("No FAQs available for this page.")