Spaces:

IAMTFRMZA
/

documentaitest

Running

App Files Files Community

IAMTFRMZA committed 17 days ago

Commit

c1043ca

verified ·

1 Parent(s): b386f62

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -166

app.py CHANGED Viewed

@@ -1,170 +1,58 @@
 import streamlit as st
-import os
-import time
-import re
 import json
-import requests
-from PIL import Image
-from openai import OpenAI
-from io import BytesIO
-# ------------------ App Configuration ------------------
-st.set_page_config(page_title="Document AI Assistant", layout="wide")
-st.title("📄 Document AI Assistant")
-st.caption("Chat with an AI Assistant on your medical/pathology documents")
-# ------------------ Load API Key and Assistant ID ------------------
-OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
-ASSISTANT_ID = os.environ.get("ASSISTANT_ID")
-if not OPENAI_API_KEY or not ASSISTANT_ID:
-    st.error("Missing secrets. Please ensure both OPENAI_API_KEY and ASSISTANT_ID are set in your Hugging Face Space secrets.")
-    st.stop()
-client = OpenAI(api_key=OPENAI_API_KEY)
-# ------------------ Load Structured JSON ------------------
-STRUCTURED_JSON_PATH = "51940670-Manual-of-Surgical-Pathology-Third-Edition_1_structured_output.json"
-try:
-    with open(STRUCTURED_JSON_PATH, "r") as f:
-        structured_data = json.load(f)
-except Exception as e:
-    st.error(f"❌ Failed to load structured summary file: {e}")
-    st.stop()
-# ------------------ Session State Initialization ------------------
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-if "thread_id" not in st.session_state:
-    st.session_state.thread_id = None
-if "image_url" not in st.session_state:
-    st.session_state.image_url = None
-if "image_updated" not in st.session_state:
-    st.session_state.image_updated = False
-# ------------------ Sidebar Controls ------------------
-st.sidebar.header("🔧 Settings")
-if st.sidebar.button("🔄 Clear Chat"):
-    st.session_state.messages = []
-    st.session_state.thread_id = None
-    st.session_state.image_url = None
-    st.session_state.image_updated = False
-    st.rerun()
-show_image = st.sidebar.checkbox("📖 Show Document Image", value=True)
-# ------------------ Layout ------------------
-left, center, right = st.columns([1, 2, 1])
-# ------------------ Left Column: Document Image ------------------
-with left:
-    st.subheader("📄 Document Image")
-    if show_image and st.session_state.image_url:
-        try:
-            image = Image.open(requests.get(st.session_state.image_url, stream=True).raw)
-            st.image(image, caption="📑 Extracted Page", use_container_width=True)
-            st.session_state.image_updated = False
-        except Exception as e:
-            st.warning("⚠️ Could not load image.")
-# ------------------ Center Column: Chat UI ------------------
-with center:
-    st.subheader("💬 Document AI Assistant")
-    for message in st.session_state.messages:
-        role, content = message["role"], message["content"]
-        st.chat_message(role).write(content)
-    if prompt := st.chat_input("Type your question about the document..."):
-        st.session_state.messages.append({"role": "user", "content": prompt})
-        st.chat_message("user").write(prompt)
-        try:
-            if st.session_state.thread_id is None:
-                thread = client.beta.threads.create()
-                st.session_state.thread_id = thread.id
-            thread_id = st.session_state.thread_id
-            client.beta.threads.messages.create(
-                thread_id=thread_id,
-                role="user",
-                content=prompt
-            )
-            run = client.beta.threads.runs.create(
-                thread_id=thread_id,
-                assistant_id=ASSISTANT_ID
-            )
-            with st.spinner("Assistant is thinking..."):
-                while True:
-                    run_status = client.beta.threads.runs.retrieve(
-                        thread_id=thread_id,
-                        run_id=run.id
-                    )
-                    if run_status.status == "completed":
-                        break
-                    time.sleep(1)
-            messages = client.beta.threads.messages.list(thread_id=thread_id)
-            assistant_message = None
-            for message in reversed(messages.data):
-                if message.role == "assistant":
-                    assistant_message = message.content[0].text.value
-                    break
-            st.chat_message("assistant").write(assistant_message)
-            st.session_state.messages.append({"role": "assistant", "content": assistant_message})
-            # Extract GitHub image URL
-            image_match = re.search(
-                r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
-                assistant_message
-            )
-            if image_match:
-                st.session_state.image_url = image_match.group(0)
-                st.session_state.image_updated = True
-                st.rerun()
-        except Exception as e:
-            st.error(f"❌ Error: {str(e)}")
-# ------------------ Right Column: Structured Summary + FAQ (Button-based) ------------------
-with right:
-    st.subheader("📌 Summary & FAQ (from Structured Data)")
-    col1, col2 = st.columns(2)
-    show_summary = col1.button("📝 Load Summary")
-    show_faq = col2.button("❓ Load FAQ")
-    summary_text = "Click the button to load summary."
-    faq_list = []
-    if st.session_state.image_url:
-        match = re.search(r'/(\d{3})\.png', st.session_state.image_url)
-        if match:
-            page_number = int(match.group(1))
-            page_entry = next((entry for entry in structured_data if entry.get("page_number") == page_number), None)
-            if page_entry:
-                if show_summary:
-                    summary_text = page_entry.get("summary", "No summary available.")
-                if show_faq:
-                    faq_list = page_entry.get("faqs", []) or page_entry.get("questions", [])
-    # Display Summary
-    if show_summary:
-        st.subheader("📝 Summary")
-        st.markdown(summary_text)
-    # Display FAQs
-    if show_faq:
-        st.subheader("❓ Auto-Generated FAQ")
-        if faq_list:
-            for faq in faq_list:
-                if isinstance(faq, dict):
-                    st.markdown(f"**Q:** {faq.get('question', '')}\n\n**A:** {faq.get('answer', '')}")
-                else:
-                    st.markdown(f"**Q:** {faq}")
         else:
-            st.info("No FAQs available for this page.")

 import streamlit as st
 import json
+# Load the JSON file once with caching
+@st.cache_data
+def load_data():
+    with open("51940670-Manual-of-Surgical-Pathology-Third-Edition_1_structured_output.json", "r") as f:
+        return json.load(f)
+data = load_data()
+st.title("📘 Surgical Pathology Manual - Page Summary & FAQ")
+# Step 1: Get all page numbers where 'page' exists and is digit
+page_numbers = sorted({
+    int(entry["page"])
+    for entry in data
+    if "page" in entry and str(entry["page"]).isdigit()
+})
+# Step 2: User selects page
+selected_page = st.selectbox("Select Page Number", page_numbers)
+# Step 3: Filter entries by page
+page_content = [
+    entry for entry in data
+    if str(entry.get("page", "")) == str(selected_page)
+]
+# Step 4: Display summary and FAQs
+if page_content:
+    for section in page_content:
+        section_title = section.get("section_heading", "Untitled Section")
+        summary = section.get("summary", None)
+        faq = section.get("faq", [])
+        st.markdown(f"### 🧠 Section: {section_title}")
+        # Show summary
+        if summary and isinstance(summary, str):
+            st.markdown("#### 🔍 Summary")
+            st.write(summary)
+        else:
+            st.info("No summary available for this section.")
+        # Show FAQ
+        if faq and isinstance(faq, list) and any("question" in q for q in faq):
+            st.markdown("#### ❓ FAQ")
+            for qna in faq:
+                question = qna.get("question", "").strip()
+                answer = qna.get("answer", "").strip()
+                if question:
+                    st.markdown(f"**Q:** {question}")
+                    st.markdown(f"**A:** {answer if answer else '_No answer provided._'}")
         else:
+            st.info("No FAQs available for this section.")
+else:
+    st.warning("No content found for the selected page.")