File size: 6,253 Bytes
8c4492e
4102a15
 
 
13919c8
 
 
 
1c29e60
b386f62
8c4492e
 
 
 
c1043ca
9c9251a
8c4492e
 
c1043ca
9c9251a
8c4492e
809b532
8c4492e
c1043ca
8c4492e
c1043ca
8c4492e
 
 
 
 
 
 
 
 
4102a15
 
c1043ca
8c4492e
 
 
 
 
 
 
4102a15
8c4492e
c1043ca
8c4492e
4102a15
8c4492e
9c9251a
4102a15
9c9251a
 
 
 
 
4102a15
 
 
 
 
1c29e60
 
 
4102a15
1c29e60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4102a15
 
 
 
 
 
 
 
 
 
 
 
9c9251a
 
 
 
 
 
 
 
 
 
 
 
 
4102a15
 
9c9251a
809b532
9c9251a
4102a15
9c9251a
 
 
 
 
 
 
 
 
3bbf4ab
 
9c9251a
 
bcaf273
 
1c29e60
bcaf273
 
 
 
 
809b532
57d0c38
8c4492e
9c9251a
809b532
 
 
1c29e60
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import streamlit as st
import numpy as np
import tempfile
import soundfile as sf
import os
import time
import re
from openai import OpenAI
from streamlit_audio_recorder import audio_recorder

# ------------------ App Configuration ------------------
# Page chrome must be configured before any other Streamlit call renders.
st.set_page_config(page_title="Document AI Assistant", layout="wide")
st.title("📄 Document AI Assistant")
st.caption("Chat with an AI Assistant on your medical/pathology documents")

# ------------------ Load API Key and Assistant ID from Hugging Face Secrets ------------------
# Both values are injected as environment variables by the hosting Space.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
ASSISTANT_ID = os.environ.get("ASSISTANT_ID")

# ------------------ Error Handling for Missing Secrets ------------------
# Abort the script run early rather than failing later on an API call.
if not (OPENAI_API_KEY and ASSISTANT_ID):
    st.error("❌ Missing secrets. Please ensure both OPENAI_API_KEY and ASSISTANT_ID are set in your Hugging Face Space secrets.")
    st.stop()

client = OpenAI(api_key=OPENAI_API_KEY)

# ------------------ Session State Initialization ------------------
# Seed each session-state key with its default on the first run only;
# later reruns keep whatever value the session has accumulated.
_SESSION_DEFAULTS = {
    "messages": [],
    "thread_id": None,
    "image_url": None,
    "image_updated": False,
    "transcript_preview": None,
}
for _state_key, _default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _default

# ------------------ Sidebar Controls ------------------
st.sidebar.header("🔧 Settings")
if st.sidebar.button("🔄 Clear Chat"):
    # Wipe every piece of conversation state back to its default,
    # then force a fresh render of the page.
    _cleared = {
        "messages": [],
        "thread_id": None,
        "image_url": None,
        "image_updated": False,
        "transcript_preview": None,
    }
    for _state_key, _value in _cleared.items():
        st.session_state[_state_key] = _value
    st.rerun()

# Per-run display toggles (not persisted in session state).
show_image = st.sidebar.checkbox("📖 Show Document Image", value=True)
preview_toggle = st.sidebar.checkbox("📝 Preview transcription before sending", value=True)

# ------------------ Split Layout ------------------
# Left column: document image; right column: voice input + chat.
col1, col2 = st.columns([1, 2])

# ------------------ Image Panel (Left) ------------------
with col1:
    # Render the most recently extracted page when the sidebar toggle is on
    # and a page URL has been captured from an assistant reply.
    if show_image and st.session_state.image_url:
        st.image(
            st.session_state.image_url,
            caption="📑 Extracted Page",
            use_container_width=True,
        )
        # The pending-update flag is consumed once the image is shown.
        st.session_state.image_updated = False

# ------------------ Voice Input Processing ------------------
with col2:
    st.markdown("### 🎙️ Voice Input (Optional)")
    audio_bytes = audio_recorder(pause_threshold=2.0)

    if audio_bytes:
        st.info("Transcribing your voice...")
        # Whisper needs a real file handle, so spill the recording to a
        # temp file (delete=False because we reopen it by path below).
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp.write(audio_bytes)
            tmp_path = tmp.name

        transcript = None
        confidence = "N/A"
        try:
            with open(tmp_path, "rb") as audio_file:
                whisper_result = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="json"
                )
            transcript = whisper_result.text.strip()
            # BUG FIX: the transcription response is a model object, not a
            # dict — calling .get() raised AttributeError on every success.
            # The "json" response format carries no confidence field, so
            # getattr() falls back to "N/A".
            confidence = getattr(whisper_result, "confidence", "N/A")
        except Exception as e:
            st.error(f"❌ Transcription failed: {str(e)}")
        finally:
            # BUG FIX: the delete=False temp file was never removed,
            # leaking one file per recording. Best-effort cleanup.
            try:
                os.remove(tmp_path)
            except OSError:
                pass

        # BUG FIX: st.rerun() raises Streamlit's internal control-flow
        # exception; inside the try above it would have been caught by the
        # broad `except Exception` and reported as a failure. Handle the
        # result (and any rerun) only after the handler.
        if transcript:
            st.success(f"Recognized: {transcript}")
            st.caption(f"🧠 Confidence: {confidence}")
            if preview_toggle:
                st.session_state.transcript_preview = transcript
            else:
                st.session_state.messages.append({"role": "user", "content": transcript})
                st.rerun()

    # Preview card: let the user confirm or discard the transcription
    # before it is sent to the assistant.
    if st.session_state.transcript_preview:
        st.markdown("---")
        st.markdown("### 📝 Transcription Preview")
        st.markdown(f"> {st.session_state.transcript_preview}")
        if st.button("✅ Send to Assistant"):
            st.session_state.messages.append({"role": "user", "content": st.session_state.transcript_preview})
            st.session_state.transcript_preview = None
            st.rerun()
        if st.button("❌ Discard"):
            st.session_state.transcript_preview = None
            st.rerun()

# ------------------ Chat Panel (Right) ------------------
with col2:
    if prompt := st.chat_input("Type your question about the document..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        st.chat_message("user").write(prompt)

        new_image_url = None
        try:
            # Lazily create the Assistants thread on the first message.
            if st.session_state.thread_id is None:
                thread = client.beta.threads.create()
                st.session_state.thread_id = thread.id

            thread_id = st.session_state.thread_id
            client.beta.threads.messages.create(thread_id=thread_id, role="user", content=prompt)
            run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=ASSISTANT_ID)

            with st.spinner("🤖 Assistant is thinking..."):
                # BUG FIX: the original loop only exited on "completed", so a
                # failed/cancelled/expired run polled forever. Bail out on any
                # terminal (or tool-call) status instead.
                while True:
                    run_status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
                    if run_status.status == "completed":
                        break
                    if run_status.status in ("failed", "cancelled", "expired", "requires_action"):
                        raise RuntimeError(f"Assistant run ended with status: {run_status.status}")
                    time.sleep(1)

            # BUG FIX: messages.list returns newest-first by default, so the
            # first assistant entry is the latest reply. The original iterated
            # reversed(data) and therefore picked the OLDEST assistant message.
            messages = client.beta.threads.messages.list(thread_id=thread_id)
            assistant_message = None
            for message in messages.data:
                if message.role == "assistant":
                    assistant_message = message.content[0].text.value
                    break

            # Guard: never write None into the chat history.
            if assistant_message is None:
                raise RuntimeError("No assistant reply found in the thread.")

            st.chat_message("assistant").write(assistant_message)
            st.session_state.messages.append({"role": "assistant", "content": assistant_message})

            # Pull an extracted-page image URL out of the reply, if present.
            image_match = re.search(
                r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
                assistant_message
            )
            if image_match:
                new_image_url = image_match.group(0)

        except Exception as e:
            st.error(f"❌ Error: {str(e)}")

        # BUG FIX: st.rerun() raises Streamlit's internal control-flow
        # exception; calling it inside the try above would have been swallowed
        # by the broad `except Exception`. Trigger the rerun after the handler.
        if new_image_url:
            st.session_state.image_url = new_image_url
            st.session_state.image_updated = True
            st.rerun()

    # Render the stored history, newest message first.
    for message in reversed(st.session_state.messages):
        role, content = message["role"], message["content"]
        st.chat_message(role).write(content)