Update app.py

app.py CHANGED
@@ -3,6 +3,11 @@ import os
 import time
 import re
 from openai import OpenAI
+from streamlit_webrtc import webrtc_streamer, AudioProcessorBase
+import av
+import numpy as np
+import tempfile
+import soundfile as sf
 
 # ------------------ App Configuration ------------------
 st.set_page_config(page_title="Document AI Assistant", layout="wide")
@@ -29,6 +34,8 @@ if "image_url" not in st.session_state:
     st.session_state.image_url = None
 if "image_updated" not in st.session_state:
     st.session_state.image_updated = False
+if "transcript_preview" not in st.session_state:
+    st.session_state.transcript_preview = None
 
 # ------------------ Sidebar Controls ------------------
 st.sidebar.header("🔧 Settings")
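A side note on the initialization pattern above: every new piece of state gets its own `not in st.session_state` guard, and this hunk adds a third one. Since `st.session_state` supports the standard mapping interface, a dict of defaults does the same job in one loop once keys start accumulating. A minimal sketch, assuming the same key names and defaults the app already uses:

```python
import streamlit as st

# All session-state defaults in one place; keys mirror the diff above.
DEFAULTS = {
    "messages": [],
    "thread_id": None,
    "image_url": None,
    "image_updated": False,
    "transcript_preview": None,
}

for key, value in DEFAULTS.items():
    # setdefault writes the key only when it is missing, so existing
    # values survive Streamlit reruns exactly like the explicit guards.
    st.session_state.setdefault(key, value)
```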
@@ -37,59 +44,101 @@ if st.sidebar.button("🔄 Clear Chat"):
     st.session_state.thread_id = None
     st.session_state.image_url = None
     st.session_state.image_updated = False
+    st.session_state.transcript_preview = None
     st.rerun()
 
 show_image = st.sidebar.checkbox("🖼 Show Document Image", value=True)
+preview_toggle = st.sidebar.checkbox("📝 Preview transcription before sending", value=True)
 
 # ------------------ Split Layout ------------------
 col1, col2 = st.columns([1, 2])
 
 # ------------------ Image Panel (Left) ------------------
 with col1:
     if show_image and st.session_state.image_url:
         st.image(st.session_state.image_url, caption="📄 Extracted Page", use_container_width=True)
         st.session_state.image_updated = False
+
+# ------------------ Voice Input Processing ------------------
+class AudioRecorder(AudioProcessorBase):
+    def __init__(self):
+        self.recorded_frames = []
+
+    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
+        self.recorded_frames.append(frame)
+        return frame
+
+with col2:
+    st.markdown("### 🎙️ Voice Input (Optional)")
+    webrtc_ctx = webrtc_streamer(
+        key="voice-input",
+        mode="SENDONLY",
+        audio_processor_factory=AudioRecorder,
+        media_stream_constraints={"audio": True, "video": False},
+        async_processing=True,
+    )
+
+    if webrtc_ctx.audio_processor and not webrtc_ctx.state.playing and webrtc_ctx.audio_processor.recorded_frames:
+        st.info("Transcribing your voice...")
+        wav_path = tempfile.mktemp(suffix=".wav")
+        with open(wav_path, "wb") as f:
+            frames = webrtc_ctx.audio_processor.recorded_frames
+            audio = frames[0].to_ndarray()
+            for frame in frames[1:]:
+                audio = np.concatenate((audio, frame.to_ndarray()), axis=1)
+            sf.write(f, audio.T, samplerate=frames[0].sample_rate, format="WAV")
+
+        audio_file = open(wav_path, "rb")
+        try:
+            whisper_result = client.audio.transcriptions.create(model="whisper-1", file=audio_file, response_format="json")
+            transcript = whisper_result.text.strip()
+            confidence = getattr(whisper_result, "confidence", "N/A")
+
+            if transcript:
+                st.success(f"Recognized: {transcript}")
+                st.caption(f"🧠 Confidence: {confidence}")
+                if preview_toggle:
+                    st.session_state.transcript_preview = transcript
+                else:
+                    st.session_state.messages.append({"role": "user", "content": transcript})
+                    st.rerun()
+        except Exception as e:
+            st.error(f"❌ Transcription failed: {str(e)}")
+
+    if st.session_state.transcript_preview:
+        st.markdown("---")
+        st.markdown("### 📝 Transcription Preview")
+        st.markdown(f"> {st.session_state.transcript_preview}")
+        if st.button("✅ Send to Assistant"):
+            st.session_state.messages.append({"role": "user", "content": st.session_state.transcript_preview})
+            st.session_state.transcript_preview = None
+            st.rerun()
+        if st.button("❌ Discard"):
+            st.session_state.transcript_preview = None
+            st.rerun()
 
 # ------------------ Chat Panel (Right) ------------------
 with col2:
-    # 🔼 Chat input stays at the top
     if prompt := st.chat_input("Type your question about the document..."):
         st.session_state.messages.append({"role": "user", "content": prompt})
         st.chat_message("user").write(prompt)
 
         try:
-            # Initialize thread if needed
             if st.session_state.thread_id is None:
                 thread = client.beta.threads.create()
                 st.session_state.thread_id = thread.id
 
             thread_id = st.session_state.thread_id
-
-            # Send message to assistant
-            client.beta.threads.messages.create(
-                thread_id=thread_id,
-                role="user",
-                content=prompt
-            )
-
-            # Run assistant
-            run = client.beta.threads.runs.create(
-                thread_id=thread_id,
-                assistant_id=ASSISTANT_ID
-            )
-
-            # Wait for assistant response
+            client.beta.threads.messages.create(thread_id=thread_id, role="user", content=prompt)
+            run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=ASSISTANT_ID)
+
             with st.spinner("🤖 Assistant is thinking..."):
                 while True:
-                    run_status = client.beta.threads.runs.retrieve(
-                        thread_id=thread_id,
-                        run_id=run.id
-                    )
+                    run_status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
                     if run_status.status == "completed":
                         break
                     time.sleep(1)
 
-            # Get assistant response
             messages = client.beta.threads.messages.list(thread_id=thread_id)
             assistant_message = None
             for message in reversed(messages.data):
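A note on the transcription call in the hunk above: `whisper-1` with `response_format="json"` returns only the transcribed text, so there is no top-level confidence field and the `getattr(..., "N/A")` fallback will always trigger. If a confidence signal is actually wanted, `response_format="verbose_json"` includes per-segment `avg_logprob` values that can serve as a rough proxy. A sketch under that assumption (`transcribe_with_confidence` is a hypothetical helper, and collapsing segment log-probs into one score is my own heuristic, not an official metric):

```python
import math
from openai import OpenAI

client = OpenAI()

def transcribe_with_confidence(wav_path: str):
    # verbose_json responses carry a list of segments, each with avg_logprob.
    with open(wav_path, "rb") as audio_file:
        result = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
            response_format="verbose_json",
        )
    segments = result.segments or []
    if not segments:
        return result.text.strip(), None
    # exp(mean log-prob) maps the score into (0, 1]; purely a heuristic proxy.
    mean_logprob = sum(s.avg_logprob for s in segments) / len(segments)
    return result.text.strip(), math.exp(mean_logprob)
```

Separately, `tempfile.mktemp` is documented as deprecated because the returned name can be raced between creation and use; `tempfile.NamedTemporaryFile(suffix=".wav", delete=False)` gives the same convenience without that caveat.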
@@ -100,9 +149,8 @@ with col2:
             st.chat_message("assistant").write(assistant_message)
             st.session_state.messages.append({"role": "assistant", "content": assistant_message})
 
-            # Extract GitHub image URL if present
             image_match = re.search(
-                r'https://raw
+                r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
                 assistant_message
             )
             if image_match:
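The pattern above escapes its dots, so `raw.githubusercontent.com` no longer matches lookalike hosts, and `[\w\-/]*` covers nested paths with hyphens. A quick self-contained check (the page filename in the sample reply is made up for illustration):

```python
import re

PATTERN = (
    r'https://raw\.githubusercontent\.com/'
    r'AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png'
)

# Hypothetical assistant reply embedding a page-image URL.
reply = (
    "The relevant section is shown here: "
    "https://raw.githubusercontent.com/AndrewLORTech/"
    "surgical-pathology-manual/main/page-12.png"
)

match = re.search(PATTERN, reply)
print(match.group(0) if match else "no image URL found")
```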
@@ -113,8 +161,6 @@ with col2:
         except Exception as e:
             st.error(f"❌ Error: {str(e)}")
 
-        # 🔽 Show previous messages below the input
         for message in reversed(st.session_state.messages):
             role, content = message["role"], message["content"]
             st.chat_message(role).write(content)
-
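One last note on the run-polling loop carried over unchanged from the original code: it only exits on `"completed"`, so a run that ends in `"failed"`, `"cancelled"`, or `"expired"` — the other terminal statuses the Assistants API documents — leaves the spinner looping forever. A defensive variant, sketched as a standalone helper (the 60-second timeout is an arbitrary choice for illustration):

```python
import time

TERMINAL = {"completed", "failed", "cancelled", "expired"}

def wait_for_run(client, thread_id: str, run_id: str, timeout: float = 60.0):
    # Poll until the run reaches a terminal status or the deadline passes.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        run_status = client.beta.threads.runs.retrieve(
            thread_id=thread_id, run_id=run_id
        )
        if run_status.status in TERMINAL:
            if run_status.status != "completed":
                raise RuntimeError(f"run ended with status {run_status.status!r}")
            return run_status
        time.sleep(1)
    raise TimeoutError("assistant run did not finish before the timeout")
```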