# Document AI Assistant — Streamlit app (Hugging Face Space)
# Chat with an OpenAI Assistant about medical/pathology documents,
# with optional voice input transcribed via Whisper.
import os
import re
import tempfile
import time
import wave

import numpy as np
import requests
import streamlit as st
from openai import OpenAI
# NOTE(review): the package exposing `audio_recorder(pause_threshold=...)` on PyPI
# is `audio-recorder-streamlit` (imported as `audio_recorder_streamlit`) — confirm
# this module name resolves against the Space's requirements.txt.
from streamlit_audio_recorder import audio_recorder
# ------------------ Page Config ------------------
st.set_page_config(page_title="Document AI Assistant", layout="wide")
st.title("📄 Document AI Assistant")
st.caption("Chat with an AI Assistant on your medical/pathology documents")

# ------------------ Load Secrets ------------------
# Both values must come from the Space's environment/secret settings.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
ASSISTANT_ID = os.environ.get("ASSISTANT_ID")

if not OPENAI_API_KEY or not ASSISTANT_ID:
    st.error("❌ Missing secrets. Please set both OPENAI_API_KEY and ASSISTANT_ID in Hugging Face Space settings.")
    st.stop()  # halt this script run — nothing below works without credentials

client = OpenAI(api_key=OPENAI_API_KEY)
# ------------------ Session State Init ------------------
# "messages" holds the chat history; the other keys are scalar slots that
# start out unset. setdefault only writes on the first run of a session.
for _key, _default in {
    "messages": [],
    "thread_id": None,
    "image_url": None,
    "transcript": None,
}.items():
    st.session_state.setdefault(_key, _default)
# ------------------ Whisper Transcription ------------------
def transcribe_audio(file_path, api_key):
    """Transcribe an audio file with the OpenAI Whisper HTTP API.

    Args:
        file_path: Path of the audio file to upload.
        api_key: OpenAI API key, sent as a Bearer token.

    Returns:
        The transcript text, or None when the request fails, returns a
        non-2xx status, or the response carries no "text" field.
    """
    try:
        with open(file_path, "rb") as f:
            response = requests.post(
                "https://api.openai.com/v1/audio/transcriptions",
                headers={"Authorization": f"Bearer {api_key}"},
                files={"file": f},
                data={"model": "whisper-1"},
                timeout=60,  # fix: don't hang the Streamlit run on a stuck upload
            )
        # Fix: the old code called .json() unconditionally, which raises on
        # error pages / non-JSON bodies instead of degrading gracefully.
        response.raise_for_status()
        return response.json().get("text", None)
    except (requests.RequestException, ValueError):
        # Network error, HTTP error status, or unparsable body — callers
        # already treat a falsy transcript as "nothing recognized".
        return None
# ------------------ Sidebar & Layout ------------------
st.sidebar.header("🔧 Settings")

# Reset all conversation state, then rerun so the cleared UI shows immediately.
if st.sidebar.button("🗑️ Clear Chat"):
    st.session_state.messages = []
    st.session_state.thread_id = None
    st.session_state.image_url = None
    st.session_state.transcript = None
    st.rerun()

show_image = st.sidebar.checkbox("🖼️ Show Document Image", value=True)

# Left column: extracted document image; right column: chat + microphone.
col1, col2 = st.columns([1, 2])
# ------------------ Image Panel ------------------
with col1:
    # Only render when the user opted in AND the assistant has surfaced a page URL.
    if show_image and st.session_state.image_url:
        st.image(st.session_state.image_url, caption="📄 Extracted Page", use_container_width=True)
# ------------------ Chat + Mic Panel ------------------
with col2:
    # Replay the stored conversation so history survives Streamlit reruns.
    for message in st.session_state.messages:
        st.chat_message(message["role"]).write(message["content"])

    st.subheader("🎙️ Ask with Your Voice")
    audio_bytes = audio_recorder(pause_threshold=3.0, energy_threshold=-1.0, sample_rate=44100)

    if audio_bytes:
        # Persist the recording to a temporary WAV file so it can be replayed
        # in the UI and uploaded to the transcription endpoint.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
            tmpfile.write(audio_bytes)
            tmp_path = tmpfile.name
        try:
            st.audio(tmp_path, format="audio/wav")
            with st.spinner("🎧 Transcribing..."):
                transcript = transcribe_audio(tmp_path, OPENAI_API_KEY)
        finally:
            # Fix: the temp file was previously leaked on every recording.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
        if transcript:
            st.success("📝 Transcript: " + transcript)
            st.session_state.transcript = transcript

    # Submit Transcript to Assistant
    if st.session_state.transcript:
        if st.button("✅ Send Transcript to Assistant"):
            user_input = st.session_state.transcript
            st.session_state.transcript = None  # reset so the button disappears next run
            st.session_state.messages.append({"role": "user", "content": user_input})
            st.chat_message("user").write(user_input)

            try:
                # Lazily create one Assistants-API thread per browser session.
                if st.session_state.thread_id is None:
                    thread = client.beta.threads.create()
                    st.session_state.thread_id = thread.id
                thread_id = st.session_state.thread_id

                client.beta.threads.messages.create(thread_id=thread_id, role="user", content=user_input)
                run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=ASSISTANT_ID)

                # Poll until the run reaches a terminal state. Fix: the old loop
                # only checked "completed" and would spin forever on a
                # failed/cancelled/expired run.
                with st.spinner("🤖 Assistant is thinking..."):
                    while True:
                        run_status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
                        if run_status.status in ("completed", "failed", "cancelled", "expired"):
                            break
                        time.sleep(1)

                if run_status.status != "completed":
                    st.error(f"❌ Assistant run ended with status: {run_status.status}")
                else:
                    # messages.list returns newest-first, so the first assistant
                    # entry is the latest reply. Fix: reversed(...) previously
                    # selected the OLDEST assistant message every time.
                    messages = client.beta.threads.messages.list(thread_id=thread_id)
                    assistant_message = next(
                        (m.content[0].text.value for m in messages.data if m.role == "assistant"), None
                    )
                    if assistant_message is not None:
                        st.chat_message("assistant").write(assistant_message)
                        st.session_state.messages.append({"role": "assistant", "content": assistant_message})

                        # Extract GitHub image if available (fix: guard above keeps
                        # re.search from receiving None and raising TypeError).
                        image_match = re.search(
                            r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
                            assistant_message
                        )
                        if image_match:
                            st.session_state.image_url = image_match.group(0)
            except Exception as e:
                # Surface any API/SDK failure in the UI instead of crashing the run.
                st.error(f"❌ Error: {str(e)}")
# ------------------ Fallback Text Input ------------------
# Typed questions are routed through the same transcript slot as voice input,
# so the user still confirms with the "Send Transcript to Assistant" button.
if prompt := st.chat_input("💬 Or type your question..."):
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)
    st.session_state.transcript = prompt  # treat like voice input for now