Spaces:

IAMTFRMZA
/

documentaitest

Sleeping

App Files Files Community

documentaitest / app.py

IAMTFRMZA

Update app.py

81240ab verified about 1 month ago

raw

history blame

5.27 kB

	import streamlit as st
	import os
	import time
	import re
	import requests
	import tempfile
	import wave
	import numpy as np
	from openai import OpenAI
	from streamlit_audio_recorder import audio_recorder

	# ------------------ Page Config ------------------
	# Use the wide layout and set the page title, then render the heading
	# and the one-line description shown beneath it.
	st.set_page_config(
	    layout="wide",
	    page_title="Document AI Assistant",
	)
	st.title("📄 Document AI Assistant")
	st.caption(
	    "Chat with an AI Assistant on your medical/pathology documents"
	)

	# ------------------ Load Secrets ------------------
	# Both values are read from the process environment.
	OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
	ASSISTANT_ID = os.getenv("ASSISTANT_ID")

	# Show an error and halt this script run when either secret is unset or empty.
	if not (OPENAI_API_KEY and ASSISTANT_ID):
	    st.error("❌ Missing secrets. Please set both OPENAI_API_KEY and ASSISTANT_ID in Hugging Face Space settings.")
	    st.stop()

	# Shared OpenAI client used by the rest of the script.
	client = OpenAI(api_key=OPENAI_API_KEY)

	# ------------------ Session State Init ------------------
	# Seed each session slot only when it is absent, so values already stored
	# in the session are left untouched. "messages" starts as an empty list;
	# every other slot starts as None.
	for key in ("messages", "thread_id", "image_url", "transcript"):
	    if key in st.session_state:
	        continue
	    st.session_state[key] = None if key != "messages" else []

	# ------------------ Whisper Transcription ------------------
	def transcribe_audio(file_path, api_key, timeout=60):
	    """Transcribe an audio file via the OpenAI audio transcription endpoint.

	    Args:
	        file_path: Path of the audio file to upload.
	        api_key: OpenAI API key, sent as a bearer token.
	        timeout: Seconds to wait on the HTTP request. Without a timeout a
	            stalled connection would block the script run indefinitely.

	    Returns:
	        The transcribed text, or None when the server answers with an
	        error status, a body that is not a JSON object, or a JSON object
	        without a "text" field.

	    Raises:
	        requests.RequestException: If the request cannot be completed
	            (connection failure or timeout).
	    """
	    with open(file_path, "rb") as f:
	        response = requests.post(
	            "https://api.openai.com/v1/audio/transcriptions",
	            headers={"Authorization": f"Bearer {api_key}"},
	            files={"file": f},
	            data={"model": "whisper-1"},
	            timeout=timeout,
	        )

	    # Error responses carry no transcript; report them as "no text".
	    if not response.ok:
	        return None

	    # A non-JSON body (e.g. a proxy error page) makes .json() raise a
	    # ValueError subclass; treat it the same as a missing transcript.
	    try:
	        payload = response.json()
	    except ValueError:
	        return None

	    if not isinstance(payload, dict):
	        return None
	    return payload.get("text")

	# ------------------ Sidebar & Layout ------------------
	st.sidebar.header("🔧 Settings")

	# "Clear Chat" resets every session slot to its initial value and reruns
	# the script so the page is redrawn from the emptied state.
	clear_requested = st.sidebar.button("🔄 Clear Chat")
	if clear_requested:
	    st.session_state["messages"] = []
	    st.session_state["thread_id"] = None
	    st.session_state["image_url"] = None
	    st.session_state["transcript"] = None
	    st.rerun()

	# Toggle for the document image, plus the two-column page layout
	# (relative widths 1 : 2).
	show_image = st.sidebar.checkbox("📖 Show Document Image", value=True)
	col1, col2 = st.columns([1, 2])

	# ------------------ Image Panel ------------------
	# The first column shows the stored page image, but only when the sidebar
	# toggle is on and an image URL has been stored in the session.
	with col1:
	    if show_image:
	        page_image = st.session_state.image_url
	        if page_image:
	            st.image(page_image, caption="📑 Extracted Page", use_container_width=True)

	# ------------------ Chat + Mic Panel ------------------
	# URL of a page image in the surgical-pathology-manual GitHub repository.
	_IMAGE_URL_PATTERN = re.compile(
	    r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png'
	)

	# Run states from which this app can never reach "completed". It never
	# submits tool outputs, so "requires_action" is a dead end here as well.
	_DEAD_RUN_STATES = frozenset(
	    {"failed", "cancelled", "expired", "incomplete", "requires_action"}
	)


	def _wait_for_run(thread_id, run_id, poll_seconds=1):
	    """Poll a run until it completes.

	    Raises:
	        RuntimeError: If the run ends in any state other than "completed".
	            Without this check a failed or expired run would be polled
	            forever.
	    """
	    while True:
	        run_status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
	        if run_status.status == "completed":
	            return
	        if run_status.status in _DEAD_RUN_STATES:
	            raise RuntimeError(f"Assistant run ended with status '{run_status.status}'")
	        time.sleep(poll_seconds)


	def _latest_assistant_text(thread_id):
	    """Return the text of the newest assistant message in the thread, or None."""
	    # Ask for newest-first explicitly so the first assistant message seen is
	    # the latest reply, not the first reply ever given in this thread.
	    messages = client.beta.threads.messages.list(thread_id=thread_id, order="desc")
	    for m in messages.data:
	        if m.role != "assistant":
	            continue
	        # Keep only text parts; other part kinds have no `.text` attribute.
	        text_parts = [
	            part.text.value
	            for part in m.content
	            if getattr(part, "type", None) == "text"
	        ]
	        if text_parts:
	            return "\n".join(text_parts)
	    return None


	def _ask_assistant(user_input):
	    """Send one user message to the assistant and render the exchange.

	    Appends the user message and the assistant reply to the session history,
	    writes both into the current Streamlit container, and stores the first
	    matching page-image URL found in the reply. Any failure is reported
	    with st.error instead of propagating.
	    """
	    st.session_state.messages.append({"role": "user", "content": user_input})
	    st.chat_message("user").write(user_input)

	    try:
	        # Create the conversation thread lazily on the first question.
	        if st.session_state.thread_id is None:
	            thread = client.beta.threads.create()
	            st.session_state.thread_id = thread.id

	        thread_id = st.session_state.thread_id
	        client.beta.threads.messages.create(thread_id=thread_id, role="user", content=user_input)
	        run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=ASSISTANT_ID)

	        with st.spinner("🤖 Assistant is thinking..."):
	            _wait_for_run(thread_id, run.id)

	        assistant_message = _latest_assistant_text(thread_id)
	        if assistant_message is None:
	            # Avoid storing None in the history or passing it to the regex.
	            raise RuntimeError("Assistant returned no text response")

	        st.chat_message("assistant").write(assistant_message)
	        st.session_state.messages.append({"role": "assistant", "content": assistant_message})

	        # Extract GitHub image if available
	        image_match = _IMAGE_URL_PATTERN.search(assistant_message)
	        if image_match:
	            st.session_state.image_url = image_match.group(0)

	    except Exception as e:
	        # UI boundary: show the failure to the user rather than crash the page.
	        st.error(f"❌ Error: {str(e)}")


	with col2:
	    # Replay the stored conversation.
	    for message in st.session_state.messages:
	        st.chat_message(message["role"]).write(message["content"])

	    st.subheader("🎙️ Ask with Your Voice")

	    audio_bytes = audio_recorder(pause_threshold=3.0, energy_threshold=-1.0, sample_rate=44100)

	    if audio_bytes:
	        # Play back straight from memory so playback does not depend on the
	        # temporary file, which is removed once transcription finishes.
	        st.audio(audio_bytes, format="audio/wav")

	        # transcribe_audio takes a file path, so spill the recording to disk.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
	            tmpfile.write(audio_bytes)
	            tmp_path = tmpfile.name

	        try:
	            with st.spinner("🧠 Transcribing..."):
	                transcript = transcribe_audio(tmp_path, OPENAI_API_KEY)
	        finally:
	            # delete=False means nobody else removes the file for us.
	            os.remove(tmp_path)

	        if transcript:
	            st.success("📝 Transcript: " + transcript)
	            st.session_state.transcript = transcript

	    # Submit Transcript to Assistant
	    if st.session_state.transcript:
	        if st.button("✅ Send Transcript to Assistant"):
	            user_input = st.session_state.transcript
	            st.session_state.transcript = None  # reset
	            _ask_assistant(user_input)

	# Fallback text input: a typed question is sent to the assistant right away.
	# Parking it in `transcript` would leave it unsent until a later button
	# click, which would then record the user message a second time.
	if prompt := st.chat_input("💬 Or type your question..."):
	    with col2:
	        _ask_assistant(prompt)