Spaces:

IAMTFRMZA
/

documentaitest

Sleeping

App Files Files Community

IAMTFRMZA committed on Apr 15

Commit

e760d91

verified ·

1 Parent(s): 7783ebe

Update app.py

Browse files

Files changed (1) hide show

app.py +163 -3

app.py CHANGED Viewed

@@ -1,10 +1,41 @@
-import base64
 import requests
 import tempfile
-import streamlit as st
 from openai import OpenAI
-# Whisper transcription function
 def transcribe_audio(file_path, api_key):
     with open(file_path, "rb") as f:
         response = requests.post(
@@ -14,3 +45,132 @@ def transcribe_audio(file_path, api_key):
             data={"model": "whisper-1"}
         )
     return response.json().get("text", None)

+import streamlit as st
+import os
+import time
+import re
 import requests
 import tempfile
 from openai import OpenAI
+from streamlit_webrtc import webrtc_streamer, WebRtcMode, ClientSettings
+import av
+import numpy as np
+import wave
+# ------------------ Configuration ------------------
+st.set_page_config(page_title="Document AI Assistant", layout="wide")
+st.title("📄 Document AI Assistant")
+st.caption("Chat with an AI Assistant on your medical/pathology documents")
+# ------------------ Secrets ------------------
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+ASSISTANT_ID = os.environ.get("ASSISTANT_ID")
+if not OPENAI_API_KEY or not ASSISTANT_ID:
+    st.error("❌ Missing secrets. Please set both OPENAI_API_KEY and ASSISTANT_ID in your Hugging Face Space settings.")
+    st.stop()
+client = OpenAI(api_key=OPENAI_API_KEY)
+# ------------------ Session State ------------------
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+if "thread_id" not in st.session_state:
+    st.session_state.thread_id = None
+if "image_url" not in st.session_state:
+    st.session_state.image_url = None
+if "audio_buffer" not in st.session_state:
+    st.session_state.audio_buffer = []
+# ------------------ Whisper Transcription ------------------
 def transcribe_audio(file_path, api_key):
     with open(file_path, "rb") as f:
         response = requests.post(
             data={"model": "whisper-1"}
         )
     return response.json().get("text", None)
+# ------------------ Audio Recorder ------------------
+class AudioProcessor:
+    def __init__(self):
+        self.frames = []
+    def recv(self, frame):
+        audio = frame.to_ndarray()
+        self.frames.append(audio)
+        return av.AudioFrame.from_ndarray(audio, layout="mono")
+def save_wav(frames, path, rate=48000):
+    audio_data = np.concatenate(frames)
+    with wave.open(path, 'wb') as wf:
+        wf.setnchannels(1)
+        wf.setsampwidth(2)
+        wf.setframerate(rate)
+        wf.writeframes(audio_data.tobytes())
+# ------------------ Sidebar & Image Panel ------------------
+st.sidebar.header("🔧 Settings")
+if st.sidebar.button("🔄 Clear Chat"):
+    st.session_state.messages = []
+    st.session_state.thread_id = None
+    st.session_state.image_url = None
+    st.rerun()
+show_image = st.sidebar.checkbox("📖 Show Document Image", value=True)
+col1, col2 = st.columns([1, 2])
+with col1:
+    if show_image and st.session_state.image_url:
+        st.image(st.session_state.image_url, caption="📑 Extracted Page", use_container_width=True)
+# ------------------ Chat & Voice Panel ------------------
+with col2:
+    # Display previous messages
+    for message in st.session_state.messages:
+        st.chat_message(message["role"]).write(message["content"])
+    # 🎤 Real-time voice recorder
+    st.subheader("🎙️ Ask with your voice")
+    audio_ctx = webrtc_streamer(
+        key="speech",
+        mode=WebRtcMode.SENDONLY,
+        in_audio_enabled=True,
+        audio_receiver_size=256,
+        client_settings=ClientSettings(
+            media_stream_constraints={"audio": True, "video": False},
+            rtc_configuration={"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]},
+        ),
+    )
+    if audio_ctx.audio_receiver:
+        audio_processor = AudioProcessor()
+        result = audio_ctx.audio_receiver.recv()
+        audio_data = result.to_ndarray()
+        st.session_state.audio_buffer.append(audio_data)
+        # ⏱️ Auto stop after short time
+        if len(st.session_state.audio_buffer) > 30:  # about 3s
+            tmp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
+            save_wav(st.session_state.audio_buffer, tmp_path)
+            st.session_state.audio_buffer = []
+            with st.spinner("🧠 Transcribing..."):
+                transcript = transcribe_audio(tmp_path, OPENAI_API_KEY)
+            if transcript:
+                st.success("📝 " + transcript)
+                st.session_state.messages.append({"role": "user", "content": transcript})
+                st.chat_message("user").write(transcript)
+                prompt = transcript
+                # ---- Assistant interaction ----
+                try:
+                    if st.session_state.thread_id is None:
+                        thread = client.beta.threads.create()
+                        st.session_state.thread_id = thread.id
+                    thread_id = st.session_state.thread_id
+                    client.beta.threads.messages.create(
+                        thread_id=thread_id,
+                        role="user",
+                        content=prompt
+                    )
+                    run = client.beta.threads.runs.create(
+                        thread_id=thread_id,
+                        assistant_id=ASSISTANT_ID
+                    )
+                    with st.spinner("Assistant is thinking..."):
+                        while True:
+                            run_status = client.beta.threads.runs.retrieve(
+                                thread_id=thread_id,
+                                run_id=run.id
+                            )
+                            if run_status.status == "completed":
+                                break
+                            time.sleep(1)
+                    messages = client.beta.threads.messages.list(thread_id=thread_id)
+                    assistant_message = None
+                    for message in reversed(messages.data):
+                        if message.role == "assistant":
+                            assistant_message = message.content[0].text.value
+                            break
+                    st.chat_message("assistant").write(assistant_message)
+                    st.session_state.messages.append({"role": "assistant", "content": assistant_message})
+                    # Image link extract
+                    image_match = re.search(
+                        r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
+                        assistant_message
+                    )
+                    if image_match:
+                        st.session_state.image_url = image_match.group(0)
+                except Exception as e:
+                    st.error(f"❌ Error: {str(e)}")
+    # Fallback text input
+    if prompt := st.chat_input("💬 Or type your question..."):
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        st.chat_message("user").write(prompt)
+        # Send prompt to assistant logic follows same flow above (you can wrap in a function)