Update app.py
app.py CHANGED
@@ -4,10 +4,10 @@ import numpy as np
 import soundfile as sf
 from pydub import AudioSegment
 from openai import OpenAI
-from websockets import connect
+from websockets import connect, Data, ClientConnection
 from dotenv import load_dotenv
 
-#
+# ---------------- Environment & Client Setup ----------------
 load_dotenv()
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 ASSISTANT_ID = os.getenv("ASSISTANT_ID")
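For context, a minimal sketch of how the widened import might be exercised (`Data` and `ClientConnection` are type-hint imports). The realtime URL, header, and first event below are assumptions for illustration, not part of this commit:

```python
import asyncio
import os

from dotenv import load_dotenv
from websockets import connect

load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

async def open_realtime_socket():
    # Assumed endpoint; the app's actual URL lives elsewhere in app.py.
    url = "wss://api.openai.com/v1/realtime?intent=transcription"
    headers = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
    # `additional_headers` on websockets >= 14; older releases call it `extra_headers`.
    async with connect(url, additional_headers=headers) as ws:
        await ws.send('{"type": "input_audio_buffer.clear"}')  # placeholder first event

if __name__ == "__main__":
    asyncio.run(open_realtime_socket())
```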
@@ -45,10 +45,7 @@ class WebSocketClient:
         buf = io.BytesIO(); sf.write(buf, int16, sr, format='WAV', subtype='PCM_16')
         audio = AudioSegment.from_file(buf, format="wav").set_frame_rate(24000)
         out = io.BytesIO(); audio.export(out, format="wav"); out.seek(0)
-        await self.websocket.send(json.dumps({
-            "type": "input_audio_buffer.append",
-            "audio": base64.b64encode(out.read()).decode()
-        }))
+        await self.websocket.send(json.dumps({"type": "input_audio_buffer.append", "audio": base64.b64encode(out.read()).decode()}))
 
     async def receive_messages(self):
         async for msg in self.websocket:
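The collapsed send is the tail of a longer pipeline visible in the context lines: build a 16-bit WAV in memory with soundfile, resample to 24 kHz with pydub, then base64-encode the bytes into an `input_audio_buffer.append` event. A self-contained sketch of that path, assuming float samples in [-1, 1] (the helper name is illustrative):

```python
import base64
import io
import json

import numpy as np
import soundfile as sf
from pydub import AudioSegment

def encode_audio_event(samples: np.ndarray, sr: int) -> str:
    """Build the input_audio_buffer.append payload shown in the diff."""
    int16 = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)  # assumed float input
    buf = io.BytesIO()
    sf.write(buf, int16, sr, format="WAV", subtype="PCM_16")
    buf.seek(0)
    audio = AudioSegment.from_file(buf, format="wav").set_frame_rate(24000)
    out = io.BytesIO()
    audio.export(out, format="wav")
    out.seek(0)
    return json.dumps({"type": "input_audio_buffer.append",
                       "audio": base64.b64encode(out.read()).decode()})
```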
@@ -77,10 +74,10 @@ def clear_transcript(cid):
     if cid in connections: connections[cid].transcript = ""
     return ""
 
-# ---------------- Chat
+# ---------------- Chat Functionality ----------------
 def handle_chat(user_input, history, thread_id, image_url):
     if not OPENAI_API_KEY or not ASSISTANT_ID:
-        return "❌ Missing
+        return "❌ Missing secrets!", history, thread_id, image_url
 
     try:
         if thread_id is None:
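The new early return keeps the four-tuple output shape consistent when secrets are absent; otherwise the try block drives the Assistants API. A hedged sketch of the round trip handle_chat appears to perform (the client calls are standard openai-SDK methods; the prompt text is made up):

```python
import os
from openai import OpenAI

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
ASSISTANT_ID = os.getenv("ASSISTANT_ID")

# Create a thread once, post the user message, then poll the run to completion.
thread = client.beta.threads.create()
client.beta.threads.messages.create(thread_id=thread.id, role="user",
                                    content="What does slide 3 show?")
run = client.beta.threads.runs.create_and_poll(thread_id=thread.id,
                                               assistant_id=ASSISTANT_ID)
if run.status == "completed":
    # Newest messages come first; take the first assistant reply.
    for msg in client.beta.threads.messages.list(thread_id=thread.id).data:
        if msg.role == "assistant":
            print(msg.content[0].text.value)
            break
```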
@@ -100,12 +97,8 @@ def handle_chat(user_input, history, thread_id, image_url):
         if msg.role == "assistant":
             content = msg.content[0].text.value
             history.append((user_input, content))
-            match = re.search(
-                r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
-                content
-            )
-            if match:
-                image_url = match.group(0)
+            match = re.search(r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png', content)
+            if match: image_url = match.group(0)
             break
 
         return "", history, thread_id, image_url
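A quick standalone check of the URL pattern kept by this change; the sample text is made up:

```python
import re

PATTERN = (r'https://raw\.githubusercontent\.com/AndrewLORTech/'
           r'surgical-pathology-manual/main/[\w\-/]*\.png')
text = ("See https://raw.githubusercontent.com/AndrewLORTech/"
        "surgical-pathology-manual/main/fig1.png for the slide.")
match = re.search(PATTERN, text)
print(match.group(0) if match else "no match")
# -> https://raw.githubusercontent.com/AndrewLORTech/surgical-pathology-manual/main/fig1.png
```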
@@ -113,49 +106,40 @@ def handle_chat(user_input, history, thread_id, image_url):
     except Exception as e:
         return f"❌ {e}", history, thread_id, image_url
 
-# ---------------- UI ----------------
-with gr.Blocks(theme=
+# ---------------- Gradio UI Layout ----------------
+with gr.Blocks(theme=gr.themes.Soft()) as app:
     gr.Markdown("# 📄 Document AI Assistant")
 
-    #
+    # STATES
     chat_state = gr.State([])
     thread_state = gr.State()
     image_state = gr.State()
     client_id = gr.State()
-    mic_shown = gr.State(False)
 
-    with gr.Row(
-        # Left: Document Viewer
+    with gr.Row():
         with gr.Column(scale=1):
+            # IMAGE VIEWER (left)
+            image_display = gr.Image(label="🖼️ Document", type="filepath")
 
+            # VOICE (under)
+            voice_transcript = gr.Textbox(label="🎙️ Transcript", lines=4, interactive=False)
+            voice_input = gr.Audio(label="🔴 Record", streaming=True)
+            clear_btn = gr.Button("🧹 Clear Transcript")
+
+        with gr.Column(scale=2):
+            # CHATBOT (right)
             chat = gr.Chatbot(label="💬 Chat", height=450)
+            user_prompt = gr.Textbox(show_label=False, placeholder="Ask your question...")
+            send_btn = gr.Button("Send")
 
-
-            user_input = gr.Textbox(placeholder="Ask your question...", show_label=False, scale=6)
-            mic_btn = gr.Button("🎙️", scale=1)
-            send_btn = gr.Button("Send", scale=2)
-
-    # Hidden Voice Section
-    with gr.Row(visible=False) as mic_row:
-        with gr.Column(scale=4):
-            audio = gr.Audio(label="🎤 Speak", streaming=True)
-        with gr.Column(scale=5):
-            transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
-        with gr.Column(scale=2):
-            clear_btn = gr.Button("🧹 Clear")
-
-    # Logic Wiring
-    def toggle_mic(state): return not state, gr.update(visible=not state)
-    mic_btn.click(toggle_mic, inputs=mic_shown, outputs=[mic_shown, mic_row])
+    # HANDLERS
     send_btn.click(handle_chat,
-                   inputs=[
-                   outputs=[
+                   inputs=[user_prompt, chat_state, thread_state, image_state],
+                   outputs=[user_prompt, chat, thread_state, image_state])
+
     image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
-
-    clear_btn.click(fn=clear_transcript, inputs=[client_id], outputs=
-    app.load(
+    voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
+    clear_btn.click(fn=clear_transcript, inputs=[client_id], outputs=voice_transcript)
+    app.load(create_ws, outputs=[client_id])
 
 app.launch()
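The handler block at the end of the new layout follows one pattern throughout: component event, handler function, outputs list. A minimal runnable sketch of that wiring, with an echo function standing in for handle_chat (all names here are illustrative, and the tuple-style chat history matches what handle_chat builds):

```python
import gradio as gr

def respond(prompt, history):
    # Stand-in for handle_chat: clear the textbox, extend the chat history.
    history = history + [(prompt, f"You said: {prompt}")]
    return "", history

with gr.Blocks() as demo:
    chat = gr.Chatbot(label="Chat")
    box = gr.Textbox(show_label=False, placeholder="Ask your question...")
    btn = gr.Button("Send")
    # Same shape as send_btn.click in the commit: inputs list in, outputs list out.
    btn.click(respond, inputs=[box, chat], outputs=[box, chat])

if __name__ == "__main__":
    demo.launch()
```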