Update app.py
app.py
CHANGED
@@ -1,3 +1,4 @@
+# top of the file
 import gradio as gr
 import os, time, re, json, base64, asyncio, threading, uuid, io
 import numpy as np
@@ -7,7 +8,7 @@ from openai import OpenAI
 from websockets import connect
 from dotenv import load_dotenv
 
-#
+# Load secrets
 load_dotenv()
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 ASSISTANT_ID = os.getenv("ASSISTANT_ID")
@@ -17,10 +18,12 @@ HEADERS = {"Authorization": f"Bearer {OPENAI_API_KEY}", "OpenAI-Beta": "realtime
 WS_URI = "wss://api.openai.com/v1/realtime?intent=transcription"
 connections = {}
 
-#
+# WebSocket Client
 class WebSocketClient:
     def __init__(self, uri, headers, client_id):
-        self.uri
+        self.uri = uri
+        self.headers = headers
+        self.client_id = client_id
         self.websocket = None
         self.queue = asyncio.Queue(maxsize=10)
         self.transcript = ""
@@ -68,7 +71,7 @@ class WebSocketClient:
         if data["type"] == "conversation.item.input_audio_transcription.delta":
             self.transcript += data["delta"]
 
-#
+# Real-time transcription connection manager
 def create_ws():
     cid = str(uuid.uuid4())
     client = WebSocketClient(WS_URI, HEADERS, cid)
@@ -103,8 +106,7 @@ def handle_chat(user_input, history, thread_id, image_url):
 
         while True:
             status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
-            if status.status == "completed":
-                break
+            if status.status == "completed": break
             time.sleep(1)
 
         msgs = client.beta.threads.messages.list(thread_id=thread_id)
@@ -116,8 +118,7 @@ def handle_chat(user_input, history, thread_id, image_url):
                 r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
                 content
             )
-            if match:
-                image_url = match.group(0)
+            if match: image_url = match.group(0)
             break
 
         return "", history, thread_id, image_url
@@ -125,26 +126,15 @@ def handle_chat(user_input, history, thread_id, image_url):
     except Exception as e:
         return f"❌ {e}", history, thread_id, image_url
 
-# ============ Auto-Send Voice Toggle ============
-def maybe_send_transcript(transcript, history, thread_id, image_url, voice_only_enabled, client_id):
-    if voice_only_enabled and transcript.strip():
-        # Clear transcript after sending
-        if client_id in connections:
-            connections[client_id].transcript = ""
-        return handle_chat(transcript, history, thread_id, image_url)
-    return transcript, history, thread_id, image_url
-
 # ============ Gradio UI ============
 with gr.Blocks(theme=gr.themes.Soft()) as app:
     gr.Markdown("# 📄 Document AI Assistant")
 
-    # STATES
     chat_state = gr.State([])
     thread_state = gr.State()
     image_state = gr.State()
     client_id = gr.State()
    voice_enabled = gr.State(False)
-    voice_only_state = gr.State(True)
 
     with gr.Row(equal_height=True):
         with gr.Column(scale=1):
@@ -163,9 +153,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
             voice_input = gr.Audio(label="Mic", streaming=True)
             voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
             clear_btn = gr.Button("🧹 Clear Transcript")
-            voice_only_toggle = gr.Checkbox(label="Voice-Only Mode 🎤➡️💬", value=True)
 
-    #
+    # Functional bindings
    def toggle_voice(curr):
         return not curr, gr.update(visible=not curr)
 
@@ -174,21 +163,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
         inputs=[user_prompt, chat_state, thread_state, image_state],
         outputs=[user_prompt, chat, thread_state, image_state])
     image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
-
-    # Real-time audio streaming
     voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
     clear_btn.click(fn=clear_transcript, inputs=[client_id], outputs=voice_transcript)
-
-    # Auto-send voice transcript if Voice-Only Mode is enabled
-    voice_input.change(
-        fn=maybe_send_transcript,
-        inputs=[voice_transcript, chat_state, thread_state, image_state, voice_only_state, client_id],
-        outputs=[user_prompt, chat, thread_state, image_state]
-    )
-
-    voice_only_toggle.change(fn=lambda x: x, inputs=voice_only_toggle, outputs=voice_only_state)
-
-    # Initialize WebSocket connection
     app.load(fn=create_ws, outputs=[client_id])
 
 app.launch()
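
Note: the polling loop in handle_chat (hunk @@ -103,8 +106,7 @@) only breaks on "completed", so a run that terminates as "failed", "cancelled", or "expired" would poll forever. A minimal defensive sketch, using the same Assistants beta retrieve call the diff already makes; wait_for_run and its timeout are illustrative additions, not part of this commit:

```python
import time

def wait_for_run(client, thread_id, run_id, timeout=60):
    # Hypothetical helper: poll until the run reaches any terminal
    # state, or give up after `timeout` seconds instead of spinning.
    deadline = time.time() + timeout
    while time.time() < deadline:
        run = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
        if run.status in ("completed", "failed", "cancelled", "expired"):
            return run
        time.sleep(1)
    raise TimeoutError(f"run {run_id} did not finish within {timeout}s")
```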
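Note: the bindings voice_input.stream(fn=send_audio, ...) and clear_btn.click(fn=clear_transcript, ...) reference helpers defined outside the hunks shown. A sketch of plausible shapes, assuming the module-level connections registry from the diff and Gradio's streaming (sample_rate, ndarray) audio chunks; enqueue_audio_chunk is a hypothetical stand-in for however WebSocketClient feeds its asyncio.Queue:

```python
def clear_transcript(client_id):
    # Reset the buffered transcript for this client's realtime session.
    if client_id in connections:
        connections[client_id].transcript = ""
    return ""

def send_audio(audio, client_id):
    # Gradio streams mic audio as (sample_rate, numpy array) chunks.
    if client_id not in connections:
        return "Connecting..."
    sr, chunk = audio
    # Hypothetical method, not part of the shown code: push the chunk
    # onto the client's send queue for the realtime transcription socket.
    connections[client_id].enqueue_audio_chunk(sr, chunk)
    # Surface whatever transcript has accumulated so far.
    return connections[client_id].transcript
```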