documentaitestv4

Sleeping

App Files Community

IAMTFRMZA committed 24 days ago

Commit

ec26bc2

verified ·

1 Parent(s): f4c7018

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -12

app.py CHANGED Viewed

@@ -17,7 +17,6 @@ HEADERS = {"Authorization": f"Bearer {OPENAI_API_KEY}", "OpenAI-Beta": "realtime
 WS_URI = "wss://api.openai.com/v1/realtime?intent=transcription"
 connections = {}
-# WebSocket Client
 class WebSocketClient:
     def __init__(self, uri, headers, client_id):
         self.uri = uri
@@ -70,7 +69,6 @@ class WebSocketClient:
             if data["type"] == "conversation.item.input_audio_transcription.delta":
                 self.transcript += data["delta"]
-# WebSocket Connection Manager
 def create_ws():
     cid = str(uuid.uuid4())
     client = WebSocketClient(WS_URI, HEADERS, cid)
@@ -93,7 +91,6 @@ def clear_transcript_only(cid):
 def clear_chat_only():
     return [], None, None
-# Assistant chat handler
 def handle_chat(user_input, history, thread_id, image_url):
     if not OPENAI_API_KEY or not ASSISTANT_ID:
         return "❌ Missing secrets!", history, thread_id, image_url
@@ -116,6 +113,9 @@ def handle_chat(user_input, history, thread_id, image_url):
         for msg in reversed(msgs.data):
             if msg.role == "assistant":
                 content = msg.content[0].text.value
                 history.append((user_input, content))
                 match = re.search(
                     r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
@@ -130,7 +130,6 @@ def handle_chat(user_input, history, thread_id, image_url):
     except Exception as e:
         return f"❌ {e}", history, thread_id, image_url
-# Feed transcript as assistant input
 def feed_transcript(transcript, history, thread_id, image_url, cid):
     if not transcript.strip():
         return gr.update(), history, thread_id, image_url
@@ -138,7 +137,6 @@ def feed_transcript(transcript, history, thread_id, image_url, cid):
         connections[cid].transcript = ""
     return handle_chat(transcript, history, thread_id, image_url)
-# Fallback for image display
 def update_image_display(image_url):
     if image_url and isinstance(image_url, str) and image_url.startswith("http"):
         return image_url
@@ -157,6 +155,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
         border-radius: 8px !important;
         width: 100% !important;
         margin-top: 10px;
     }
     .voice-area {
         padding-top: 12px;
@@ -172,20 +171,27 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
     client_id = gr.State()
     with gr.Row(equal_height=True):
-        with gr.Column(scale=0.8):  # thinner image column
-            image_display = gr.Image(label="🖼️ Document", type="filepath", show_download_button=False)
             with gr.Column(elem_classes="voice-area"):
                 gr.Markdown("### 🎙️ Voice Input")
                 voice_input = gr.Audio(label="Tap to Record", streaming=True, type="numpy", show_label=True)
                 voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
-                with gr.Row():
-                    voice_send_btn = gr.Button("🟢 Send Voice to Assistant", elem_classes="big-btn")
-                    clear_transcript_btn = gr.Button("🧹 Clear Transcript", elem_classes="big-btn")
-        with gr.Column(scale=2):  # wider chat column
-            chat = gr.Chatbot(label="💬 Chat", height=460)
             with gr.Row():
                 user_prompt = gr.Textbox(placeholder="Ask your question...", show_label=False, scale=8)

 WS_URI = "wss://api.openai.com/v1/realtime?intent=transcription"
 connections = {}
 class WebSocketClient:
     def __init__(self, uri, headers, client_id):
         self.uri = uri
             if data["type"] == "conversation.item.input_audio_transcription.delta":
                 self.transcript += data["delta"]
 def create_ws():
     cid = str(uuid.uuid4())
     client = WebSocketClient(WS_URI, HEADERS, cid)
 def clear_chat_only():
     return [], None, None
 def handle_chat(user_input, history, thread_id, image_url):
     if not OPENAI_API_KEY or not ASSISTANT_ID:
         return "❌ Missing secrets!", history, thread_id, image_url
         for msg in reversed(msgs.data):
             if msg.role == "assistant":
                 content = msg.content[0].text.value
+                # Optional: prevent repeating fallback messages
+                # if history and content == history[-1][1]:
+                #     content += "\n🔁 Try asking a different type of question!"
                 history.append((user_input, content))
                 match = re.search(
                     r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
     except Exception as e:
         return f"❌ {e}", history, thread_id, image_url
 def feed_transcript(transcript, history, thread_id, image_url, cid):
     if not transcript.strip():
         return gr.update(), history, thread_id, image_url
         connections[cid].transcript = ""
     return handle_chat(transcript, history, thread_id, image_url)
 def update_image_display(image_url):
     if image_url and isinstance(image_url, str) and image_url.startswith("http"):
         return image_url
         border-radius: 8px !important;
         width: 100% !important;
         margin-top: 10px;
+        white-space: nowrap;
     }
     .voice-area {
         padding-top: 12px;
     client_id = gr.State()
     with gr.Row(equal_height=True):
+        with gr.Column(scale=0.8):
+            image_display = gr.Image(
+                label="🖼️ Document",
+                type="filepath",
+                show_download_button=False,
+                height=480  # taller preview
+            )
             with gr.Column(elem_classes="voice-area"):
                 gr.Markdown("### 🎙️ Voice Input")
                 voice_input = gr.Audio(label="Tap to Record", streaming=True, type="numpy", show_label=True)
                 voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
+                with gr.Row(equal_height=True):
+                    with gr.Column(scale=1):
+                        voice_send_btn = gr.Button("🟢 Send Voice to Assistant", elem_classes="big-btn")
+                    with gr.Column(scale=1):
+                        clear_transcript_btn = gr.Button("🧹 Clear Transcript", elem_classes="big-btn")
+        with gr.Column(scale=2):
+            chat = gr.Chatbot(label="💬 Chat", height=480)
             with gr.Row():
                 user_prompt = gr.Textbox(placeholder="Ask your question...", show_label=False, scale=8)