IAMTFRMZA committed on
Commit
d051f5d
·
verified ·
1 Parent(s): a86432c
Files changed (1) hide show
  1. app.py +46 -66
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import gradio as gr
2
  import os, time, re, json, base64, asyncio, threading, uuid, io
3
  import numpy as np
@@ -90,13 +91,9 @@ def clear_transcript_only(cid):
90
  connections[cid].transcript = ""
91
  return ""
92
 
93
- def clear_chat_only():
94
- return [], None, None
95
-
96
- # Assistant chat handler
97
- def handle_chat(user_input, history, thread_id, image_url):
98
  if not OPENAI_API_KEY or not ASSISTANT_ID:
99
- return "โŒ Missing secrets!", history, thread_id, image_url
100
 
101
  try:
102
  if thread_id is None:
@@ -116,108 +113,91 @@ def handle_chat(user_input, history, thread_id, image_url):
116
  for msg in reversed(msgs.data):
117
  if msg.role == "assistant":
118
  content = msg.content[0].text.value
119
- history.append((user_input, content))
120
  match = re.search(
121
  r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
122
  content
123
  )
124
- if match:
125
- image_url = match.group(0)
126
- break
127
 
128
- return "", history, thread_id, image_url
129
 
130
  except Exception as e:
131
- return f"โŒ {e}", history, thread_id, image_url
132
 
133
- # Feed transcript as assistant input
134
- def feed_transcript(transcript, history, thread_id, image_url, cid):
135
  if not transcript.strip():
136
- return gr.update(), history, thread_id, image_url
137
  if cid in connections:
138
  connections[cid].transcript = ""
139
- return handle_chat(transcript, history, thread_id, image_url)
140
-
141
- # Fallback for image display
142
- def update_image_display(image_url):
143
- if image_url and isinstance(image_url, str) and image_url.startswith("http"):
144
- return image_url
145
- return None
146
 
147
  # ============ Gradio UI ============
148
- with gr.Blocks(theme=gr.themes.Soft()) as app:
149
 - gr.Markdown("# 📄 Document AI Assistant")
150
 
 
151
  gr.HTML("""
152
  <style>
 
 
 
 
153
  .big-btn {
154
- font-size: 18px !important;
155
- padding: 14px 28px !important;
156
- border-radius: 8px !important;
157
- width: 100% !important;
158
- margin-top: 10px;
 
 
159
  }
160
  .voice-area {
161
- padding-top: 12px;
162
- border-top: 1px solid #444;
163
- margin-top: 12px;
164
  }
165
  </style>
166
  """)
167
 
168
- chat_state = gr.State([])
169
  thread_state = gr.State()
170
- image_state = gr.State()
171
  client_id = gr.State()
172
 
173
  with gr.Row(equal_height=True):
174
  with gr.Column(scale=1):
175
 - image_display = gr.Image(label="🖼️ Document", type="filepath", show_download_button=False)
176
-
177
- with gr.Column(scale=1.4):
178
 - chat = gr.Chatbot(label="💬 Chat", height=460)
179
-
180
- with gr.Row():
181
- user_prompt = gr.Textbox(placeholder="Ask your question...", show_label=False, scale=8)
182
- send_btn = gr.Button("Send", variant="primary", scale=2)
183
 
184
  with gr.Column(elem_classes="voice-area"):
185
 - gr.Markdown("### 🎙️ Voice Input")
186
-
187
- voice_input = gr.Audio(label="Tap to Record", streaming=True, type="numpy", show_label=True)
188
- voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
189
 
190
- with gr.Row():
191
 - voice_send_btn = gr.Button("🟢 Send Voice to Assistant", elem_classes="big-btn")
192
 - clear_transcript_btn = gr.Button("🧹 Clear Transcript", elem_classes="big-btn")
193
 
194
- with gr.Row():
195
 - clear_chat_btn = gr.Button("🗑️ Clear Chat", elem_classes="big-btn")
 
196
 
197
  # Bindings
198
- send_btn.click(fn=handle_chat,
199
- inputs=[user_prompt, chat_state, thread_state, image_state],
200
- outputs=[user_prompt, chat, thread_state, image_state])
201
 
202
  voice_input.stream(fn=send_audio,
203
  inputs=[voice_input, client_id],
204
- outputs=voice_transcript,
205
  stream_every=0.5)
206
 
207
- voice_send_btn.click(fn=feed_transcript,
208
- inputs=[voice_transcript, chat_state, thread_state, image_state, client_id],
209
- outputs=[user_prompt, chat, thread_state, image_state])
210
 
211
  clear_transcript_btn.click(fn=clear_transcript_only,
212
  inputs=[client_id],
213
- outputs=voice_transcript)
214
-
215
- clear_chat_btn.click(fn=clear_chat_only,
216
- outputs=[chat, thread_state, image_state])
217
-
218
- image_state.change(fn=update_image_display,
219
- inputs=image_state,
220
- outputs=image_display)
221
 
222
  app.load(fn=create_ws, outputs=[client_id])
223
 
 
1
+ # app.py
2
  import gradio as gr
3
  import os, time, re, json, base64, asyncio, threading, uuid, io
4
  import numpy as np
 
91
  connections[cid].transcript = ""
92
  return ""
93
 
94
+ def handle_chat(user_input, thread_id):
 
 
 
 
95
  if not OPENAI_API_KEY or not ASSISTANT_ID:
96
+ return "โŒ Missing secrets!", thread_id, "", None
97
 
98
  try:
99
  if thread_id is None:
 
113
  for msg in reversed(msgs.data):
114
  if msg.role == "assistant":
115
  content = msg.content[0].text.value
 
116
  match = re.search(
117
  r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
118
  content
119
  )
120
+ image_url = match.group(0) if match else None
121
+ response = f"### โ“ Question\n{user_input}\n\n---\n\n### ๐Ÿ’ก Answer\n{content}"
122
+ return response, thread_id, image_url
123
 
124
+ return "No response from assistant.", thread_id, None
125
 
126
  except Exception as e:
127
+ return f"โŒ {e}", thread_id, None
128
 
129
+ def feed_transcript(transcript, thread_id, cid):
 
130
  if not transcript.strip():
131
+ return gr.update(), thread_id, None
132
  if cid in connections:
133
  connections[cid].transcript = ""
134
+ return handle_chat(transcript, thread_id,)
 
 
 
 
 
 
135
 
136
  # ============ Gradio UI ============
 
 
137
 
138
+ with gr.Blocks(theme=gr.themes.Soft()) as app:
139
  gr.HTML("""
140
  <style>
141
+ body {
142
+ font-family: 'Inter', sans-serif;
143
+ background-color: #f9f9fb;
144
+ }
145
  .big-btn {
146
+ font-size: 16px;
147
+ padding: 12px 20px;
148
+ border-radius: 12px;
149
+ width: 100%;
150
+ background-color: #4f46e5;
151
+ color: white;
152
+ border: none;
153
  }
154
  .voice-area {
155
+ padding-top: 16px;
156
+ margin-top: 16px;
157
+ border-top: 1px solid #ddd;
158
  }
159
  </style>
160
  """)
161
 
 
162
  thread_state = gr.State()
 
163
  client_id = gr.State()
164
 
165
  with gr.Row(equal_height=True):
166
  with gr.Column(scale=1):
167
+ user_input = gr.Textbox(placeholder="Ask your question...", label="Prompt")
168
 + submit_btn = gr.Button("🚀 Ask", variant="primary")
169
+ result_md = gr.Markdown()
170
 + image_output = gr.Image(label="🖼️ Preview", type="filepath", show_download_button=False)
 
 
 
 
171
 
172
  with gr.Column(elem_classes="voice-area"):
173
 + gr.Markdown("🎙️ Real-time Voice Input")
174
+ voice_input = gr.Audio(label="Tap to Speak", streaming=True, type="numpy")
175
+ transcript_box = gr.Textbox(label="Transcript", lines=2, interactive=False)
 
176
 
177
+ voice_submit_btn = gr.Button("Send Voice", elem_classes="big-btn")
178
 + clear_transcript_btn = gr.Button("🧹 Clear Transcript", elem_classes="big-btn")
 
179
 
180
+ with gr.Column(scale=1.4):
181
+ gr.Markdown("### โฑ๏ธ Assistant Response")
182
+ result_area = gr.Markdown()
183
 
184
  # Bindings
185
+ submit_btn.click(fn=handle_chat,
186
+ inputs=[user_input, thread_state],
187
+ outputs=[result_area, thread_state, image_output])
188
 
189
  voice_input.stream(fn=send_audio,
190
  inputs=[voice_input, client_id],
191
+ outputs=transcript_box,
192
  stream_every=0.5)
193
 
194
+ voice_submit_btn.click(fn=feed_transcript,
195
+ inputs=[transcript_box, thread_state, client_id],
196
+ outputs=[result_area, thread_state, image_output])
197
 
198
  clear_transcript_btn.click(fn=clear_transcript_only,
199
  inputs=[client_id],
200
+ outputs=transcript_box)
 
 
 
 
 
 
 
201
 
202
  app.load(fn=create_ws, outputs=[client_id])
203