IAMTFRMZA committed on
Commit
eb04d10
Β·
verified Β·
1 Parent(s): f383782

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -20
app.py CHANGED
@@ -20,7 +20,7 @@ if not OPENAI_API_KEY or not ASSISTANT_ID:
20
 
21
  client = OpenAI(api_key=OPENAI_API_KEY)
22
 
23
- # ------------------ Chat Session Logic ------------------
24
  session_threads = {}
25
 
26
  def reset_session():
@@ -71,12 +71,14 @@ def extract_image_url(text):
71
  )
72
  return match.group(0) if match else None
73
 
74
def chat_with_image(message, history, session_id):
    """Run the assistant on *message* and extract any image URL from its reply.

    Returns a ``(reply_text, image_url_or_None)`` pair; the image URL is
    whatever ``extract_image_url`` finds in the assistant's response.
    """
    answer = process_chat(message, history, session_id)
    return answer, extract_image_url(answer)
 
 
78
 
79
- # ------------------ Voice Transcription ------------------
80
  def create_websocket_client():
81
  client_id = str(uuid.uuid4())
82
  connections[client_id] = WebSocketClient(WEBSOCKET_URI, WEBSOCKET_HEADERS, client_id)
@@ -95,7 +97,7 @@ def send_audio_chunk(audio, client_id):
95
  connections[client_id].enqueue_audio_chunk(sr, y)
96
  return connections[client_id].transcript
97
 
98
- # ------------------ Gradio UI ------------------
99
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
100
  gr.Markdown("# 🧠 Document AI + πŸŽ™οΈ Voice Assistant")
101
 
@@ -105,20 +107,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
105
  with gr.Row():
106
  with gr.Column(scale=1):
107
  image_display = gr.Image(label="πŸ“‘ Extracted Document Image", show_label=True, height=400)
 
108
  with gr.Column(scale=2):
109
- chatbot = gr.ChatInterface(
110
- fn=chat_with_image,
111
- additional_inputs=[session_id],
112
- outputs=["text", "image"],
113
- examples=[
114
- ["What does clause 3.2 mean?"],
115
- ["Summarize the timeline from the image."]
116
- ],
117
- title="πŸ’¬ Document Assistant"
118
- )
119
- chatbot.render()
120
-
121
- # ------------------ Transcription Section ------------------
122
  gr.Markdown("## πŸŽ™οΈ Realtime Voice Transcription")
123
 
124
  with gr.Row():
 
20
 
21
  client = OpenAI(api_key=OPENAI_API_KEY)
22
 
23
+ # ------------------ Chat Threading ------------------
24
  session_threads = {}
25
 
26
  def reset_session():
 
71
  )
72
  return match.group(0) if match else None
73
 
74
def chat_handler(message, history, session_id):
    """Handle one chat turn: query the assistant, update history, find images.

    Parameters
    ----------
    message : str
        The user's new message.
    history : list[tuple[str, str]]
        Gradio Chatbot history as (user_message, assistant_reply) pairs;
        mutated in place and also returned.
    session_id : str
        Key into the per-session thread map used by ``process_chat``.

    Returns
    -------
    tuple
        ``(history, image_url_or_None)`` — the updated chat history and any
        image URL extracted from the assistant's reply.
    """
    response = process_chat(message, history, session_id)
    # BUG FIX: gr.Chatbot (tuple format) expects one (user_message,
    # assistant_reply) pair per turn.  The original appended two role-tagged
    # rows — ("user", message) and ("assistant", response) — which renders the
    # literal strings "user"/"assistant" as chat bubbles.
    history.append((message, response))
    image_url = extract_image_url(response)
    return history, image_url
80
 
81
+ # ------------------ Transcription ------------------
82
  def create_websocket_client():
83
  client_id = str(uuid.uuid4())
84
  connections[client_id] = WebSocketClient(WEBSOCKET_URI, WEBSOCKET_HEADERS, client_id)
 
97
  connections[client_id].enqueue_audio_chunk(sr, y)
98
  return connections[client_id].transcript
99
 
100
+ # ------------------ Gradio App ------------------
101
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
102
  gr.Markdown("# 🧠 Document AI + πŸŽ™οΈ Voice Assistant")
103
 
 
107
  with gr.Row():
108
  with gr.Column(scale=1):
109
  image_display = gr.Image(label="πŸ“‘ Extracted Document Image", show_label=True, height=400)
110
+
111
  with gr.Column(scale=2):
112
+ chatbot = gr.Chatbot(label="πŸ’¬ Document Assistant", height=400)
113
+ message_input = gr.Textbox(label="Ask about the document", placeholder="e.g. What does clause 3.2 mean?")
114
+ send_button = gr.Button("Send")
115
+
116
+ # Send message logic
117
# Thin pass-through so both the Send button and textbox submit share one
# callback; all real work happens in chat_handler.
def user_send(msg, history, session_id):
    return chat_handler(msg, history, session_id)
119
+
120
+ send_button.click(user_send, inputs=[message_input, chatbot, session_id], outputs=[chatbot, image_display])
121
+ message_input.submit(user_send, inputs=[message_input, chatbot, session_id], outputs=[chatbot, image_display])
122
+
123
+ # ------------------ Voice Section ------------------
 
124
  gr.Markdown("## πŸŽ™οΈ Realtime Voice Transcription")
125
 
126
  with gr.Row():