Spaces:

IAMTFRMZA
/

documentaitestv2

Sleeping

App Files Community

IAMTFRMZA committed on 18 days ago

Commit

f383782

verified ·

1 Parent(s): b74ae51

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -19

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ if not OPENAI_API_KEY or not ASSISTANT_ID:
 client = OpenAI(api_key=OPENAI_API_KEY)
-# ------------------ Chat Logic ------------------
 session_threads = {}
 def reset_session():
@@ -71,7 +71,12 @@ def extract_image_url(text):
     )
     return match.group(0) if match else None
-# ------------------ Transcription Logic ------------------
 def create_websocket_client():
     client_id = str(uuid.uuid4())
     connections[client_id] = WebSocketClient(WEBSOCKET_URI, WEBSOCKET_HEADERS, client_id)
@@ -90,42 +95,30 @@ def send_audio_chunk(audio, client_id):
     connections[client_id].enqueue_audio_chunk(sr, y)
     return connections[client_id].transcript
-# ------------------ Gradio App ------------------
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🧠 Document AI + 🎙️ Voice Assistant")
     session_id = gr.State(value=reset_session())
     client_id = gr.State()
-    image_url = gr.State(value=None)
     with gr.Row():
         with gr.Column(scale=1):
             image_display = gr.Image(label="📑 Extracted Document Image", show_label=True, height=400)
         with gr.Column(scale=2):
             chatbot = gr.ChatInterface(
-                fn=lambda message, history, session_id: process_chat(message, history, session_id),
                 additional_inputs=[session_id],
                 examples=[
                     ["What does clause 3.2 mean?"],
                     ["Summarize the timeline from the image."]
                 ],
                 title="💬 Document Assistant"
             )
-    # Inject logic to extract image when assistant replies
-    def handle_reply_and_update_image(message, history, session_id):
-        response = process_chat(message, history, session_id)
-        url = extract_image_url(response)
-        return response, url
-    chatbot.fn = lambda message, history, session_id: handle_reply_and_update_image(message, history, session_id)[0]
-    chatbot.chatbot.change(
-        fn=lambda m, h, s: handle_reply_and_update_image(m, h, s)[1],
-        inputs=[chatbot.input, chatbot.chatbot, session_id],
-        outputs=image_display
-    )
-    # ------------------ Voice Transcription ------------------
     gr.Markdown("## 🎙️ Realtime Voice Transcription")
     with gr.Row():

 client = OpenAI(api_key=OPENAI_API_KEY)
+# ------------------ Chat Session Logic ------------------
 session_threads = {}
 def reset_session():
     )
     return match.group(0) if match else None
+def chat_with_image(message, history, session_id):  # chat callback passed as fn= to gr.ChatInterface; returns the reply and any image URL found in it
+    reply = process_chat(message, history, session_id)  # process_chat is not shown in this diff; its return value is treated as the reply text
+    image_url = extract_image_url(reply)  # the matched text, or None when the pattern does not match
+    return reply, image_url  # returned as a (reply, image_url) pair
+# ------------------ Voice Transcription ------------------
 def create_websocket_client():
     client_id = str(uuid.uuid4())
     connections[client_id] = WebSocketClient(WEBSOCKET_URI, WEBSOCKET_HEADERS, client_id)
     connections[client_id].enqueue_audio_chunk(sr, y)
     return connections[client_id].transcript
+# ------------------ Gradio UI ------------------
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🧠 Document AI + 🎙️ Voice Assistant")
     session_id = gr.State(value=reset_session())
     client_id = gr.State()
     with gr.Row():
         with gr.Column(scale=1):
             image_display = gr.Image(label="📑 Extracted Document Image", show_label=True, height=400)
         with gr.Column(scale=2):
             chatbot = gr.ChatInterface(
+                fn=chat_with_image,
                 additional_inputs=[session_id],
+                outputs=["text", "image"],
                 examples=[
                     ["What does clause 3.2 mean?"],
                     ["Summarize the timeline from the image."]
                 ],
                 title="💬 Document Assistant"
             )
+            chatbot.render()
+    # ------------------ Transcription Section ------------------
     gr.Markdown("## 🎙️ Realtime Voice Transcription")
     with gr.Row():