IAMTFRMZA committed on
Commit
eb04d10
Β·
verified Β·
1 Parent(s): f383782

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -20
app.py CHANGED
@@ -20,7 +20,7 @@ if not OPENAI_API_KEY or not ASSISTANT_ID:
20
 
21
  client = OpenAI(api_key=OPENAI_API_KEY)
22
 
23
- # ------------------ Chat Session Logic ------------------
24
  session_threads = {}
25
 
26
  def reset_session():
@@ -71,12 +71,14 @@ def extract_image_url(text):
71
  )
72
  return match.group(0) if match else None
73
 
74
def chat_with_image(message, history, session_id):
    """Run the assistant on *message* and extract any image URL from its reply.

    Returns a ``(reply_text, image_url_or_None)`` pair; the image URL is
    whatever ``extract_image_url`` finds in the assistant's response.
    """
    answer = process_chat(message, history, session_id)
    return answer, extract_image_url(answer)
 
 
78
 
79
- # ------------------ Voice Transcription ------------------
80
  def create_websocket_client():
81
  client_id = str(uuid.uuid4())
82
  connections[client_id] = WebSocketClient(WEBSOCKET_URI, WEBSOCKET_HEADERS, client_id)
@@ -95,7 +97,7 @@ def send_audio_chunk(audio, client_id):
95
  connections[client_id].enqueue_audio_chunk(sr, y)
96
  return connections[client_id].transcript
97
 
98
- # ------------------ Gradio UI ------------------
99
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
100
  gr.Markdown("# 🧠 Document AI + πŸŽ™οΈ Voice Assistant")
101
 
@@ -105,20 +107,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
105
  with gr.Row():
106
  with gr.Column(scale=1):
107
  image_display = gr.Image(label="πŸ“‘ Extracted Document Image", show_label=True, height=400)
 
108
  with gr.Column(scale=2):
109
- chatbot = gr.ChatInterface(
110
- fn=chat_with_image,
111
- additional_inputs=[session_id],
112
- outputs=["text", "image"],
113
- examples=[
114
- ["What does clause 3.2 mean?"],
115
- ["Summarize the timeline from the image."]
116
- ],
117
- title="πŸ’¬ Document Assistant"
118
- )
119
- chatbot.render()
120
-
121
- # ------------------ Transcription Section ------------------
122
  gr.Markdown("## πŸŽ™οΈ Realtime Voice Transcription")
123
 
124
  with gr.Row():
 
20
 
21
  client = OpenAI(api_key=OPENAI_API_KEY)
22
 
23
+ # ------------------ Chat Threading ------------------
24
  session_threads = {}
25
 
26
  def reset_session():
 
71
  )
72
  return match.group(0) if match else None
73
 
74
def chat_handler(message, history, session_id):
    """Handle one chat turn: query the assistant, update history, find images.

    Parameters
    ----------
    message : str
        The user's new message.
    history : list[tuple[str, str]]
        Gradio Chatbot history as (user_message, assistant_reply) pairs;
        mutated in place and also returned.
    session_id : str
        Key into the per-session thread map used by ``process_chat``.

    Returns
    -------
    tuple
        ``(history, image_url_or_None)`` — the updated chat history and any
        image URL extracted from the assistant's reply.
    """
    response = process_chat(message, history, session_id)
    # BUG FIX: gr.Chatbot (tuple format) expects one (user_message,
    # assistant_reply) pair per turn.  The original appended two role-tagged
    # rows — ("user", message) and ("assistant", response) — which renders the
    # literal strings "user"/"assistant" as chat bubbles.
    history.append((message, response))
    image_url = extract_image_url(response)
    return history, image_url
80
 
81
+ # ------------------ Transcription ------------------
82
  def create_websocket_client():
83
  client_id = str(uuid.uuid4())
84
  connections[client_id] = WebSocketClient(WEBSOCKET_URI, WEBSOCKET_HEADERS, client_id)
 
97
  connections[client_id].enqueue_audio_chunk(sr, y)
98
  return connections[client_id].transcript
99
 
100
+ # ------------------ Gradio App ------------------
101
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
102
  gr.Markdown("# 🧠 Document AI + πŸŽ™οΈ Voice Assistant")
103
 
 
107
  with gr.Row():
108
  with gr.Column(scale=1):
109
  image_display = gr.Image(label="πŸ“‘ Extracted Document Image", show_label=True, height=400)
110
+
111
  with gr.Column(scale=2):
112
+ chatbot = gr.Chatbot(label="πŸ’¬ Document Assistant", height=400)
113
+ message_input = gr.Textbox(label="Ask about the document", placeholder="e.g. What does clause 3.2 mean?")
114
+ send_button = gr.Button("Send")
115
+
116
+ # Send message logic
117
# Thin pass-through so both the Send button and textbox submit share one
# callback; all real work happens in chat_handler.
def user_send(msg, history, session_id):
    return chat_handler(msg, history, session_id)
119
+
120
+ send_button.click(user_send, inputs=[message_input, chatbot, session_id], outputs=[chatbot, image_display])
121
+ message_input.submit(user_send, inputs=[message_input, chatbot, session_id], outputs=[chatbot, image_display])
122
+
123
+ # ------------------ Voice Section ------------------
 
124
  gr.Markdown("## πŸŽ™οΈ Realtime Voice Transcription")
125
 
126
  with gr.Row():