IAMTFRMZA committed on
Commit
f310bae
·
verified ·
1 Parent(s): c9731af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -11
app.py CHANGED
@@ -17,7 +17,6 @@ HEADERS = {"Authorization": f"Bearer {OPENAI_API_KEY}", "OpenAI-Beta": "realtime
17
  WS_URI = "wss://api.openai.com/v1/realtime?intent=transcription"
18
  connections = {}
19
 
20
- # WebSocket Client
21
  class WebSocketClient:
22
  def __init__(self, uri, headers, client_id):
23
  self.uri = uri
@@ -70,7 +69,6 @@ class WebSocketClient:
70
  if data["type"] == "conversation.item.input_audio_transcription.delta":
71
  self.transcript += data["delta"]
72
 
73
- # Connection manager
74
  def create_ws():
75
  cid = str(uuid.uuid4())
76
  client = WebSocketClient(WS_URI, HEADERS, cid)
@@ -139,6 +137,12 @@ def clear_chat_and_transcript(client_id):
139
  connections[client_id].transcript = ""
140
  return [], "", None, None
141
 
 
 
 
 
 
 
142
  # UI
143
  with gr.Blocks(theme=gr.themes.Soft()) as app:
144
  gr.Markdown("# 📄 Document AI Assistant")
@@ -150,23 +154,24 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
150
  padding: 12px 28px !important;
151
  border-radius: 6px;
152
  margin-top: 10px;
153
- background-color: #f2f2f2 !important;
154
- color: #000 !important;
155
- white-space: nowrap;
156
- overflow: hidden;
157
- text-overflow: ellipsis;
 
 
 
158
  }
159
 
160
  button {
161
  margin-right: 8px;
162
  }
163
 
164
- /* Hide icon (optional) */
165
  #record-audio button svg {
166
  margin-right: 6px;
167
  }
168
 
169
- /* Hide internal label if redundant */
170
  #record-audio label {
171
  display: none;
172
  }
@@ -197,11 +202,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
197
  ask_btn = gr.Button("🟢 Ask", elem_id="ask-btn")
198
  clear_chat_btn = gr.Button("🧹 Clear Chat", elem_id="clear-chat-btn")
199
 
200
- # Bindings
201
  send_btn.click(fn=handle_chat,
202
  inputs=[user_prompt, chat_state, thread_state, image_state],
203
  outputs=[user_prompt, chat, thread_state, image_state])
204
- image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
205
  voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
206
 
207
  ask_btn.click(fn=send_transcript_to_assistant,
 
17
  WS_URI = "wss://api.openai.com/v1/realtime?intent=transcription"
18
  connections = {}
19
 
 
20
  class WebSocketClient:
21
  def __init__(self, uri, headers, client_id):
22
  self.uri = uri
 
69
  if data["type"] == "conversation.item.input_audio_transcription.delta":
70
  self.transcript += data["delta"]
71
 
 
72
  def create_ws():
73
  cid = str(uuid.uuid4())
74
  client = WebSocketClient(WS_URI, HEADERS, cid)
 
137
  connections[client_id].transcript = ""
138
  return [], "", None, None
139
 
140
# Image viewer fallback: only pass through well-formed absolute web URLs.
def update_image_display(image_url):
    """Return *image_url* for the image display if it looks like a web URL.

    Used as a Gradio ``.change`` handler for ``image_state``: any value that
    is not a non-empty string beginning with ``http://`` or ``https://``
    clears the display by returning ``None``.
    """
    # startswith with a tuple of schemes rejects junk like "httpfoo" that a
    # bare "http" prefix check would accept; it also makes a separate
    # truthiness check redundant (empty strings fail startswith anyway).
    if isinstance(image_url, str) and image_url.startswith(("http://", "https://")):
        return image_url
    return None
146
  # UI
147
  with gr.Blocks(theme=gr.themes.Soft()) as app:
148
  gr.Markdown("# 📄 Document AI Assistant")
 
154
  padding: 12px 28px !important;
155
  border-radius: 6px;
156
  margin-top: 10px;
157
+ background-color: #4b5563 !important;
158
+ color: white !important;
159
+ border: 1px solid #9ca3af !important;
160
+ }
161
+
162
+ #ask-btn:hover, #clear-chat-btn:hover, #record-audio button:hover {
163
+ background-color: #6b7280 !important;
164
+ color: #fff !important;
165
  }
166
 
167
  button {
168
  margin-right: 8px;
169
  }
170
 
 
171
  #record-audio button svg {
172
  margin-right: 6px;
173
  }
174
 
 
175
  #record-audio label {
176
  display: none;
177
  }
 
202
  ask_btn = gr.Button("🟢 Ask", elem_id="ask-btn")
203
  clear_chat_btn = gr.Button("🧹 Clear Chat", elem_id="clear-chat-btn")
204
 
205
+ # Functional bindings
206
  send_btn.click(fn=handle_chat,
207
  inputs=[user_prompt, chat_state, thread_state, image_state],
208
  outputs=[user_prompt, chat, thread_state, image_state])
209
+ image_state.change(fn=update_image_display, inputs=image_state, outputs=image_display)
210
  voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
211
 
212
  ask_btn.click(fn=send_transcript_to_assistant,