IAMTFRMZA committed on
Commit
dacdb50
·
verified ·
1 Parent(s): d2baa88

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -21
app.py CHANGED
@@ -70,7 +70,7 @@ class WebSocketClient:
70
  if data["type"] == "conversation.item.input_audio_transcription.delta":
71
  self.transcript += data["delta"]
72
 
73
- # Real-time transcription connection manager
74
  def create_ws():
75
  cid = str(uuid.uuid4())
76
  client = WebSocketClient(WS_URI, HEADERS, cid)
@@ -136,22 +136,23 @@ def clear_chat_and_transcript(client_id):
136
  connections[client_id].transcript = ""
137
  return [], "", None, None
138
 
139
- def toggle_record_visibility(is_visible):
140
- return not is_visible, gr.update(visible=not is_visible)
141
-
142
  # UI
143
  with gr.Blocks(theme=gr.themes.Soft()) as app:
144
  gr.Markdown("# 📄 Document AI Assistant")
145
 
146
  gr.HTML("""
147
  <style>
148
- #ask-btn, #clear-chat-btn, #record-btn {
149
  font-size: 16px !important;
150
  padding: 10px 24px !important;
151
  margin-top: 6px;
152
  }
153
- #audio-stream button {
154
- display: none !important;
 
 
 
 
155
  }
156
  </style>
157
  """)
@@ -160,7 +161,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
160
  thread_state = gr.State()
161
  image_state = gr.State()
162
  client_id = gr.State()
163
- voice_enabled = gr.State(False)
164
 
165
  with gr.Row(equal_height=True):
166
  with gr.Column(scale=1):
@@ -173,36 +173,25 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
173
  send_btn = gr.Button("Send", variant="primary", scale=2)
174
 
175
  with gr.Accordion("🎤 Voice Transcription", open=False) as voice_section:
176
- voice_input = gr.Audio(label="🎙️ Mic Input", streaming=True, visible=False, elem_id="audio-stream")
177
  voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
178
 
179
  with gr.Row():
180
- record_toggle_btn = gr.Button("🎙️ Record", elem_id="record-btn")
181
  ask_btn = gr.Button("🟢 Ask", elem_id="ask-btn")
182
  clear_chat_btn = gr.Button("🧹 Clear Chat", elem_id="clear-chat-btn")
183
 
184
- # Functional bindings
185
  send_btn.click(fn=handle_chat,
186
  inputs=[user_prompt, chat_state, thread_state, image_state],
187
  outputs=[user_prompt, chat, thread_state, image_state])
188
  image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
189
-
190
  voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
191
-
192
- record_toggle_btn.click(
193
- fn=toggle_record_visibility,
194
- inputs=[voice_enabled],
195
- outputs=[voice_enabled, voice_input]
196
- )
197
-
198
  ask_btn.click(fn=send_transcript_to_assistant,
199
  inputs=[voice_transcript, chat_state, thread_state, image_state],
200
  outputs=[user_prompt, chat, thread_state, image_state])
201
-
202
  clear_chat_btn.click(fn=clear_chat_and_transcript,
203
  inputs=[client_id],
204
  outputs=[chat, voice_transcript, thread_state, image_state])
205
-
206
  app.load(fn=create_ws, outputs=[client_id])
207
 
208
  app.launch()
 
70
  if data["type"] == "conversation.item.input_audio_transcription.delta":
71
  self.transcript += data["delta"]
72
 
73
+ # WebSocket connection setup
74
  def create_ws():
75
  cid = str(uuid.uuid4())
76
  client = WebSocketClient(WS_URI, HEADERS, cid)
 
136
  connections[client_id].transcript = ""
137
  return [], "", None, None
138
 
 
 
 
139
  # UI
140
  with gr.Blocks(theme=gr.themes.Soft()) as app:
141
  gr.Markdown("# 📄 Document AI Assistant")
142
 
143
  gr.HTML("""
144
  <style>
145
+ #ask-btn, #clear-chat-btn {
146
  font-size: 16px !important;
147
  padding: 10px 24px !important;
148
  margin-top: 6px;
149
  }
150
+ #record-audio button {
151
+ font-size: 16px !important;
152
+ padding: 12px 24px !important;
153
+ background-color: #f2f2f2 !important;
154
+ border-radius: 6px;
155
+ margin-top: 6px;
156
  }
157
  </style>
158
  """)
 
161
  thread_state = gr.State()
162
  image_state = gr.State()
163
  client_id = gr.State()
 
164
 
165
  with gr.Row(equal_height=True):
166
  with gr.Column(scale=1):
 
173
  send_btn = gr.Button("Send", variant="primary", scale=2)
174
 
175
  with gr.Accordion("🎤 Voice Transcription", open=False) as voice_section:
176
+ voice_input = gr.Audio(label="🎙️ Record", streaming=True, elem_id="record-audio")
177
  voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
178
 
179
  with gr.Row():
 
180
  ask_btn = gr.Button("🟢 Ask", elem_id="ask-btn")
181
  clear_chat_btn = gr.Button("🧹 Clear Chat", elem_id="clear-chat-btn")
182
 
183
+ # Bindings
184
  send_btn.click(fn=handle_chat,
185
  inputs=[user_prompt, chat_state, thread_state, image_state],
186
  outputs=[user_prompt, chat, thread_state, image_state])
187
  image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
 
188
  voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
 
 
 
 
 
 
 
189
  ask_btn.click(fn=send_transcript_to_assistant,
190
  inputs=[voice_transcript, chat_state, thread_state, image_state],
191
  outputs=[user_prompt, chat, thread_state, image_state])
 
192
  clear_chat_btn.click(fn=clear_chat_and_transcript,
193
  inputs=[client_id],
194
  outputs=[chat, voice_transcript, thread_state, image_state])
 
195
  app.load(fn=create_ws, outputs=[client_id])
196
 
197
  app.launch()