Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -70,7 +70,7 @@ class WebSocketClient:
|
|
70 |
if data["type"] == "conversation.item.input_audio_transcription.delta":
|
71 |
self.transcript += data["delta"]
|
72 |
|
73 |
-
#
|
74 |
def create_ws():
|
75 |
cid = str(uuid.uuid4())
|
76 |
client = WebSocketClient(WS_URI, HEADERS, cid)
|
@@ -136,22 +136,23 @@ def clear_chat_and_transcript(client_id):
|
|
136 |
connections[client_id].transcript = ""
|
137 |
return [], "", None, None
|
138 |
|
139 |
-
def toggle_record_visibility(is_visible):
|
140 |
-
return not is_visible, gr.update(visible=not is_visible)
|
141 |
-
|
142 |
# UI
|
143 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
144 |
gr.Markdown("# 📄 Document AI Assistant")
|
145 |
|
146 |
gr.HTML("""
|
147 |
<style>
|
148 |
-
#ask-btn, #clear-chat-btn, #record-btn {
|
149 |
font-size: 16px !important;
|
150 |
padding: 10px 24px !important;
|
151 |
margin-top: 6px;
|
152 |
}
|
153 |
-
#audio
|
154 |
-
|
|
|
|
|
|
|
|
|
155 |
}
|
156 |
</style>
|
157 |
""")
|
@@ -160,7 +161,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
|
|
160 |
thread_state = gr.State()
|
161 |
image_state = gr.State()
|
162 |
client_id = gr.State()
|
163 |
-
voice_enabled = gr.State(False)
|
164 |
|
165 |
with gr.Row(equal_height=True):
|
166 |
with gr.Column(scale=1):
|
@@ -173,36 +173,25 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
|
|
173 |
send_btn = gr.Button("Send", variant="primary", scale=2)
|
174 |
|
175 |
with gr.Accordion("🎤 Voice Transcription", open=False) as voice_section:
|
176 |
-
voice_input = gr.Audio(label="🎙️
|
177 |
voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
|
178 |
|
179 |
with gr.Row():
|
180 |
-
record_toggle_btn = gr.Button("🎙️ Record", elem_id="record-btn")
|
181 |
ask_btn = gr.Button("🟢 Ask", elem_id="ask-btn")
|
182 |
clear_chat_btn = gr.Button("🧹 Clear Chat", elem_id="clear-chat-btn")
|
183 |
|
184 |
-
#
|
185 |
send_btn.click(fn=handle_chat,
|
186 |
inputs=[user_prompt, chat_state, thread_state, image_state],
|
187 |
outputs=[user_prompt, chat, thread_state, image_state])
|
188 |
image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
|
189 |
-
|
190 |
voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
|
191 |
-
|
192 |
-
record_toggle_btn.click(
|
193 |
-
fn=toggle_record_visibility,
|
194 |
-
inputs=[voice_enabled],
|
195 |
-
outputs=[voice_enabled, voice_input]
|
196 |
-
)
|
197 |
-
|
198 |
ask_btn.click(fn=send_transcript_to_assistant,
|
199 |
inputs=[voice_transcript, chat_state, thread_state, image_state],
|
200 |
outputs=[user_prompt, chat, thread_state, image_state])
|
201 |
-
|
202 |
clear_chat_btn.click(fn=clear_chat_and_transcript,
|
203 |
inputs=[client_id],
|
204 |
outputs=[chat, voice_transcript, thread_state, image_state])
|
205 |
-
|
206 |
app.load(fn=create_ws, outputs=[client_id])
|
207 |
|
208 |
app.launch()
|
|
|
70 |
if data["type"] == "conversation.item.input_audio_transcription.delta":
|
71 |
self.transcript += data["delta"]
|
72 |
|
73 |
+
# WebSocket connection setup
|
74 |
def create_ws():
|
75 |
cid = str(uuid.uuid4())
|
76 |
client = WebSocketClient(WS_URI, HEADERS, cid)
|
|
|
136 |
connections[client_id].transcript = ""
|
137 |
return [], "", None, None
|
138 |
|
|
|
|
|
|
|
139 |
# UI
|
140 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
141 |
gr.Markdown("# 📄 Document AI Assistant")
|
142 |
|
143 |
gr.HTML("""
|
144 |
<style>
|
145 |
+
#ask-btn, #clear-chat-btn {
|
146 |
font-size: 16px !important;
|
147 |
padding: 10px 24px !important;
|
148 |
margin-top: 6px;
|
149 |
}
|
150 |
+
#record-audio button {
|
151 |
+
font-size: 16px !important;
|
152 |
+
padding: 12px 24px !important;
|
153 |
+
background-color: #f2f2f2 !important;
|
154 |
+
border-radius: 6px;
|
155 |
+
margin-top: 6px;
|
156 |
}
|
157 |
</style>
|
158 |
""")
|
|
|
161 |
thread_state = gr.State()
|
162 |
image_state = gr.State()
|
163 |
client_id = gr.State()
|
|
|
164 |
|
165 |
with gr.Row(equal_height=True):
|
166 |
with gr.Column(scale=1):
|
|
|
173 |
send_btn = gr.Button("Send", variant="primary", scale=2)
|
174 |
|
175 |
with gr.Accordion("🎤 Voice Transcription", open=False) as voice_section:
|
176 |
+
voice_input = gr.Audio(label="🎙️ Record", streaming=True, elem_id="record-audio")
|
177 |
voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
|
178 |
|
179 |
with gr.Row():
|
|
|
180 |
ask_btn = gr.Button("🟢 Ask", elem_id="ask-btn")
|
181 |
clear_chat_btn = gr.Button("🧹 Clear Chat", elem_id="clear-chat-btn")
|
182 |
|
183 |
+
# Bindings
|
184 |
send_btn.click(fn=handle_chat,
|
185 |
inputs=[user_prompt, chat_state, thread_state, image_state],
|
186 |
outputs=[user_prompt, chat, thread_state, image_state])
|
187 |
image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
|
|
|
188 |
voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
ask_btn.click(fn=send_transcript_to_assistant,
|
190 |
inputs=[voice_transcript, chat_state, thread_state, image_state],
|
191 |
outputs=[user_prompt, chat, thread_state, image_state])
|
|
|
192 |
clear_chat_btn.click(fn=clear_chat_and_transcript,
|
193 |
inputs=[client_id],
|
194 |
outputs=[chat, voice_transcript, thread_state, image_state])
|
|
|
195 |
app.load(fn=create_ws, outputs=[client_id])
|
196 |
|
197 |
app.launch()
|