Update app.py
app.py
CHANGED
@@ -70,7 +70,7 @@ class WebSocketClient:
         if data["type"] == "conversation.item.input_audio_transcription.delta":
             self.transcript += data["delta"]
 
-#
+# Connection manager
 def create_ws():
     cid = str(uuid.uuid4())
     client = WebSocketClient(WS_URI, HEADERS, cid)
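An aside on the hunk above: a minimal sketch of the connection-manager pattern the new comment names, one WebSocketClient per browser session keyed by a fresh UUID. The stub class and the background thread are assumptions; only the body of create_ws() is taken from the diff.

import threading
import uuid

class WebSocketClient:                      # stub standing in for the app's class
    def __init__(self, uri, headers, cid):
        self.uri, self.headers, self.cid = uri, headers, cid
        self.transcript = ""
    def run(self):                          # the real class would pump the socket here
        pass

connections: dict[str, WebSocketClient] = {}

def create_ws():
    cid = str(uuid.uuid4())                 # one client per browser session
    client = WebSocketClient("wss://example", {}, cid)
    # assumption: run the socket in the background so UI callbacks stay non-blocking
    threading.Thread(target=client.run, daemon=True).start()
    connections[cid] = client
    return cid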
@@ -82,11 +82,8 @@ def send_audio(chunk, cid):
     if not cid or cid not in connections:
         return "Connecting..."
     sr, arr = chunk
-
-    # Reset transcript if it's been running long or restarted
     if len(connections[cid].transcript) > 1000:
         connections[cid].transcript = ""
-
     connections[cid].enqueue_audio_chunk(sr, arr)
     return connections[cid].transcript.strip()
 
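A hedged sketch of what enqueue_audio_chunk plausibly does with each (sample_rate, ndarray) chunk before it reaches the realtime transcription socket: downmix, resample, and base64-encode 16-bit PCM. None of this is confirmed by the diff; it only illustrates the data each streamed chunk carries.

import base64
import numpy as np

def pcm16_base64(sr: int, arr: np.ndarray, target_sr: int = 24000) -> str:
    if arr.ndim > 1:                          # downmix stereo to mono
        arr = arr.mean(axis=1)
    if sr != target_sr:                       # naive linear resample (assumption)
        idx = np.linspace(0, len(arr) - 1, int(len(arr) * target_sr / sr))
        arr = np.interp(idx, np.arange(len(arr)), arr)
    pcm = arr.astype(np.int16).tobytes()      # 16-bit little-endian PCM
    return base64.b64encode(pcm).decode("ascii")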
@@ -95,7 +92,6 @@ def clear_transcript(cid):
     connections[cid].transcript = ""
     return ""
 
-# Chat assistant logic
 def handle_chat(user_input, history, thread_id, image_url):
     if not OPENAI_API_KEY or not ASSISTANT_ID:
         return "❌ Missing secrets!", history, thread_id, image_url
@@ -134,7 +130,6 @@ def handle_chat(user_input, history, thread_id, image_url):
 def send_transcript_to_assistant(transcript, history, thread_id, image_url, cid):
     if not transcript.strip():
         return gr.update(), history, thread_id, image_url
-    # Clear transcript after sending
     if cid in connections:
         connections[cid].transcript = ""
     return handle_chat(transcript, history, thread_id, image_url)
@@ -156,12 +151,24 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
     border-radius: 6px;
     margin-top: 10px;
     background-color: #f2f2f2 !important;
+    color: #000 !important;
+    white-space: nowrap;
+    overflow: hidden;
+    text-overflow: ellipsis;
 }
+
 button {
     margin-right: 8px;
 }
+
+/* Hide icon (optional) */
 #record-audio button svg {
-
+    margin-right: 6px;
+}
+
+/* Hide internal label if redundant */
+#record-audio label {
+    display: none;
 }
 </style>
 """)
@@ -182,7 +189,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
         send_btn = gr.Button("Send", variant="primary", scale=2)
 
     with gr.Accordion("🎤 Voice Transcription", open=False) as voice_section:
-
+        gr.Markdown("**🎙️ Tap below to record your voice**")
+        voice_input = gr.Audio(label="", streaming=True, elem_id="record-audio")
        voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
 
     with gr.Row():
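A side note on the elem_id hook introduced here: elem_id="record-audio" gives the component's wrapper that DOM id, which is what the #record-audio selectors in the <style> hunk above target. A minimal sketch:

import gradio as gr

# The css string mirrors the rule added in the <style> hunk above.
with gr.Blocks(css="#record-audio label { display: none; }") as demo:
    gr.Audio(label="", streaming=True, elem_id="record-audio")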
@@ -194,7 +202,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
                   inputs=[user_prompt, chat_state, thread_state, image_state],
                   outputs=[user_prompt, chat, thread_state, image_state])
     image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
-
     voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
 
     ask_btn.click(fn=send_transcript_to_assistant,
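A runnable miniature of the stream_every wiring this hunk tidies up: Gradio re-invokes the callback with the newest audio at most every 0.5 s while the microphone is live. fake_send_audio stands in for the app's send_audio, and the hard-coded State value is an assumption (the app fills it via create_ws()).

import gradio as gr

def fake_send_audio(chunk, cid):            # stands in for the app's send_audio
    return "partial transcript..."

with gr.Blocks() as demo:
    client_id = gr.State("demo-cid")        # the app sets this from create_ws()
    voice_input = gr.Audio(label="", streaming=True)
    voice_transcript = gr.Textbox(label="Transcript")
    # Re-run the callback on every new chunk, at most every 0.5 s
    voice_input.stream(fn=fake_send_audio, inputs=[voice_input, client_id],
                       outputs=voice_transcript, stream_every=0.5)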