IAMTFRMZA committed
Commit c9731af · verified · 1 Parent(s): 860955f

Update app.py

Files changed (1)
  app.py  +16 -9
app.py CHANGED
@@ -70,7 +70,7 @@ class WebSocketClient:
         if data["type"] == "conversation.item.input_audio_transcription.delta":
             self.transcript += data["delta"]
 
-# WebSocket connection setup
+# Connection manager
 def create_ws():
     cid = str(uuid.uuid4())
     client = WebSocketClient(WS_URI, HEADERS, cid)
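For context (not part of this commit): a minimal, self-contained sketch of the per-session registry that create_ws() fills and that send_audio() reads from in the next hunk. Only the names visible in the diff are taken from app.py; the stubbed WebSocketClient, the run() entry point, and the background thread are assumptions.

import threading
import uuid

class WebSocketClient:                                  # stub standing in for the real client in app.py
    def __init__(self, uri, headers, cid):
        self.uri, self.headers, self.cid = uri, headers, cid
        self.transcript = ""                            # appended to by transcription delta events

    def run(self):                                      # hypothetical entry point for the socket loop
        pass

WS_URI, HEADERS = "wss://example.invalid/realtime", {}  # placeholders, not the app's real values
connections = {}                                        # cid -> WebSocketClient, shared by all handlers

def create_ws():
    cid = str(uuid.uuid4())
    client = WebSocketClient(WS_URI, HEADERS, cid)
    threading.Thread(target=client.run, daemon=True).start()
    connections[cid] = client
    return cid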
@@ -82,11 +82,8 @@ def send_audio(chunk, cid):
     if not cid or cid not in connections:
         return "Connecting..."
     sr, arr = chunk
-
-    # Reset transcript if it's been running long or restarted
     if len(connections[cid].transcript) > 1000:
         connections[cid].transcript = ""
-
     connections[cid].enqueue_audio_chunk(sr, arr)
     return connections[cid].transcript.strip()
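To make the 1000-character transcript cap concrete (again, not part of the commit): a runnable sketch of send_audio() against a stub client. The (sample_rate, ndarray) chunk shape follows the unpacking in the hunk above; the stub's enqueue is a no-op.

import numpy as np

class StubClient:
    def __init__(self):
        self.transcript = ""

    def enqueue_audio_chunk(self, sr, arr):
        pass                                            # the real client forwards PCM to the transcription socket

connections = {"demo": StubClient()}

def send_audio(chunk, cid):
    if not cid or cid not in connections:
        return "Connecting..."
    sr, arr = chunk
    if len(connections[cid].transcript) > 1000:         # reset the rolling transcript instead of letting it grow
        connections[cid].transcript = ""
    connections[cid].enqueue_audio_chunk(sr, arr)
    return connections[cid].transcript.strip()

print(send_audio((16000, np.zeros(8000, dtype=np.int16)), "demo"))  # -> "" until transcript deltas arrive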
 
@@ -95,7 +92,6 @@ def clear_transcript(cid):
     connections[cid].transcript = ""
     return ""
 
-# Chat assistant logic
 def handle_chat(user_input, history, thread_id, image_url):
     if not OPENAI_API_KEY or not ASSISTANT_ID:
         return "❌ Missing secrets!", history, thread_id, image_url
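For context only: the guard above depends on OPENAI_API_KEY and ASSISTANT_ID being set. A common wiring (an assumption, not shown in this diff) reads them from the Space's environment, so empty or missing secrets make handle_chat() short-circuit with the error message above.

import os

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")        # assumed source of the secret, not shown in the diff
ASSISTANT_ID = os.getenv("ASSISTANT_ID", "")

if not OPENAI_API_KEY or not ASSISTANT_ID:
    print("❌ Missing secrets!")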
@@ -134,7 +130,6 @@ def handle_chat(user_input, history, thread_id, image_url):
 def send_transcript_to_assistant(transcript, history, thread_id, image_url, cid):
     if not transcript.strip():
         return gr.update(), history, thread_id, image_url
-    # Clear transcript after sending
     if cid in connections:
         connections[cid].transcript = ""
     return handle_chat(transcript, history, thread_id, image_url)
@@ -156,12 +151,24 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
     border-radius: 6px;
     margin-top: 10px;
     background-color: #f2f2f2 !important;
+    color: #000 !important;
+    white-space: nowrap;
+    overflow: hidden;
+    text-overflow: ellipsis;
 }
+
 button {
     margin-right: 8px;
 }
+
+/* Hide icon (optional) */
 #record-audio button svg {
-    display: none !important;
+    margin-right: 6px;
+}
+
+/* Hide internal label if redundant */
+#record-audio label {
+    display: none;
 }
 </style>
 """)
@@ -182,7 +189,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
             send_btn = gr.Button("Send", variant="primary", scale=2)
 
         with gr.Accordion("🎤 Voice Transcription", open=False) as voice_section:
-            voice_input = gr.Audio(label="🎙️ Record", streaming=True, elem_id="record-audio")
+            gr.Markdown("**🎙️ Tap below to record your voice**")
+            voice_input = gr.Audio(label="", streaming=True, elem_id="record-audio")
             voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
 
         with gr.Row():
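A side note (not from this commit): the #record-audio selectors in the <style> block only match because the audio component is created with elem_id="record-audio". The same rules can equally be passed through gr.Blocks(css=...), as in this small sketch.

import gradio as gr

# Same selectors as in the diff, supplied via gr.Blocks(css=...) instead of an inline <style> tag.
CSS = """
#record-audio button svg { margin-right: 6px; }
#record-audio label { display: none; }
"""

with gr.Blocks(css=CSS) as demo:
    gr.Markdown("**🎙️ Tap below to record your voice**")
    gr.Audio(label="", streaming=True, elem_id="record-audio")

# demo.launch()  # uncomment to serve the sketch locally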
@@ -194,7 +202,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
                   inputs=[user_prompt, chat_state, thread_state, image_state],
                   outputs=[user_prompt, chat, thread_state, image_state])
     image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
-
     voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
 
     ask_btn.click(fn=send_transcript_to_assistant,
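Finally, a stripped-down, self-contained sketch of the streaming path this commit touches: gr.Audio streams (sample_rate, ndarray) chunks to the callback every 0.5 seconds and the returned text lands in the transcript box. The callback body here is a placeholder, not the app's real transcription logic.

import gradio as gr

def send_audio(chunk, cid):
    # Placeholder: the real app enqueues the chunk on a WebSocket and returns the rolling transcript.
    sr, arr = chunk
    return f"session {cid}: got {arr.shape[0]} samples at {sr} Hz"

with gr.Blocks() as demo:
    client_id = gr.State("demo-session")
    voice_input = gr.Audio(label="", streaming=True, elem_id="record-audio")
    voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
    voice_input.stream(fn=send_audio, inputs=[voice_input, client_id],
                       outputs=voice_transcript, stream_every=0.5)

if __name__ == "__main__":
    demo.launch()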
 