IAMTFRMZA committed on
Commit
a86432c
Β·
verified Β·
1 Parent(s): e0cd5da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -27
app.py CHANGED
@@ -70,7 +70,7 @@ class WebSocketClient:
70
  if data["type"] == "conversation.item.input_audio_transcription.delta":
71
  self.transcript += data["delta"]
72
 
73
- # Real-time transcription connection manager
74
  def create_ws():
75
  cid = str(uuid.uuid4())
76
  client = WebSocketClient(WS_URI, HEADERS, cid)
@@ -83,14 +83,17 @@ def send_audio(chunk, cid):
83
  return "Connecting..."
84
  sr, arr = chunk
85
  connections[cid].enqueue_audio_chunk(sr, arr)
86
- return connections[cid].transcript
87
 
88
- def clear_transcript(cid):
89
  if cid in connections:
90
  connections[cid].transcript = ""
91
  return ""
92
 
93
- # ============ Chat Assistant ============
 
 
 
94
  def handle_chat(user_input, history, thread_id, image_url):
95
  if not OPENAI_API_KEY or not ASSISTANT_ID:
96
  return "❌ Missing secrets!", history, thread_id, image_url
@@ -105,7 +108,8 @@ def handle_chat(user_input, history, thread_id, image_url):
105
 
106
  while True:
107
  status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
108
- if status.status == "completed": break
 
109
  time.sleep(1)
110
 
111
  msgs = client.beta.threads.messages.list(thread_id=thread_id)
@@ -117,7 +121,8 @@ def handle_chat(user_input, history, thread_id, image_url):
117
  r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
118
  content
119
  )
120
- if match: image_url = match.group(0)
 
121
  break
122
 
123
  return "", history, thread_id, image_url
@@ -125,6 +130,20 @@ def handle_chat(user_input, history, thread_id, image_url):
125
  except Exception as e:
126
  return f"❌ {e}", history, thread_id, image_url
127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  # ============ Gradio UI ============
129
  with gr.Blocks(theme=gr.themes.Soft()) as app:
130
  gr.Markdown("# πŸ“„ Document AI Assistant")
@@ -162,7 +181,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
162
  user_prompt = gr.Textbox(placeholder="Ask your question...", show_label=False, scale=8)
163
  send_btn = gr.Button("Send", variant="primary", scale=2)
164
 
165
- # === Voice Transcription Section ===
166
  with gr.Column(elem_classes="voice-area"):
167
  gr.Markdown("### πŸŽ™οΈ Voice Input")
168
 
@@ -171,39 +189,35 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
171
 
172
  with gr.Row():
173
  voice_send_btn = gr.Button("🟒 Send Voice to Assistant", elem_classes="big-btn")
174
- voice_clear_btn = gr.Button("🧹 Clear", elem_classes="big-btn")
175
 
176
- # ============ Functional Bindings ============
 
 
 
177
  send_btn.click(fn=handle_chat,
178
  inputs=[user_prompt, chat_state, thread_state, image_state],
179
  outputs=[user_prompt, chat, thread_state, image_state])
180
 
181
- image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
182
-
183
  voice_input.stream(fn=send_audio,
184
  inputs=[voice_input, client_id],
185
  outputs=voice_transcript,
186
  stream_every=0.5)
187
 
188
- def feed_voice_to_assistant(transcript, history, thread_id, image_url, cid):
189
- if not transcript.strip():
190
- return gr.update(), history, thread_id, image_url
191
- if cid in connections:
192
- connections[cid].transcript = ""
193
- return handle_chat(transcript, history, thread_id, image_url)
194
-
195
- def clear_all(cid):
196
- if cid in connections:
197
- connections[cid].transcript = ""
198
- return [], "", None, None
199
-
200
- voice_send_btn.click(fn=feed_voice_to_assistant,
201
  inputs=[voice_transcript, chat_state, thread_state, image_state, client_id],
202
  outputs=[user_prompt, chat, thread_state, image_state])
203
 
204
- voice_clear_btn.click(fn=clear_all,
205
- inputs=[client_id],
206
- outputs=[chat, voice_transcript, thread_state, image_state])
 
 
 
 
 
 
 
207
 
208
  app.load(fn=create_ws, outputs=[client_id])
209
 
 
70
  if data["type"] == "conversation.item.input_audio_transcription.delta":
71
  self.transcript += data["delta"]
72
 
73
+ # WebSocket Connection Manager
74
  def create_ws():
75
  cid = str(uuid.uuid4())
76
  client = WebSocketClient(WS_URI, HEADERS, cid)
 
83
  return "Connecting..."
84
  sr, arr = chunk
85
  connections[cid].enqueue_audio_chunk(sr, arr)
86
+ return connections[cid].transcript.strip()
87
 
88
+ def clear_transcript_only(cid):
89
  if cid in connections:
90
  connections[cid].transcript = ""
91
  return ""
92
 
93
+ def clear_chat_only():
94
+ return [], None, None
95
+
96
+ # Assistant chat handler
97
  def handle_chat(user_input, history, thread_id, image_url):
98
  if not OPENAI_API_KEY or not ASSISTANT_ID:
99
  return "❌ Missing secrets!", history, thread_id, image_url
 
108
 
109
  while True:
110
  status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
111
+ if status.status == "completed":
112
+ break
113
  time.sleep(1)
114
 
115
  msgs = client.beta.threads.messages.list(thread_id=thread_id)
 
121
  r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
122
  content
123
  )
124
+ if match:
125
+ image_url = match.group(0)
126
  break
127
 
128
  return "", history, thread_id, image_url
 
130
  except Exception as e:
131
  return f"❌ {e}", history, thread_id, image_url
132
 
133
+ # Feed transcript as assistant input
134
+ def feed_transcript(transcript, history, thread_id, image_url, cid):
135
+ if not transcript.strip():
136
+ return gr.update(), history, thread_id, image_url
137
+ if cid in connections:
138
+ connections[cid].transcript = ""
139
+ return handle_chat(transcript, history, thread_id, image_url)
140
+
141
+ # Fallback for image display
142
+ def update_image_display(image_url):
143
+ if image_url and isinstance(image_url, str) and image_url.startswith("http"):
144
+ return image_url
145
+ return None
146
+
147
  # ============ Gradio UI ============
148
  with gr.Blocks(theme=gr.themes.Soft()) as app:
149
  gr.Markdown("# πŸ“„ Document AI Assistant")
 
181
  user_prompt = gr.Textbox(placeholder="Ask your question...", show_label=False, scale=8)
182
  send_btn = gr.Button("Send", variant="primary", scale=2)
183
 
 
184
  with gr.Column(elem_classes="voice-area"):
185
  gr.Markdown("### πŸŽ™οΈ Voice Input")
186
 
 
189
 
190
  with gr.Row():
191
  voice_send_btn = gr.Button("🟒 Send Voice to Assistant", elem_classes="big-btn")
192
+ clear_transcript_btn = gr.Button("🧹 Clear Transcript", elem_classes="big-btn")
193
 
194
+ with gr.Row():
195
+ clear_chat_btn = gr.Button("πŸ—‘οΈ Clear Chat", elem_classes="big-btn")
196
+
197
+ # Bindings
198
  send_btn.click(fn=handle_chat,
199
  inputs=[user_prompt, chat_state, thread_state, image_state],
200
  outputs=[user_prompt, chat, thread_state, image_state])
201
 
 
 
202
  voice_input.stream(fn=send_audio,
203
  inputs=[voice_input, client_id],
204
  outputs=voice_transcript,
205
  stream_every=0.5)
206
 
207
+ voice_send_btn.click(fn=feed_transcript,
 
 
 
 
 
 
 
 
 
 
 
 
208
  inputs=[voice_transcript, chat_state, thread_state, image_state, client_id],
209
  outputs=[user_prompt, chat, thread_state, image_state])
210
 
211
+ clear_transcript_btn.click(fn=clear_transcript_only,
212
+ inputs=[client_id],
213
+ outputs=voice_transcript)
214
+
215
+ clear_chat_btn.click(fn=clear_chat_only,
216
+ outputs=[chat, thread_state, image_state])
217
+
218
+ image_state.change(fn=update_image_display,
219
+ inputs=image_state,
220
+ outputs=image_display)
221
 
222
  app.load(fn=create_ws, outputs=[client_id])
223