IAMTFRMZA committed on
Commit
9850ad3
·
verified ·
1 Parent(s): 7f2459b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -21
app.py CHANGED
@@ -17,7 +17,7 @@ HEADERS = {"Authorization": f"Bearer {OPENAI_API_KEY}", "OpenAI-Beta": "realtime
17
  WS_URI = "wss://api.openai.com/v1/realtime?intent=transcription"
18
  connections = {}
19
 
20
- # ============ WebSocket Client for Voice ============
21
  class WebSocketClient:
22
  def __init__(self, uri, headers, client_id):
23
  self.uri, self.headers, self.client_id = uri, headers, client_id
@@ -45,7 +45,10 @@ class WebSocketClient:
45
  buf = io.BytesIO(); sf.write(buf, int16, sr, format='WAV', subtype='PCM_16')
46
  audio = AudioSegment.from_file(buf, format="wav").set_frame_rate(24000)
47
  out = io.BytesIO(); audio.export(out, format="wav"); out.seek(0)
48
- await self.websocket.send(json.dumps({"type": "input_audio_buffer.append", "audio": base64.b64encode(out.read()).decode()}))
 
 
 
49
 
50
  async def receive_messages(self):
51
  async for msg in self.websocket:
@@ -74,7 +77,7 @@ def clear_transcript(cid):
74
  if cid in connections: connections[cid].transcript = ""
75
  return ""
76
 
77
- # ============ Chat Assistant Logic ============
78
  def handle_chat(user_input, history, thread_id, image_url):
79
  if not OPENAI_API_KEY or not ASSISTANT_ID:
80
  return "❌ Missing secrets!", history, thread_id, image_url
@@ -97,7 +100,10 @@ def handle_chat(user_input, history, thread_id, image_url):
97
  if msg.role == "assistant":
98
  content = msg.content[0].text.value
99
  history.append((user_input, content))
100
- match = re.search(r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png', content)
 
 
 
101
  if match: image_url = match.group(0)
102
  break
103
 
@@ -115,30 +121,37 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
115
  thread_state = gr.State()
116
  image_state = gr.State()
117
  client_id = gr.State()
 
118
 
119
  with gr.Row(equal_height=True):
120
- # LEFT COLUMN — Image + Voice
121
- with gr.Column(scale=1, min_width=400):
122
- image_display = gr.Image(label="🖼️ Document Image", type="filepath", show_download_button=False, show_label=False)
 
 
 
123
  with gr.Row():
124
- voice_input = gr.Audio(label="🎙️ Mic", streaming=True)
125
- voice_transcript = gr.Textbox(label="📝 Transcript", lines=3, interactive=False)
126
- clear_btn = gr.Button("🧹 Clear Transcript")
127
-
128
- # RIGHT COLUMN Chatbot
129
- with gr.Column(scale=1.4, min_width=500):
130
- chat = gr.Chatbot(label="💬 Chat", height=480)
131
- user_prompt = gr.Textbox(placeholder="Ask your question...", show_label=False)
132
- send_btn = gr.Button("Send", variant="primary")
133
-
134
- # HOOKS
135
- send_btn.click(handle_chat,
 
 
 
 
136
  inputs=[user_prompt, chat_state, thread_state, image_state],
137
  outputs=[user_prompt, chat, thread_state, image_state])
138
-
139
  image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
140
  voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
141
  clear_btn.click(fn=clear_transcript, inputs=[client_id], outputs=voice_transcript)
142
- app.load(create_ws, outputs=[client_id])
143
 
144
  app.launch()
 
17
  WS_URI = "wss://api.openai.com/v1/realtime?intent=transcription"
18
  connections = {}
19
 
20
+ # ============ WebSocket Client ============
21
  class WebSocketClient:
22
  def __init__(self, uri, headers, client_id):
23
  self.uri, self.headers, self.client_id = uri, headers, client_id
 
45
  buf = io.BytesIO(); sf.write(buf, int16, sr, format='WAV', subtype='PCM_16')
46
  audio = AudioSegment.from_file(buf, format="wav").set_frame_rate(24000)
47
  out = io.BytesIO(); audio.export(out, format="wav"); out.seek(0)
48
+ await self.websocket.send(json.dumps({
49
+ "type": "input_audio_buffer.append",
50
+ "audio": base64.b64encode(out.read()).decode()
51
+ }))
52
 
53
  async def receive_messages(self):
54
  async for msg in self.websocket:
 
77
  if cid in connections: connections[cid].transcript = ""
78
  return ""
79
 
80
+ # ============ Chat Assistant ============
81
  def handle_chat(user_input, history, thread_id, image_url):
82
  if not OPENAI_API_KEY or not ASSISTANT_ID:
83
  return "❌ Missing secrets!", history, thread_id, image_url
 
100
  if msg.role == "assistant":
101
  content = msg.content[0].text.value
102
  history.append((user_input, content))
103
+ match = re.search(
104
+ r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
105
+ content
106
+ )
107
  if match: image_url = match.group(0)
108
  break
109
 
 
121
  thread_state = gr.State()
122
  image_state = gr.State()
123
  client_id = gr.State()
124
+ voice_enabled = gr.State(False)
125
 
126
  with gr.Row(equal_height=True):
127
+ with gr.Column(scale=1):
128
+ image_display = gr.Image(label="🖼️ Document", type="filepath", show_download_button=False)
129
+
130
+ with gr.Column(scale=1.4):
131
+ chat = gr.Chatbot(label="💬 Chat", height=460)
132
+
133
  with gr.Row():
134
+ user_prompt = gr.Textbox(placeholder="Ask your question...", show_label=False, scale=6)
135
+ mic_toggle_btn = gr.Button("🎙️", scale=1)
136
+ send_btn = gr.Button("Send", variant="primary", scale=2)
137
+
138
+ with gr.Accordion("🎤 Voice Transcription", open=False) as voice_section:
139
+ with gr.Row():
140
+ voice_input = gr.Audio(label="Mic", streaming=True)
141
+ voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
142
+ clear_btn = gr.Button("🧹 Clear Transcript")
143
+
144
+ # FUNCTIONAL CONNECTIONS
145
+ def toggle_voice(curr):
146
+ return not curr, gr.update(visible=not curr)
147
+
148
+ mic_toggle_btn.click(fn=toggle_voice, inputs=voice_enabled, outputs=[voice_enabled, voice_section])
149
+ send_btn.click(fn=handle_chat,
150
  inputs=[user_prompt, chat_state, thread_state, image_state],
151
  outputs=[user_prompt, chat, thread_state, image_state])
 
152
  image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
153
  voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
154
  clear_btn.click(fn=clear_transcript, inputs=[client_id], outputs=voice_transcript)
155
+ app.load(fn=create_ws, outputs=[client_id])
156
 
157
  app.launch()