IAMTFRMZA committed · verified
Commit d69e46f · 1 Parent(s): dbf9e7a

Update app.py

Files changed (1):
  app.py +37 -25
app.py CHANGED
@@ -7,7 +7,7 @@ from openai import OpenAI
 from websockets import connect, Data, ClientConnection
 from dotenv import load_dotenv
 
-# ---------------- Environment & Client Setup ----------------
+# ============ Load Secrets ============
 load_dotenv()
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 ASSISTANT_ID = os.getenv("ASSISTANT_ID")
@@ -17,7 +17,7 @@ HEADERS = {"Authorization": f"Bearer {OPENAI_API_KEY}", "OpenAI-Beta": "realtime
 WS_URI = "wss://api.openai.com/v1/realtime?intent=transcription"
 connections = {}
 
-# ---------------- WebSocket Client for Voice ----------------
+# ============ WebSocket Client ============
 class WebSocketClient:
     def __init__(self, uri, headers, client_id):
         self.uri, self.headers, self.client_id = uri, headers, client_id
@@ -45,7 +45,10 @@ class WebSocketClient:
         buf = io.BytesIO(); sf.write(buf, int16, sr, format='WAV', subtype='PCM_16')
         audio = AudioSegment.from_file(buf, format="wav").set_frame_rate(24000)
         out = io.BytesIO(); audio.export(out, format="wav"); out.seek(0)
-        await self.websocket.send(json.dumps({"type": "input_audio_buffer.append", "audio": base64.b64encode(out.read()).decode()}))
+        await self.websocket.send(json.dumps({
+            "type": "input_audio_buffer.append",
+            "audio": base64.b64encode(out.read()).decode()
+        }))
 
     async def receive_messages(self):
         async for msg in self.websocket:
@@ -74,7 +77,7 @@ def clear_transcript(cid):
     if cid in connections: connections[cid].transcript = ""
     return ""
 
-# ---------------- Chat Functionality ----------------
+# ============ Chat Assistant ============
 def handle_chat(user_input, history, thread_id, image_url):
     if not OPENAI_API_KEY or not ASSISTANT_ID:
         return "❌ Missing secrets!", history, thread_id, image_url
@@ -97,7 +100,10 @@ def handle_chat(user_input, history, thread_id, image_url):
             if msg.role == "assistant":
                 content = msg.content[0].text.value
                 history.append((user_input, content))
-                match = re.search(r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png', content)
+                match = re.search(
+                    r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
+                    content
+                )
                 if match: image_url = match.group(0)
                 break
 
@@ -106,7 +112,7 @@ def handle_chat(user_input, history, thread_id, image_url):
     except Exception as e:
         return f"❌ {e}", history, thread_id, image_url
 
-# ---------------- Gradio UI Layout ----------------
+# ============ Gradio UI ============
 with gr.Blocks(theme=gr.themes.Soft()) as app:
     gr.Markdown("# 📄 Document AI Assistant")
 
@@ -115,31 +121,37 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
     thread_state = gr.State()
     image_state = gr.State()
     client_id = gr.State()
+    voice_enabled = gr.State(False)
 
-    with gr.Row():
+    with gr.Row(equal_height=True):
         with gr.Column(scale=1):
-            # IMAGE VIEWER (left)
-            image_display = gr.Image(label="🖼️ Document", type="filepath")
-
-            # VOICE (under)
-            voice_transcript = gr.Textbox(label="🎙️ Transcript", lines=4, interactive=False)
-            voice_input = gr.Audio(label="🔴 Record", streaming=True)
-            clear_btn = gr.Button("🧹 Clear Transcript")
-
-        with gr.Column(scale=2):
-            # CHATBOT (right)
-            chat = gr.Chatbot(label="💬 Chat", height=450)
-            user_prompt = gr.Textbox(show_label=False, placeholder="Ask your question...")
-            send_btn = gr.Button("Send")
-
-    # HANDLERS
-    send_btn.click(handle_chat,
+            image_display = gr.Image(label="🖼️ Document", type="filepath", show_download_button=False)
+
+        with gr.Column(scale=1.4):
+            chat = gr.Chatbot(label="💬 Chat", height=460)
+
+            with gr.Row():
+                user_prompt = gr.Textbox(placeholder="Ask your question...", show_label=False, scale=6)
+                mic_toggle_btn = gr.Button("🎙️", scale=1)
+                send_btn = gr.Button("Send", variant="primary", scale=2)
+
+            with gr.Accordion("🎤 Voice Transcription", open=False) as voice_section:
+                with gr.Row():
+                    voice_input = gr.Audio(label="Mic", streaming=True)
+                    voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
+                clear_btn = gr.Button("🧹 Clear Transcript")
+
+    # FUNCTIONAL CONNECTIONS
+    def toggle_voice(curr):
+        return not curr, gr.update(visible=not curr)
+
+    mic_toggle_btn.click(fn=toggle_voice, inputs=voice_enabled, outputs=[voice_enabled, voice_section])
+    send_btn.click(fn=handle_chat,
                    inputs=[user_prompt, chat_state, thread_state, image_state],
                    outputs=[user_prompt, chat, thread_state, image_state])
-
     image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
    voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
     clear_btn.click(fn=clear_transcript, inputs=[client_id], outputs=voice_transcript)
-    app.load(create_ws, outputs=[client_id])
+    app.load(fn=create_ws, outputs=[client_id])
 
 app.launch()
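
For reference, the reformatted send in the WebSocket client emits a realtime `input_audio_buffer.append` event carrying base64-encoded PCM16 audio. A minimal sketch of just that payload, with a hypothetical 100 ms silence buffer standing in for the resampled microphone chunk:

```python
import base64
import json

def append_event(pcm16_bytes: bytes) -> str:
    # Mirrors the message built in WebSocketClient: resampled 24 kHz
    # PCM16 audio is base64-encoded into the "audio" field.
    return json.dumps({
        "type": "input_audio_buffer.append",
        "audio": base64.b64encode(pcm16_bytes).decode(),
    })

# Hypothetical chunk: 100 ms of mono silence at 24 kHz, 2 bytes per sample.
print(append_event(b"\x00\x00" * 2400)[:80])
```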
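The wrapped `re.search` call keeps the original pattern byte for byte; only the formatting changed. A quick check against a hypothetical assistant reply shows what the pattern extracts:

```python
import re

PNG_URL = (r'https://raw\.githubusercontent\.com/AndrewLORTech/'
           r'surgical-pathology-manual/main/[\w\-/]*\.png')

# Hypothetical reply text, used only to exercise the pattern.
reply = ("The slide is shown at https://raw.githubusercontent.com/"
         "AndrewLORTech/surgical-pathology-manual/main/ch01/fig2.png today.")
match = re.search(PNG_URL, reply)
print(match.group(0) if match else "no image URL")
```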
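The new mic button pairs a `gr.State(False)` flag with a `gr.update(visible=...)` aimed at the accordion. A self-contained sketch of that toggle pattern, using placeholder names (`shown`, `panel`, `demo`) rather than the app's own components:

```python
import gradio as gr

with gr.Blocks() as demo:
    shown = gr.State(False)
    btn = gr.Button("🎙️")
    with gr.Accordion("Voice", open=False) as panel:
        gr.Markdown("mic controls go here")

    def toggle(curr):
        # Return the flipped flag plus a visibility update for the accordion.
        return not curr, gr.update(visible=not curr)

    btn.click(fn=toggle, inputs=shown, outputs=[shown, panel])

demo.launch()
```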