IAMTFRMZA committed
Commit dbf9e7a · verified · 1 Parent(s): fa361c9

Update app.py

Files changed (1)
  1. app.py +29 -45
app.py CHANGED
@@ -4,10 +4,10 @@ import numpy as np
 import soundfile as sf
 from pydub import AudioSegment
 from openai import OpenAI
-from websockets import connect
+from websockets import connect, Data, ClientConnection
 from dotenv import load_dotenv
 
-# Load environment variables
+# ---------------- Environment & Client Setup ----------------
 load_dotenv()
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 ASSISTANT_ID = os.getenv("ASSISTANT_ID")
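The import change pulls in `Data` and `ClientConnection`, the typing helpers from websockets' newer asyncio implementation. The connection setup itself sits outside this hunk; a minimal sketch of how such a client might be opened — the realtime URI and header are assumptions, not taken from this commit:

```python
# Sketch only: REALTIME_URI and the auth header are hypothetical,
# not shown anywhere in this diff.
import websockets
from websockets import ClientConnection

REALTIME_URI = "wss://api.openai.com/v1/realtime?intent=transcription"  # assumed

async def open_realtime_socket(api_key: str) -> ClientConnection:
    # Awaiting connect() directly (outside `async with`) hands back a
    # ClientConnection; incoming frames are typed as Data (str | bytes).
    return await websockets.connect(
        REALTIME_URI,
        additional_headers={"Authorization": f"Bearer {api_key}"},
    )
```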
@@ -45,10 +45,7 @@ class WebSocketClient:
         buf = io.BytesIO(); sf.write(buf, int16, sr, format='WAV', subtype='PCM_16')
         audio = AudioSegment.from_file(buf, format="wav").set_frame_rate(24000)
         out = io.BytesIO(); audio.export(out, format="wav"); out.seek(0)
-        await self.websocket.send(json.dumps({
-            "type": "input_audio_buffer.append",
-            "audio": base64.b64encode(out.read()).decode()
-        }))
+        await self.websocket.send(json.dumps({"type": "input_audio_buffer.append", "audio": base64.b64encode(out.read()).decode()}))
 
     async def receive_messages(self):
         async for msg in self.websocket:
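For reference, the send path in isolation: mic chunks are written out as 16-bit PCM WAV, resampled to 24 kHz, and shipped base64-encoded in an `input_audio_buffer.append` event. A self-contained sketch of that transform, assuming Gradio's `(sample_rate, ndarray)` streaming format (the float-to-int16 widening is an assumption; the diff's `int16` is produced above the hunk):

```python
import base64, io, json
import numpy as np
import soundfile as sf
from pydub import AudioSegment

def encode_chunk(sr: int, data: np.ndarray) -> str:
    # Widen float samples to 16-bit PCM; int16 chunks pass through untouched.
    int16 = data if data.dtype == np.int16 else (np.clip(data, -1.0, 1.0) * 32767).astype(np.int16)
    buf = io.BytesIO(); sf.write(buf, int16, sr, format='WAV', subtype='PCM_16'); buf.seek(0)
    # The realtime endpoint evidently expects 24 kHz audio, hence the resample.
    audio = AudioSegment.from_file(buf, format="wav").set_frame_rate(24000)
    out = io.BytesIO(); audio.export(out, format="wav"); out.seek(0)
    return json.dumps({"type": "input_audio_buffer.append",
                       "audio": base64.b64encode(out.read()).decode()})
```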
@@ -77,10 +74,10 @@ def clear_transcript(cid):
     if cid in connections: connections[cid].transcript = ""
     return ""
 
-# ---------------- Chat Assistant Logic ----------------
+# ---------------- Chat Functionality ----------------
 def handle_chat(user_input, history, thread_id, image_url):
     if not OPENAI_API_KEY or not ASSISTANT_ID:
-        return "❌ Missing API key or Assistant ID.", history, thread_id, image_url
+        return "❌ Missing secrets!", history, thread_id, image_url
 
     try:
         if thread_id is None:
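The middle of `handle_chat` is outside this hunk, but its visible edges (lazy thread creation, then scanning messages for the assistant reply) match the usual Assistants API round trip. A hedged reconstruction of that elided flow — the `client` construction is assumed, not shown in the diff:

```python
import os
from openai import OpenAI

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ASSISTANT_ID = os.getenv("ASSISTANT_ID")
client = OpenAI(api_key=OPENAI_API_KEY)  # assumed; app.py's client setup is not in this diff

def run_assistant_turn(user_input, thread_id):
    # Create the thread lazily on the first turn, as the diff's edge suggests.
    if thread_id is None:
        thread_id = client.beta.threads.create().id
    client.beta.threads.messages.create(thread_id=thread_id, role="user", content=user_input)
    client.beta.threads.runs.create_and_poll(thread_id=thread_id, assistant_id=ASSISTANT_ID)
    # messages.list() is newest-first, which is what the diff's loop relies on.
    for msg in client.beta.threads.messages.list(thread_id=thread_id).data:
        if msg.role == "assistant":
            return thread_id, msg.content[0].text.value
    return thread_id, ""
```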
@@ -100,12 +97,8 @@ def handle_chat(user_input, history, thread_id, image_url):
             if msg.role == "assistant":
                 content = msg.content[0].text.value
                 history.append((user_input, content))
-                match = re.search(
-                    r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
-                    content
-                )
-                if match:
-                    image_url = match.group(0)
+                match = re.search(r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png', content)
+                if match: image_url = match.group(0)
                 break
 
     return "", history, thread_id, image_url
@@ -113,49 +106,40 @@ def handle_chat(user_input, history, thread_id, image_url):
     except Exception as e:
         return f"❌ {e}", history, thread_id, image_url
 
-# ---------------- UI ----------------
-with gr.Blocks(theme="lone17/kotaemon") as app:
+# ---------------- Gradio UI Layout ----------------
+with gr.Blocks(theme=gr.themes.Soft()) as app:
     gr.Markdown("# 📄 Document AI Assistant")
 
-    # States
+    # STATES
     chat_state = gr.State([])
     thread_state = gr.State()
     image_state = gr.State()
     client_id = gr.State()
-    mic_shown = gr.State(False)
 
-    with gr.Row(equal_height=True):
-        # Left: Document Viewer
+    with gr.Row():
         with gr.Column(scale=1):
-            image_display = gr.Image(label="🖼️ Document Preview", type="filepath", show_download_button=False)
+            # IMAGE VIEWER (left)
+            image_display = gr.Image(label="🖼️ Document", type="filepath")
 
-        # Right: Chat + Mic
-        with gr.Column(scale=1.4):
+            # VOICE (under)
+            voice_transcript = gr.Textbox(label="🎙️ Transcript", lines=4, interactive=False)
+            voice_input = gr.Audio(label="🔴 Record", streaming=True)
+            clear_btn = gr.Button("🧹 Clear Transcript")
+
+        with gr.Column(scale=2):
+            # CHATBOT (right)
             chat = gr.Chatbot(label="💬 Chat", height=450)
+            user_prompt = gr.Textbox(show_label=False, placeholder="Ask your question...")
+            send_btn = gr.Button("Send")
 
-            with gr.Row():
-                user_input = gr.Textbox(placeholder="Ask your question...", show_label=False, scale=6)
-                mic_btn = gr.Button("🎙️", scale=1)
-                send_btn = gr.Button("Send", scale=2)
-
-    # Hidden Voice Section
-    with gr.Row(visible=False) as mic_row:
-        with gr.Column(scale=4):
-            audio = gr.Audio(label="🎤 Speak", streaming=True)
-        with gr.Column(scale=5):
-            transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
-        with gr.Column(scale=2):
-            clear_btn = gr.Button("🧹 Clear")
-
-    # Logic Wiring
-    def toggle_mic(state): return not state, gr.update(visible=not state)
-    mic_btn.click(toggle_mic, inputs=mic_shown, outputs=[mic_shown, mic_row])
+    # HANDLERS
     send_btn.click(handle_chat,
-        inputs=[user_input, chat_state, thread_state, image_state],
-        outputs=[user_input, chat, thread_state, image_state])
+        inputs=[user_prompt, chat_state, thread_state, image_state],
+        outputs=[user_prompt, chat, thread_state, image_state])
+
     image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
-    audio.stream(fn=send_audio, inputs=[audio, client_id], outputs=transcript, stream_every=0.5)
-    clear_btn.click(fn=clear_transcript, inputs=[client_id], outputs=transcript)
-    app.load(fn=create_ws, outputs=[client_id])
+    voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
+    clear_btn.click(fn=clear_transcript, inputs=[client_id], outputs=voice_transcript)
+    app.load(create_ws, outputs=[client_id])
 
 app.launch()
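The new layout drops the collapsible mic row and the `toggle_mic` state juggling: voice controls now sit permanently under the document viewer, removing three components and two handlers. The streaming contract the wiring relies on is worth spelling out; a minimal, self-contained sketch of that pattern, with illustrative names rather than app.py's own `send_audio`/`create_ws` (which are defined outside this diff):

```python
# gr.Audio(streaming=True) + .stream(..., stream_every=0.5) pushes mic chunks
# to the callback twice a second; returning a string updates the Textbox live.
import gradio as gr

def on_chunk(chunk, log):
    # Each chunk arrives as a (sample_rate, ndarray) tuple.
    sr, data = chunk
    log = (log or "") + f"got {data.shape[0]} samples @ {sr} Hz\n"
    return log, log

with gr.Blocks() as demo:
    log_state = gr.State("")
    mic = gr.Audio(label="Record", streaming=True)
    out = gr.Textbox(label="Transcript", lines=4)
    mic.stream(on_chunk, inputs=[mic, log_state], outputs=[out, log_state], stream_every=0.5)

demo.launch()
```

Here `stream_every=0.5` mirrors the commit's half-second cadence; in app.py the callback forwards each chunk over the websocket and returns the connection's rolling transcript instead of a sample count.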
 