IAMTFRMZA committed on
Commit
e28818b
·
verified ·
1 Parent(s): d051f5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -48
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # app.py
2
  import gradio as gr
3
  import os, time, re, json, base64, asyncio, threading, uuid, io
4
  import numpy as np
@@ -8,7 +7,7 @@ from openai import OpenAI
8
  from websockets import connect
9
  from dotenv import load_dotenv
10
 
11
- # Load secrets
12
  load_dotenv()
13
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
14
  ASSISTANT_ID = os.getenv("ASSISTANT_ID")
@@ -71,7 +70,7 @@ class WebSocketClient:
71
  if data["type"] == "conversation.item.input_audio_transcription.delta":
72
  self.transcript += data["delta"]
73
 
74
- # WebSocket Connection Manager
75
  def create_ws():
76
  cid = str(uuid.uuid4())
77
  client = WebSocketClient(WS_URI, HEADERS, cid)
@@ -91,9 +90,18 @@ def clear_transcript_only(cid):
91
  connections[cid].transcript = ""
92
  return ""
93
 
 
 
 
 
 
 
 
 
 
94
  def handle_chat(user_input, thread_id):
95
  if not OPENAI_API_KEY or not ASSISTANT_ID:
96
- return "❌ Missing secrets!", thread_id, "", None
97
 
98
  try:
99
  if thread_id is None:
@@ -113,25 +121,20 @@ def handle_chat(user_input, thread_id):
113
  for msg in reversed(msgs.data):
114
  if msg.role == "assistant":
115
  content = msg.content[0].text.value
116
- match = re.search(
117
- r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
118
- content
119
- )
120
- image_url = match.group(0) if match else None
121
- response = f"### ❓ Question\n{user_input}\n\n---\n\n### 💡 Answer\n{content}"
122
- return response, thread_id, image_url
123
 
124
- return "No response from assistant.", thread_id, None
125
 
126
  except Exception as e:
127
- return f"❌ {e}", thread_id, None
128
 
 
129
  def feed_transcript(transcript, thread_id, cid):
130
  if not transcript.strip():
131
- return gr.update(), thread_id, None
132
  if cid in connections:
133
  connections[cid].transcript = ""
134
- return handle_chat(transcript, thread_id,)
135
 
136
  # ============ Gradio UI ============
137
 
@@ -140,21 +143,64 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
140
  <style>
141
  body {
142
  font-family: 'Inter', sans-serif;
143
- background-color: #f9f9fb;
 
144
  }
145
- .big-btn {
146
- font-size: 16px;
147
- padding: 12px 20px;
 
 
 
 
 
 
 
 
 
148
  border-radius: 12px;
 
149
  width: 100%;
150
- background-color: #4f46e5;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  color: white;
152
  border: none;
 
 
 
 
 
 
 
 
 
 
153
  }
154
- .voice-area {
155
- padding-top: 16px;
156
- margin-top: 16px;
157
- border-top: 1px solid #ddd;
 
 
 
 
158
  }
159
  </style>
160
  """)
@@ -162,42 +208,39 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
162
  thread_state = gr.State()
163
  client_id = gr.State()
164
 
165
- with gr.Row(equal_height=True):
166
- with gr.Column(scale=1):
167
- user_input = gr.Textbox(placeholder="Ask your question...", label="Prompt")
168
- submit_btn = gr.Button("🚀 Ask", variant="primary")
169
- result_md = gr.Markdown()
170
- image_output = gr.Image(label="🖼️ Preview", type="filepath", show_download_button=False)
171
 
172
- with gr.Column(elem_classes="voice-area"):
173
- gr.Markdown("🎙️ Real-time Voice Input")
174
- voice_input = gr.Audio(label="Tap to Speak", streaming=True, type="numpy")
175
- transcript_box = gr.Textbox(label="Transcript", lines=2, interactive=False)
176
 
177
- voice_submit_btn = gr.Button("Send Voice", elem_classes="big-btn")
178
- clear_transcript_btn = gr.Button("🧹 Clear Transcript", elem_classes="big-btn")
 
179
 
180
- with gr.Column(scale=1.4):
181
- gr.Markdown("### ⏱️ Assistant Response")
182
- result_area = gr.Markdown()
 
 
 
183
 
184
  # Bindings
185
- submit_btn.click(fn=handle_chat,
186
- inputs=[user_input, thread_state],
187
- outputs=[result_area, thread_state, image_output])
188
 
189
  voice_input.stream(fn=send_audio,
190
  inputs=[voice_input, client_id],
191
  outputs=transcript_box,
192
  stream_every=0.5)
193
 
194
- voice_submit_btn.click(fn=feed_transcript,
195
- inputs=[transcript_box, thread_state, client_id],
196
- outputs=[result_area, thread_state, image_output])
197
 
198
- clear_transcript_btn.click(fn=clear_transcript_only,
199
- inputs=[client_id],
200
- outputs=transcript_box)
201
 
202
  app.load(fn=create_ws, outputs=[client_id])
203
 
 
 
1
  import gradio as gr
2
  import os, time, re, json, base64, asyncio, threading, uuid, io
3
  import numpy as np
 
7
  from websockets import connect
8
  from dotenv import load_dotenv
9
 
10
+ # Load environment secrets
11
  load_dotenv()
12
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
13
  ASSISTANT_ID = os.getenv("ASSISTANT_ID")
 
70
  if data["type"] == "conversation.item.input_audio_transcription.delta":
71
  self.transcript += data["delta"]
72
 
73
+ # WebSocket connection manager
74
  def create_ws():
75
  cid = str(uuid.uuid4())
76
  client = WebSocketClient(WS_URI, HEADERS, cid)
 
90
  connections[cid].transcript = ""
91
  return ""
92
 
93
# Render an assistant reply as Markdown: question header, answer body, and
# thumbnail links for any PNG sources cited in the reply text.
def format_response(content, user_prompt):
    """Return a Markdown string combining *user_prompt*, *content*, and any
    raw.githubusercontent.com PNG links found in *content* as inline images."""
    header = f"### ❓ {user_prompt}\n\n---\n\n### 🧠 In summary:\n{content}"
    png_links = re.findall(r'https://raw\.githubusercontent\.com/[^\s)]+\.png', content)
    if not png_links:
        return header
    gallery = "\n".join(f"![]({link})" for link in png_links)
    return header + "\n\n### 📎 Sources:\n" + gallery
100
+
101
+ # Assistant chat handler
102
  def handle_chat(user_input, thread_id):
103
  if not OPENAI_API_KEY or not ASSISTANT_ID:
104
+ return "❌ Missing secrets!", thread_id
105
 
106
  try:
107
  if thread_id is None:
 
121
  for msg in reversed(msgs.data):
122
  if msg.role == "assistant":
123
  content = msg.content[0].text.value
124
+ return format_response(content, user_input), thread_id
 
 
 
 
 
 
125
 
126
+ return "⚠️ No assistant reply", thread_id
127
 
128
  except Exception as e:
129
+ return f"❌ {e}", thread_id
130
 
131
# Hand a finished voice transcript off to the assistant chat handler.
def feed_transcript(transcript, thread_id, cid):
    """Forward a non-empty *transcript* to handle_chat.

    Clears the per-client WebSocket transcript buffer (keyed by *cid*) before
    dispatching, so the next utterance starts fresh. An empty/whitespace-only
    transcript leaves the UI output untouched.
    """
    if not transcript.strip():
        # Nothing was spoken yet: keep the current output as-is.
        return gr.update(), thread_id
    if cid in connections:
        # Reset the streaming transcript for this client before sending.
        connections[cid].transcript = ""
    return handle_chat(transcript, thread_id)
138
 
139
  # ============ Gradio UI ============
140
 
 
143
  <style>
144
  body {
145
  font-family: 'Inter', sans-serif;
146
+ background-color: #0f0f0f;
147
+ color: #f0f0f0;
148
  }
149
+ #centered-prompt {
150
+ display: flex;
151
+ flex-direction: column;
152
+ justify-content: center;
153
+ align-items: center;
154
+ height: 65vh;
155
+ text-align: center;
156
+ }
157
+ .prompt-input {
158
+ background: #1f1f1f;
159
+ color: white;
160
+ padding: 16px;
161
  border-radius: 12px;
162
+ border: none;
163
  width: 100%;
164
+ font-size: 16px;
165
+ }
166
+ .input-container {
167
+ position: fixed;
168
+ bottom: 20px;
169
+ left: 0;
170
+ right: 0;
171
+ margin: auto;
172
+ max-width: 700px;
173
+ display: flex;
174
+ gap: 8px;
175
+ background: #1f1f1f;
176
+ padding: 12px;
177
+ border-radius: 16px;
178
+ justify-content: space-between;
179
+ align-items: center;
180
+ }
181
+ .icon-btn {
182
+ background: #292929;
183
  color: white;
184
  border: none;
185
+ border-radius: 50%;
186
+ padding: 10px 14px;
187
+ cursor: pointer;
188
+ font-size: 18px;
189
+ }
190
+ .assistant-response {
191
+ margin-top: 20px;
192
+ padding: 16px;
193
+ background: #1a1a1a;
194
+ border-radius: 16px;
195
  }
196
+ .big-btn {
197
+ width: 100%;
198
+ padding: 12px;
199
+ font-size: 16px;
200
+ background: #333;
201
+ color: white;
202
+ border: none;
203
+ border-radius: 8px;
204
  }
205
  </style>
206
  """)
 
208
  thread_state = gr.State()
209
  client_id = gr.State()
210
 
211
+ with gr.Column():
212
+ gr.HTML("<div id='centered-prompt'><h1>How can I help you today?</h1></div>")
 
 
 
 
213
 
214
+ output_area = gr.Markdown(elem_id="response", label="💡 Assistant", show_label=False)
 
 
 
215
 
216
+ with gr.Row(elem_id="input-row", elem_classes="input-container"):
217
+ prompt_box = gr.Textbox(placeholder="Ask a question...", elem_classes="prompt-input", show_label=False)
218
+ send_button = gr.Button("⬆️", elem_classes="icon-btn")
219
 
220
+ with gr.Column():
221
+ gr.Markdown("🎙️ Real-time Voice Input")
222
+ voice_input = gr.Audio(label="Tap to Speak", streaming=True, type="numpy")
223
+ transcript_box = gr.Textbox(label="Transcript", lines=2, interactive=False)
224
+ voice_btn = gr.Button("Send Voice", elem_classes="big-btn")
225
+ clear_btn = gr.Button("🧹 Clear Transcript", elem_classes="big-btn")
226
 
227
  # Bindings
228
+ send_button.click(fn=handle_chat,
229
+ inputs=[prompt_box, thread_state],
230
+ outputs=[output_area, thread_state])
231
 
232
  voice_input.stream(fn=send_audio,
233
  inputs=[voice_input, client_id],
234
  outputs=transcript_box,
235
  stream_every=0.5)
236
 
237
+ voice_btn.click(fn=feed_transcript,
238
+ inputs=[transcript_box, thread_state, client_id],
239
+ outputs=[output_area, thread_state])
240
 
241
+ clear_btn.click(fn=clear_transcript_only,
242
+ inputs=[client_id],
243
+ outputs=transcript_box)
244
 
245
  app.load(fn=create_ws, outputs=[client_id])
246