IAMTFRMZA committed on
Commit
8971a48
·
verified ·
1 Parent(s): e28818b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -112
app.py CHANGED
@@ -7,7 +7,7 @@ from openai import OpenAI
7
  from websockets import connect
8
  from dotenv import load_dotenv
9
 
10
- # Load environment secrets
11
  load_dotenv()
12
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
13
  ASSISTANT_ID = os.getenv("ASSISTANT_ID")
@@ -70,7 +70,6 @@ class WebSocketClient:
70
  if data["type"] == "conversation.item.input_audio_transcription.delta":
71
  self.transcript += data["delta"]
72
 
73
- # WebSocket connection manager
74
  def create_ws():
75
  cid = str(uuid.uuid4())
76
  client = WebSocketClient(WS_URI, HEADERS, cid)
@@ -85,50 +84,44 @@ def send_audio(chunk, cid):
85
  connections[cid].enqueue_audio_chunk(sr, arr)
86
  return connections[cid].transcript.strip()
87
 
88
- def clear_transcript_only(cid):
89
  if cid in connections:
90
  connections[cid].transcript = ""
91
  return ""
92
 
93
- def format_response(content, user_prompt):
94
- summary_block = f"""### {user_prompt}\n\n---\n\n### 🧠 In summary:\n{content}"""
95
- thumbnails = ""
96
  image_urls = re.findall(r'https://raw\.githubusercontent\.com/[^\s)]+\.png', content)
97
  if image_urls:
98
- thumbnails = "\n\n### 📎 Sources:\n" + "\n".join([f"![]({url})" for url in image_urls])
99
- return summary_block + thumbnails
100
-
101
- # Assistant chat handler
102
- def handle_chat(user_input, thread_id):
 
 
 
103
  if not OPENAI_API_KEY or not ASSISTANT_ID:
104
- return "❌ Missing secrets!", thread_id
105
-
106
  try:
107
  if thread_id is None:
108
  thread = client.beta.threads.create()
109
  thread_id = thread.id
110
-
111
- client.beta.threads.messages.create(thread_id=thread_id, role="user", content=user_input)
112
  run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=ASSISTANT_ID)
113
-
114
  while True:
115
  status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
116
  if status.status == "completed":
117
  break
118
  time.sleep(1)
119
-
120
  msgs = client.beta.threads.messages.list(thread_id=thread_id)
121
  for msg in reversed(msgs.data):
122
  if msg.role == "assistant":
123
- content = msg.content[0].text.value
124
- return format_response(content, user_input), thread_id
125
-
126
  return "⚠️ No assistant reply", thread_id
127
-
128
  except Exception as e:
129
  return f"❌ {e}", thread_id
130
 
131
- # Feed transcript as assistant input
132
  def feed_transcript(transcript, thread_id, cid):
133
  if not transcript.strip():
134
  return gr.update(), thread_id
@@ -136,111 +129,98 @@ def feed_transcript(transcript, thread_id, cid):
136
  connections[cid].transcript = ""
137
  return handle_chat(transcript, thread_id)
138
 
139
- # ============ Gradio UI ============
140
-
141
- with gr.Blocks(theme=gr.themes.Soft()) as app:
142
- gr.HTML("""
143
- <style>
144
- body {
145
- font-family: 'Inter', sans-serif;
146
- background-color: #0f0f0f;
147
- color: #f0f0f0;
148
- }
149
- #centered-prompt {
150
- display: flex;
151
- flex-direction: column;
152
- justify-content: center;
153
- align-items: center;
154
- height: 65vh;
155
- text-align: center;
156
- }
157
- .prompt-input {
158
- background: #1f1f1f;
159
- color: white;
160
- padding: 16px;
161
- border-radius: 12px;
162
- border: none;
163
- width: 100%;
164
- font-size: 16px;
165
- }
166
- .input-container {
167
- position: fixed;
168
- bottom: 20px;
169
- left: 0;
170
- right: 0;
171
- margin: auto;
172
- max-width: 700px;
173
- display: flex;
174
- gap: 8px;
175
- background: #1f1f1f;
176
- padding: 12px;
177
- border-radius: 16px;
178
- justify-content: space-between;
179
- align-items: center;
180
- }
181
- .icon-btn {
182
- background: #292929;
183
- color: white;
184
- border: none;
185
- border-radius: 50%;
186
- padding: 10px 14px;
187
- cursor: pointer;
188
- font-size: 18px;
189
- }
190
- .assistant-response {
191
- margin-top: 20px;
192
- padding: 16px;
193
- background: #1a1a1a;
194
- border-radius: 16px;
195
- }
196
- .big-btn {
197
- width: 100%;
198
- padding: 12px;
199
- font-size: 16px;
200
- background: #333;
201
- color: white;
202
- border: none;
203
- border-radius: 8px;
204
- }
205
- </style>
206
- """)
207
 
208
  thread_state = gr.State()
209
  client_id = gr.State()
210
 
211
  with gr.Column():
212
- gr.HTML("<div id='centered-prompt'><h1>How can I help you today?</h1></div>")
213
-
214
- output_area = gr.Markdown(elem_id="response", label="💡 Assistant", show_label=False)
215
 
216
- with gr.Row(elem_id="input-row", elem_classes="input-container"):
217
- prompt_box = gr.Textbox(placeholder="Ask a question...", elem_classes="prompt-input", show_label=False)
218
- send_button = gr.Button("⬆️", elem_classes="icon-btn")
219
 
220
  with gr.Column():
221
  gr.Markdown("🎙️ Real-time Voice Input")
222
- voice_input = gr.Audio(label="Tap to Speak", streaming=True, type="numpy")
223
- transcript_box = gr.Textbox(label="Transcript", lines=2, interactive=False)
224
- voice_btn = gr.Button("Send Voice", elem_classes="big-btn")
225
- clear_btn = gr.Button("🧹 Clear Transcript", elem_classes="big-btn")
226
 
227
  # Bindings
228
- send_button.click(fn=handle_chat,
229
- inputs=[prompt_box, thread_state],
230
- outputs=[output_area, thread_state])
231
 
232
- voice_input.stream(fn=send_audio,
233
- inputs=[voice_input, client_id],
234
- outputs=transcript_box,
235
- stream_every=0.5)
236
 
237
- voice_btn.click(fn=feed_transcript,
238
- inputs=[transcript_box, thread_state, client_id],
239
- outputs=[output_area, thread_state])
240
 
241
- clear_btn.click(fn=clear_transcript_only,
242
  inputs=[client_id],
243
- outputs=transcript_box)
244
 
245
  app.load(fn=create_ws, outputs=[client_id])
246
 
 
7
  from websockets import connect
8
  from dotenv import load_dotenv
9
 
10
+ # Load secrets
11
  load_dotenv()
12
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
13
  ASSISTANT_ID = os.getenv("ASSISTANT_ID")
 
70
  if data["type"] == "conversation.item.input_audio_transcription.delta":
71
  self.transcript += data["delta"]
72
 
 
73
  def create_ws():
74
  cid = str(uuid.uuid4())
75
  client = WebSocketClient(WS_URI, HEADERS, cid)
 
84
  connections[cid].enqueue_audio_chunk(sr, arr)
85
  return connections[cid].transcript.strip()
86
 
87
def clear_transcript(cid):
    """Wipe the buffered transcript for client *cid* and return "" to blank the UI textbox."""
    ws = connections.get(cid)
    if ws is not None:
        ws.transcript = ""
    return ""
91
 
92
def format_response(content, prompt):
    """Render the assistant reply *content* for question *prompt* as an HTML card.

    Any GitHub raw-content PNG links found inside *content* are appended as a
    thumbnail "Sources" grid. Returns the assembled HTML string.
    """
    pieces = [
        f"<div class='response-card'><h3>❓ {prompt}</h3><p><b>🧠 In summary:</b></p><p>{content}</p>"
    ]
    # Only PNGs hosted on raw.githubusercontent.com are treated as source images.
    image_urls = re.findall(r'https://raw\.githubusercontent\.com/[^\s)]+\.png', content)
    if image_urls:
        pieces.append("<div class='source-grid'><h4>📎 Sources:</h4>")
        pieces.extend(f"<img src='{url}' class='thumbnail' />" for url in image_urls)
        pieces.append("</div>")
    pieces.append("</div>")
    return "".join(pieces)
102
+
103
def handle_chat(prompt, thread_id):
    """Send *prompt* to the configured OpenAI Assistant and return (html, thread_id).

    Creates a new thread when *thread_id* is None, posts the user message, runs
    the assistant, polls the run to completion, and formats the first assistant
    message found via format_response(). On any failure the returned string
    starts with "❌"/"⚠️" so the UI shows the error inline.
    """
    if not OPENAI_API_KEY or not ASSISTANT_ID:
        return "❌ Missing API Key or Assistant ID", thread_id
    try:
        if thread_id is None:
            thread = client.beta.threads.create()
            thread_id = thread.id
        client.beta.threads.messages.create(thread_id=thread_id, role="user", content=prompt)
        run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=ASSISTANT_ID)
        # Poll until the run reaches a terminal state. The original loop only
        # broke on "completed", so a run that ended "failed"/"cancelled"/
        # "expired" spun forever; bail out with an error message instead.
        while True:
            status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
            if status.status == "completed":
                break
            if status.status in ("failed", "cancelled", "expired"):
                return f"❌ Run ended with status: {status.status}", thread_id
            time.sleep(1)
        msgs = client.beta.threads.messages.list(thread_id=thread_id)
        # NOTE(review): reversed() walks the listing oldest-first, so this picks
        # the earliest assistant message in the thread — confirm that is the
        # intended one (vs. the latest reply).
        for msg in reversed(msgs.data):
            if msg.role == "assistant":
                return format_response(msg.content[0].text.value, prompt), thread_id
        return "⚠️ No assistant reply", thread_id
    except Exception as e:
        # Surface any API/network error to the UI rather than crashing Gradio.
        return f"❌ {e}", thread_id
124
 
 
125
  def feed_transcript(transcript, thread_id, cid):
126
  if not transcript.strip():
127
  return gr.update(), thread_id
 
129
  connections[cid].transcript = ""
130
  return handle_chat(transcript, thread_id)
131
 
132
# === Gradio UI ===
# Dark-themed single-page layout: a centered heading, an HTML response area,
# a fixed bottom chat bar, and a voice-input section. CSS reproduced verbatim
# from the source (whitespace normalized; CSS is whitespace-insensitive).
with gr.Blocks(css="""
body {
    background-color: #0f0f0f;
    color: #f1f1f1;
    font-family: 'Inter', sans-serif;
}
.response-card {
    background: #1a1a1a;
    padding: 20px;
    border-radius: 14px;
    margin-top: 16px;
    box-shadow: 0 2px 6px #000;
}
.source-grid {
    display: flex;
    flex-wrap: wrap;
    gap: 10px;
    margin-top: 10px;
}
.thumbnail {
    width: 120px;
    border-radius: 8px;
    border: 1px solid #333;
}
.input-wrap {
    position: fixed;
    bottom: 20px;
    left: 0;
    right: 0;
    max-width: 700px;
    margin: auto;
    display: flex;
    gap: 10px;
    padding: 12px;
    background: #1a1a1a;
    border-radius: 16px;
}
#chat-input {
    flex-grow: 1;
    padding: 14px;
    border-radius: 12px;
    background: #2a2a2a;
    border: none;
    color: white;
    font-size: 16px;
}
#send-btn {
    font-size: 18px;
    background: #3f3fff;
    color: white;
    border: none;
    padding: 12px 16px;
    border-radius: 10px;
}
""") as app:

    # Per-session state: OpenAI thread id, and the websocket client id
    # created by create_ws() on page load.
    thread_state = gr.State()
    client_id = gr.State()

    with gr.Column():
        gr.HTML("<h1 style='text-align:center; margin-top:40px;'>How can I help you today?</h1>")
        output_md = gr.HTML()

        # NOTE(review): the exact nesting of this row (inside vs. beside the
        # column) was lost in the rendering — confirm against the live layout.
        # It is position:fixed, so placement should not change appearance.
        with gr.Row(elem_id="chat-row", elem_classes="input-wrap"):
            user_input = gr.Textbox(elem_id="chat-input", show_label=False, placeholder="Ask something...")
            send_btn = gr.Button("", elem_id="send-btn")

    with gr.Column():
        gr.Markdown("🎙️ Real-time Voice Input")
        mic_audio = gr.Audio(label="Tap to Speak", streaming=True, type="numpy")
        mic_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
        # NOTE(review): the "big-btn" class is referenced here but not defined
        # in the css string above — cosmetic only; verify intent.
        mic_send = gr.Button("Send Voice", elem_classes="big-btn")
        mic_clear = gr.Button("Clear Transcript", elem_classes="big-btn")

    # Bindings
    send_btn.click(fn=handle_chat,
                   inputs=[user_input, thread_state],
                   outputs=[output_md, thread_state])

    # Stream mic chunks to the websocket transcriber every 0.5 s.
    mic_audio.stream(fn=send_audio,
                     inputs=[mic_audio, client_id],
                     outputs=mic_transcript,
                     stream_every=0.5)

    mic_send.click(fn=feed_transcript,
                   inputs=[mic_transcript, thread_state, client_id],
                   outputs=[output_md, thread_state])

    mic_clear.click(fn=clear_transcript,
                    inputs=[client_id],
                    outputs=mic_transcript)

    # Open a per-client websocket as soon as the page loads.
    app.load(fn=create_ws, outputs=[client_id])
226