Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,7 +7,7 @@ from openai import OpenAI
|
|
7 |
from websockets import connect
|
8 |
from dotenv import load_dotenv
|
9 |
|
10 |
-
# Load
|
11 |
load_dotenv()
|
12 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
13 |
ASSISTANT_ID = os.getenv("ASSISTANT_ID")
|
@@ -70,7 +70,6 @@ class WebSocketClient:
|
|
70 |
if data["type"] == "conversation.item.input_audio_transcription.delta":
|
71 |
self.transcript += data["delta"]
|
72 |
|
73 |
-
# WebSocket connection manager
|
74 |
def create_ws():
|
75 |
cid = str(uuid.uuid4())
|
76 |
client = WebSocketClient(WS_URI, HEADERS, cid)
|
@@ -85,50 +84,44 @@ def send_audio(chunk, cid):
|
|
85 |
connections[cid].enqueue_audio_chunk(sr, arr)
|
86 |
return connections[cid].transcript.strip()
|
87 |
|
88 |
-
def
|
89 |
if cid in connections:
|
90 |
connections[cid].transcript = ""
|
91 |
return ""
|
92 |
|
93 |
-
def format_response(content,
|
94 |
-
|
95 |
-
thumbnails = ""
|
96 |
image_urls = re.findall(r'https://raw\.githubusercontent\.com/[^\s)]+\.png', content)
|
97 |
if image_urls:
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
|
|
|
|
|
|
103 |
if not OPENAI_API_KEY or not ASSISTANT_ID:
|
104 |
-
return "❌ Missing
|
105 |
-
|
106 |
try:
|
107 |
if thread_id is None:
|
108 |
thread = client.beta.threads.create()
|
109 |
thread_id = thread.id
|
110 |
-
|
111 |
-
client.beta.threads.messages.create(thread_id=thread_id, role="user", content=user_input)
|
112 |
run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=ASSISTANT_ID)
|
113 |
-
|
114 |
while True:
|
115 |
status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
|
116 |
if status.status == "completed":
|
117 |
break
|
118 |
time.sleep(1)
|
119 |
-
|
120 |
msgs = client.beta.threads.messages.list(thread_id=thread_id)
|
121 |
for msg in reversed(msgs.data):
|
122 |
if msg.role == "assistant":
|
123 |
-
|
124 |
-
return format_response(content, user_input), thread_id
|
125 |
-
|
126 |
return "⚠️ No assistant reply", thread_id
|
127 |
-
|
128 |
except Exception as e:
|
129 |
return f"❌ {e}", thread_id
|
130 |
|
131 |
-
# Feed transcript as assistant input
|
132 |
def feed_transcript(transcript, thread_id, cid):
|
133 |
if not transcript.strip():
|
134 |
return gr.update(), thread_id
|
@@ -136,111 +129,98 @@ def feed_transcript(transcript, thread_id, cid):
|
|
136 |
connections[cid].transcript = ""
|
137 |
return handle_chat(transcript, thread_id)
|
138 |
|
139 |
-
#
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
}
|
196 |
-
.big-btn {
|
197 |
-
width: 100%;
|
198 |
-
padding: 12px;
|
199 |
-
font-size: 16px;
|
200 |
-
background: #333;
|
201 |
-
color: white;
|
202 |
-
border: none;
|
203 |
-
border-radius: 8px;
|
204 |
-
}
|
205 |
-
</style>
|
206 |
-
""")
|
207 |
|
208 |
thread_state = gr.State()
|
209 |
client_id = gr.State()
|
210 |
|
211 |
with gr.Column():
|
212 |
-
gr.HTML("<
|
213 |
-
|
214 |
-
output_area = gr.Markdown(elem_id="response", label="💡 Assistant", show_label=False)
|
215 |
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
|
220 |
with gr.Column():
|
221 |
gr.Markdown("🎙️ Real-time Voice Input")
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
|
227 |
# Bindings
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
|
241 |
-
|
242 |
inputs=[client_id],
|
243 |
-
outputs=
|
244 |
|
245 |
app.load(fn=create_ws, outputs=[client_id])
|
246 |
|
|
|
7 |
from websockets import connect
|
8 |
from dotenv import load_dotenv
|
9 |
|
10 |
+
# Load secrets
|
11 |
load_dotenv()
|
12 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
13 |
ASSISTANT_ID = os.getenv("ASSISTANT_ID")
|
|
|
70 |
if data["type"] == "conversation.item.input_audio_transcription.delta":
|
71 |
self.transcript += data["delta"]
|
72 |
|
|
|
73 |
def create_ws():
|
74 |
cid = str(uuid.uuid4())
|
75 |
client = WebSocketClient(WS_URI, HEADERS, cid)
|
|
|
84 |
connections[cid].enqueue_audio_chunk(sr, arr)
|
85 |
return connections[cid].transcript.strip()
|
86 |
|
87 |
+
def clear_transcript(cid):
    """Reset the stored transcript for connection *cid* and blank the UI textbox."""
    conn = connections.get(cid)
    if conn is not None:
        conn.transcript = ""
    return ""
|
91 |
|
92 |
+
def format_response(content, prompt):
    """Wrap an assistant reply in the response-card HTML shown by the UI.

    Any GitHub raw-content ``.png`` links found in *content* are rendered
    as a thumbnail grid of sources beneath the summary.
    """
    parts = [
        f"<div class='response-card'><h3>❓ {prompt}</h3>"
        f"<p><b>🧠 In summary:</b></p><p>{content}</p>"
    ]
    sources = re.findall(r'https://raw\.githubusercontent\.com/[^\s)]+\.png', content)
    if sources:
        parts.append("<div class='source-grid'><h4>📎 Sources:</h4>")
        parts.extend(f"<img src='{url}' class='thumbnail' />" for url in sources)
        parts.append("</div>")
    parts.append("</div>")
    return "".join(parts)
|
102 |
+
|
103 |
+
def handle_chat(prompt, thread_id):
    """Send *prompt* to the OpenAI Assistant and return ``(html_reply, thread_id)``.

    Creates a thread on first use, posts the user message, then polls the
    run until it reaches a terminal state.  Always returns a 2-tuple so the
    caller can persist the thread id across turns; errors are returned as
    "❌ ..." strings rather than raised, so the UI never crashes.
    """
    if not OPENAI_API_KEY or not ASSISTANT_ID:
        return "❌ Missing API Key or Assistant ID", thread_id
    try:
        if thread_id is None:
            thread = client.beta.threads.create()
            thread_id = thread.id
        client.beta.threads.messages.create(thread_id=thread_id, role="user", content=prompt)
        run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=ASSISTANT_ID)

        # Poll until the run reaches a terminal state.  The previous loop
        # only exited on "completed", so a failed/cancelled/expired run
        # spun forever.
        while True:
            status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
            if status.status == "completed":
                break
            if status.status in ("failed", "cancelled", "expired"):
                return f"❌ Run {status.status}", thread_id
            time.sleep(1)

        # messages.list returns newest-first; iterate in that order so we
        # return the assistant's reply to *this* prompt.  (The previous
        # reversed() walk returned the oldest assistant message instead.)
        msgs = client.beta.threads.messages.list(thread_id=thread_id)
        for msg in msgs.data:
            if msg.role == "assistant":
                return format_response(msg.content[0].text.value, prompt), thread_id
        return "⚠️ No assistant reply", thread_id
    except Exception as e:
        # Surface API/network errors to the UI instead of crashing the app.
        return f"❌ {e}", thread_id
|
124 |
|
|
|
125 |
def feed_transcript(transcript, thread_id, cid):
    """Forward a voice transcript to the assistant as a normal chat turn.

    Empty/whitespace transcripts are ignored (the output area is left
    untouched via ``gr.update()``).  The stored transcript is cleared so
    the next utterance starts fresh.
    """
    if not transcript.strip():
        return gr.update(), thread_id
    # Guard the lookup: the websocket for this client id may not be
    # registered (clear_transcript performs the same check); without it a
    # stale cid raises KeyError and the voice send silently breaks.
    if cid in connections:
        connections[cid].transcript = ""
    return handle_chat(transcript, thread_id)
|
131 |
|
132 |
+
# === Gradio UI ===
# Dark-themed chat layout: a response card on top, a fixed input bar at the
# bottom, and a voice-input column.  NOTE: the `.big-btn` rule is required
# by the `mic_send` / `mic_clear` buttons below (elem_classes="big-btn");
# it was dropped in a CSS rewrite, leaving those buttons unstyled, and is
# restored here.
with gr.Blocks(css="""
body {
    background-color: #0f0f0f;
    color: #f1f1f1;
    font-family: 'Inter', sans-serif;
}
.response-card {
    background: #1a1a1a;
    padding: 20px;
    border-radius: 14px;
    margin-top: 16px;
    box-shadow: 0 2px 6px #000;
}
.source-grid {
    display: flex;
    flex-wrap: wrap;
    gap: 10px;
    margin-top: 10px;
}
.thumbnail {
    width: 120px;
    border-radius: 8px;
    border: 1px solid #333;
}
.input-wrap {
    position: fixed;
    bottom: 20px;
    left: 0;
    right: 0;
    max-width: 700px;
    margin: auto;
    display: flex;
    gap: 10px;
    padding: 12px;
    background: #1a1a1a;
    border-radius: 16px;
}
#chat-input {
    flex-grow: 1;
    padding: 14px;
    border-radius: 12px;
    background: #2a2a2a;
    border: none;
    color: white;
    font-size: 16px;
}
#send-btn {
    font-size: 18px;
    background: #3f3fff;
    color: white;
    border: none;
    padding: 12px 16px;
    border-radius: 10px;
}
.big-btn {
    width: 100%;
    padding: 12px;
    font-size: 16px;
    background: #333;
    color: white;
    border: none;
    border-radius: 8px;
}
""") as app:

    # Per-session state: the OpenAI thread id and the websocket client id.
    thread_state = gr.State()
    client_id = gr.State()

    with gr.Column():
        gr.HTML("<h1 style='text-align:center; margin-top:40px;'>How can I help you today?</h1>")
        output_md = gr.HTML()

        with gr.Row(elem_id="chat-row", elem_classes="input-wrap"):
            user_input = gr.Textbox(elem_id="chat-input", show_label=False, placeholder="Ask something...")
            send_btn = gr.Button("➤", elem_id="send-btn")

    with gr.Column():
        gr.Markdown("🎙️ Real-time Voice Input")
        mic_audio = gr.Audio(label="Tap to Speak", streaming=True, type="numpy")
        mic_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
        mic_send = gr.Button("Send Voice", elem_classes="big-btn")
        mic_clear = gr.Button("Clear Transcript", elem_classes="big-btn")

    # Bindings
    send_btn.click(fn=handle_chat,
                   inputs=[user_input, thread_state],
                   outputs=[output_md, thread_state])

    # Stream mic chunks to the websocket transcriber every 0.5 s.
    mic_audio.stream(fn=send_audio,
                     inputs=[mic_audio, client_id],
                     outputs=mic_transcript,
                     stream_every=0.5)

    mic_send.click(fn=feed_transcript,
                   inputs=[mic_transcript, thread_state, client_id],
                   outputs=[output_md, thread_state])

    mic_clear.click(fn=clear_transcript,
                    inputs=[client_id],
                    outputs=mic_transcript)

    # Open a per-session websocket connection when the page loads.
    app.load(fn=create_ws, outputs=[client_id])
|
226 |
|