Update app.py
app.py CHANGED
@@ -4,10 +4,10 @@ import numpy as np
 import soundfile as sf
 from pydub import AudioSegment
 from openai import OpenAI
-from websockets import connect
+from websockets import connect, Data, ClientConnection
 from dotenv import load_dotenv
 
-#
+# ---------------- Environment & Client Setup ----------------
 load_dotenv()
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 ASSISTANT_ID = os.getenv("ASSISTANT_ID")
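For context, a minimal sketch of how the widened import might be exercised (`Data` and `ClientConnection` are type-hint imports). The realtime URL, header, and first event below are assumptions for illustration, not part of this commit:

```python
import asyncio
import os

from dotenv import load_dotenv
from websockets import connect

load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

async def open_realtime_socket():
    # Assumed endpoint; the app's actual URL lives elsewhere in app.py.
    url = "wss://api.openai.com/v1/realtime?intent=transcription"
    headers = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
    # `additional_headers` on websockets >= 14; older releases call it `extra_headers`.
    async with connect(url, additional_headers=headers) as ws:
        await ws.send('{"type": "input_audio_buffer.clear"}')  # placeholder first event

if __name__ == "__main__":
    asyncio.run(open_realtime_socket())
```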
@@ -45,10 +45,7 @@ class WebSocketClient:
         buf = io.BytesIO(); sf.write(buf, int16, sr, format='WAV', subtype='PCM_16')
         audio = AudioSegment.from_file(buf, format="wav").set_frame_rate(24000)
         out = io.BytesIO(); audio.export(out, format="wav"); out.seek(0)
-        await self.websocket.send(json.dumps({
-            "type": "input_audio_buffer.append",
-            "audio": base64.b64encode(out.read()).decode()
-        }))
+        await self.websocket.send(json.dumps({"type": "input_audio_buffer.append", "audio": base64.b64encode(out.read()).decode()}))
 
     async def receive_messages(self):
         async for msg in self.websocket:
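The collapsed send is the tail of a longer pipeline visible in the context lines: build a 16-bit WAV in memory with soundfile, resample to 24 kHz with pydub, then base64-encode the bytes into an `input_audio_buffer.append` event. A self-contained sketch of that path, assuming float samples in [-1, 1] (the helper name is illustrative):

```python
import base64
import io
import json

import numpy as np
import soundfile as sf
from pydub import AudioSegment

def encode_audio_event(samples: np.ndarray, sr: int) -> str:
    """Build the input_audio_buffer.append payload shown in the diff."""
    int16 = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)  # assumed float input
    buf = io.BytesIO()
    sf.write(buf, int16, sr, format="WAV", subtype="PCM_16")
    buf.seek(0)
    audio = AudioSegment.from_file(buf, format="wav").set_frame_rate(24000)
    out = io.BytesIO()
    audio.export(out, format="wav")
    out.seek(0)
    return json.dumps({"type": "input_audio_buffer.append",
                       "audio": base64.b64encode(out.read()).decode()})
```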
@@ -77,10 +74,10 @@ def clear_transcript(cid):
     if cid in connections: connections[cid].transcript = ""
     return ""
 
-# ---------------- Chat
+# ---------------- Chat Functionality ----------------
 def handle_chat(user_input, history, thread_id, image_url):
     if not OPENAI_API_KEY or not ASSISTANT_ID:
-        return "❌ Missing
+        return "❌ Missing secrets!", history, thread_id, image_url
 
     try:
         if thread_id is None:
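The new early return keeps the four-tuple output shape consistent when secrets are absent; otherwise the try block drives the Assistants API. A hedged sketch of the round trip handle_chat appears to perform (the client calls are standard openai-SDK methods; the prompt text is made up):

```python
import os
from openai import OpenAI

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
ASSISTANT_ID = os.getenv("ASSISTANT_ID")

# Create a thread once, post the user message, then poll the run to completion.
thread = client.beta.threads.create()
client.beta.threads.messages.create(thread_id=thread.id, role="user",
                                    content="What does slide 3 show?")
run = client.beta.threads.runs.create_and_poll(thread_id=thread.id,
                                               assistant_id=ASSISTANT_ID)
if run.status == "completed":
    # Newest messages come first; take the first assistant reply.
    for msg in client.beta.threads.messages.list(thread_id=thread.id).data:
        if msg.role == "assistant":
            print(msg.content[0].text.value)
            break
```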
@@ -100,12 +97,8 @@ def handle_chat(user_input, history, thread_id, image_url):
         if msg.role == "assistant":
             content = msg.content[0].text.value
             history.append((user_input, content))
-            match = re.search(
-                r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
-                content
-            )
-            if match:
-                image_url = match.group(0)
+            match = re.search(r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png', content)
+            if match: image_url = match.group(0)
             break
 
         return "", history, thread_id, image_url
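A quick standalone check of the URL pattern kept by this change; the sample text is made up:

```python
import re

PATTERN = (r'https://raw\.githubusercontent\.com/AndrewLORTech/'
           r'surgical-pathology-manual/main/[\w\-/]*\.png')
text = ("See https://raw.githubusercontent.com/AndrewLORTech/"
        "surgical-pathology-manual/main/fig1.png for the slide.")
match = re.search(PATTERN, text)
print(match.group(0) if match else "no match")
# -> https://raw.githubusercontent.com/AndrewLORTech/surgical-pathology-manual/main/fig1.png
```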
@@ -113,49 +106,40 @@ def handle_chat(user_input, history, thread_id, image_url):
     except Exception as e:
         return f"❌ {e}", history, thread_id, image_url
 
-# ---------------- UI ----------------
-with gr.Blocks(theme=
+# ---------------- Gradio UI Layout ----------------
+with gr.Blocks(theme=gr.themes.Soft()) as app:
     gr.Markdown("# 📄 Document AI Assistant")
 
-    #
+    # STATES
     chat_state = gr.State([])
     thread_state = gr.State()
     image_state = gr.State()
     client_id = gr.State()
-    mic_shown = gr.State(False)
 
-    with gr.Row(
-        # Left: Document Viewer
+    with gr.Row():
         with gr.Column(scale=1):
+            # IMAGE VIEWER (left)
+            image_display = gr.Image(label="🖼️ Document", type="filepath")
 
+            # VOICE (under)
+            voice_transcript = gr.Textbox(label="🎙️ Transcript", lines=4, interactive=False)
+            voice_input = gr.Audio(label="🔴 Record", streaming=True)
+            clear_btn = gr.Button("🧹 Clear Transcript")
+
+        with gr.Column(scale=2):
+            # CHATBOT (right)
             chat = gr.Chatbot(label="💬 Chat", height=450)
+            user_prompt = gr.Textbox(show_label=False, placeholder="Ask your question...")
+            send_btn = gr.Button("Send")
 
-
-            user_input = gr.Textbox(placeholder="Ask your question...", show_label=False, scale=6)
-            mic_btn = gr.Button("🎙️", scale=1)
-            send_btn = gr.Button("Send", scale=2)
-
-    # Hidden Voice Section
-    with gr.Row(visible=False) as mic_row:
-        with gr.Column(scale=4):
-            audio = gr.Audio(label="🎤 Speak", streaming=True)
-        with gr.Column(scale=5):
-            transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
-        with gr.Column(scale=2):
-            clear_btn = gr.Button("🧹 Clear")
-
-    # Logic Wiring
-    def toggle_mic(state): return not state, gr.update(visible=not state)
-    mic_btn.click(toggle_mic, inputs=mic_shown, outputs=[mic_shown, mic_row])
+    # HANDLERS
     send_btn.click(handle_chat,
-                   inputs=[
-                   outputs=[
+                   inputs=[user_prompt, chat_state, thread_state, image_state],
+                   outputs=[user_prompt, chat, thread_state, image_state])
+
     image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
-
-    clear_btn.click(fn=clear_transcript, inputs=[client_id], outputs=
-    app.load(
+    voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
+    clear_btn.click(fn=clear_transcript, inputs=[client_id], outputs=voice_transcript)
+    app.load(create_ws, outputs=[client_id])
 
 app.launch()
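The handler block at the end of the new layout follows one pattern throughout: component event, handler function, outputs list. A minimal runnable sketch of that wiring, with an echo function standing in for handle_chat (all names here are illustrative, and the tuple-style chat history matches what handle_chat builds):

```python
import gradio as gr

def respond(prompt, history):
    # Stand-in for handle_chat: clear the textbox, extend the chat history.
    history = history + [(prompt, f"You said: {prompt}")]
    return "", history

with gr.Blocks() as demo:
    chat = gr.Chatbot(label="Chat")
    box = gr.Textbox(show_label=False, placeholder="Ask your question...")
    btn = gr.Button("Send")
    # Same shape as send_btn.click in the commit: inputs list in, outputs list out.
    btn.click(respond, inputs=[box, chat], outputs=[box, chat])

if __name__ == "__main__":
    demo.launch()
```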