Update app.py
app.py
CHANGED
@@ -1,3 +1,4 @@
+# top of the file
 import gradio as gr
 import os, time, re, json, base64, asyncio, threading, uuid, io
 import numpy as np
@@ -7,7 +8,7 @@ from openai import OpenAI
 from websockets import connect
 from dotenv import load_dotenv
 
-#
+# Load secrets
 load_dotenv()
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 ASSISTANT_ID = os.getenv("ASSISTANT_ID")
@@ -17,10 +18,12 @@ HEADERS = {"Authorization": f"Bearer {OPENAI_API_KEY}", "OpenAI-Beta": "realtime
 WS_URI = "wss://api.openai.com/v1/realtime?intent=transcription"
 connections = {}
 
-#
+# WebSocket Client
 class WebSocketClient:
     def __init__(self, uri, headers, client_id):
-        self.uri
+        self.uri = uri
+        self.headers = headers
+        self.client_id = client_id
         self.websocket = None
         self.queue = asyncio.Queue(maxsize=10)
         self.transcript = ""
@@ -68,7 +71,7 @@ class WebSocketClient:
         if data["type"] == "conversation.item.input_audio_transcription.delta":
             self.transcript += data["delta"]
 
-#
+# Real-time transcription connection manager
 def create_ws():
     cid = str(uuid.uuid4())
     client = WebSocketClient(WS_URI, HEADERS, cid)
@@ -103,8 +106,7 @@ def handle_chat(user_input, history, thread_id, image_url):
 
         while True:
             status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
-            if status.status == "completed":
-                break
+            if status.status == "completed": break
             time.sleep(1)
 
         msgs = client.beta.threads.messages.list(thread_id=thread_id)
@@ -116,8 +118,7 @@ def handle_chat(user_input, history, thread_id, image_url):
                 r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
                 content
             )
-            if match:
-                image_url = match.group(0)
+            if match: image_url = match.group(0)
             break
 
         return "", history, thread_id, image_url
@@ -125,26 +126,15 @@ def handle_chat(user_input, history, thread_id, image_url):
     except Exception as e:
         return f"❌ {e}", history, thread_id, image_url
 
-# ============ Auto-Send Voice Toggle ============
-def maybe_send_transcript(transcript, history, thread_id, image_url, voice_only_enabled, client_id):
-    if voice_only_enabled and transcript.strip():
-        # Clear transcript after sending
-        if client_id in connections:
-            connections[client_id].transcript = ""
-        return handle_chat(transcript, history, thread_id, image_url)
-    return transcript, history, thread_id, image_url
-
 # ============ Gradio UI ============
 with gr.Blocks(theme=gr.themes.Soft()) as app:
     gr.Markdown("# 📄 Document AI Assistant")
 
-    # STATES
     chat_state = gr.State([])
     thread_state = gr.State()
     image_state = gr.State()
     client_id = gr.State()
    voice_enabled = gr.State(False)
-    voice_only_state = gr.State(True)
 
     with gr.Row(equal_height=True):
         with gr.Column(scale=1):
@@ -163,9 +153,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
             voice_input = gr.Audio(label="Mic", streaming=True)
             voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
             clear_btn = gr.Button("🧹 Clear Transcript")
-            voice_only_toggle = gr.Checkbox(label="Voice-Only Mode 🎤➡️💬", value=True)
 
-    #
+    # Functional bindings
    def toggle_voice(curr):
         return not curr, gr.update(visible=not curr)
 
@@ -174,21 +163,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
         inputs=[user_prompt, chat_state, thread_state, image_state],
         outputs=[user_prompt, chat, thread_state, image_state])
     image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
-
-    # Real-time audio streaming
     voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
     clear_btn.click(fn=clear_transcript, inputs=[client_id], outputs=voice_transcript)
-
-    # Auto-send voice transcript if Voice-Only Mode is enabled
-    voice_input.change(
-        fn=maybe_send_transcript,
-        inputs=[voice_transcript, chat_state, thread_state, image_state, voice_only_state, client_id],
-        outputs=[user_prompt, chat, thread_state, image_state]
-    )
-
-    voice_only_toggle.change(fn=lambda x: x, inputs=voice_only_toggle, outputs=voice_only_state)
-
-    # Initialize WebSocket connection
     app.load(fn=create_ws, outputs=[client_id])
 
 app.launch()
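
Note: the polling loop in handle_chat (hunk @@ -103,8 +106,7 @@) only breaks on "completed", so a run that terminates as "failed", "cancelled", or "expired" would poll forever. A minimal defensive sketch, using the same Assistants beta retrieve call the diff already makes; wait_for_run and its timeout are illustrative additions, not part of this commit:

```python
import time

def wait_for_run(client, thread_id, run_id, timeout=60):
    # Hypothetical helper: poll until the run reaches any terminal
    # state, or give up after `timeout` seconds instead of spinning.
    deadline = time.time() + timeout
    while time.time() < deadline:
        run = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
        if run.status in ("completed", "failed", "cancelled", "expired"):
            return run
        time.sleep(1)
    raise TimeoutError(f"run {run_id} did not finish within {timeout}s")
```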
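Note: the bindings voice_input.stream(fn=send_audio, ...) and clear_btn.click(fn=clear_transcript, ...) reference helpers defined outside the hunks shown. A sketch of plausible shapes, assuming the module-level connections registry from the diff and Gradio's streaming (sample_rate, ndarray) audio chunks; enqueue_audio_chunk is a hypothetical stand-in for however WebSocketClient feeds its asyncio.Queue:

```python
def clear_transcript(client_id):
    # Reset the buffered transcript for this client's realtime session.
    if client_id in connections:
        connections[client_id].transcript = ""
    return ""

def send_audio(audio, client_id):
    # Gradio streams mic audio as (sample_rate, numpy array) chunks.
    if client_id not in connections:
        return "Connecting..."
    sr, chunk = audio
    # Hypothetical method, not part of the shown code: push the chunk
    # onto the client's send queue for the realtime transcription socket.
    connections[client_id].enqueue_audio_chunk(sr, chunk)
    # Surface whatever transcript has accumulated so far.
    return connections[client_id].transcript
```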