IAMTFRMZA committed
Commit bafde5e · verified · 1 Parent(s): 3bfe4cd

Update app.py

Files changed (1)
  1. app.py +10 -34
app.py CHANGED
@@ -1,3 +1,4 @@
+# top of the file
 import gradio as gr
 import os, time, re, json, base64, asyncio, threading, uuid, io
 import numpy as np
@@ -7,7 +8,7 @@ from openai import OpenAI
 from websockets import connect
 from dotenv import load_dotenv
 
-# ============ Load Secrets ============
+# Load secrets
 load_dotenv()
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 ASSISTANT_ID = os.getenv("ASSISTANT_ID")
@@ -17,10 +18,12 @@ HEADERS = {"Authorization": f"Bearer {OPENAI_API_KEY}", "OpenAI-Beta": "realtime
 WS_URI = "wss://api.openai.com/v1/realtime?intent=transcription"
 connections = {}
 
-# ============ WebSocket Client ============
+# WebSocket Client
 class WebSocketClient:
     def __init__(self, uri, headers, client_id):
-        self.uri, self.headers, self.client_id = uri, headers, client_id
+        self.uri = uri
+        self.headers = headers
+        self.client_id = client_id
         self.websocket = None
         self.queue = asyncio.Queue(maxsize=10)
         self.transcript = ""
@@ -68,7 +71,7 @@ class WebSocketClient:
         if data["type"] == "conversation.item.input_audio_transcription.delta":
             self.transcript += data["delta"]
 
-# ============ Connection Manager ============
+# Real-time transcription connection manager
 def create_ws():
     cid = str(uuid.uuid4())
     client = WebSocketClient(WS_URI, HEADERS, cid)
@@ -103,8 +106,7 @@ def handle_chat(user_input, history, thread_id, image_url):
 
         while True:
             status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
-            if status.status == "completed":
-                break
+            if status.status == "completed": break
             time.sleep(1)
 
         msgs = client.beta.threads.messages.list(thread_id=thread_id)
@@ -116,8 +118,7 @@ def handle_chat(user_input, history, thread_id, image_url):
                 r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
                 content
             )
-            if match:
-                image_url = match.group(0)
+            if match: image_url = match.group(0)
             break
 
         return "", history, thread_id, image_url
@@ -125,26 +126,15 @@ def handle_chat(user_input, history, thread_id, image_url):
     except Exception as e:
         return f"❌ {e}", history, thread_id, image_url
 
-# ============ Auto-Send Voice Toggle ============
-def maybe_send_transcript(transcript, history, thread_id, image_url, voice_only_enabled, client_id):
-    if voice_only_enabled and transcript.strip():
-        # Clear transcript after sending
-        if client_id in connections:
-            connections[client_id].transcript = ""
-        return handle_chat(transcript, history, thread_id, image_url)
-    return transcript, history, thread_id, image_url
-
 # ============ Gradio UI ============
 with gr.Blocks(theme=gr.themes.Soft()) as app:
     gr.Markdown("# 📄 Document AI Assistant")
 
-    # STATES
     chat_state = gr.State([])
     thread_state = gr.State()
    image_state = gr.State()
     client_id = gr.State()
     voice_enabled = gr.State(False)
-    voice_only_state = gr.State(True)
 
     with gr.Row(equal_height=True):
         with gr.Column(scale=1):
@@ -163,9 +153,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
         voice_input = gr.Audio(label="Mic", streaming=True)
         voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
         clear_btn = gr.Button("🧹 Clear Transcript")
-        voice_only_toggle = gr.Checkbox(label="Voice-Only Mode 🎤➡️💬", value=True)
 
-    # UI Event Bindings
+    # Functional bindings
     def toggle_voice(curr):
         return not curr, gr.update(visible=not curr)
 
@@ -174,21 +163,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
         inputs=[user_prompt, chat_state, thread_state, image_state],
         outputs=[user_prompt, chat, thread_state, image_state])
     image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
-
-    # Real-time audio streaming
     voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
     clear_btn.click(fn=clear_transcript, inputs=[client_id], outputs=voice_transcript)
-
-    # Auto-send voice transcript if Voice-Only Mode is enabled
-    voice_input.change(
-        fn=maybe_send_transcript,
-        inputs=[voice_transcript, chat_state, thread_state, image_state, voice_only_state, client_id],
-        outputs=[user_prompt, chat, thread_state, image_state]
-    )
-
-    voice_only_toggle.change(fn=lambda x: x, inputs=voice_only_toggle, outputs=voice_only_state)
-
-    # Initialize WebSocket connection
     app.load(fn=create_ws, outputs=[client_id])
 
 app.launch()
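Note on the run-polling hunk: folding `if status.status == "completed": break` onto one line keeps the original behavior, but the loop still polls with no upper bound and no handling of failed runs. A minimal sketch of a bounded variant, assuming the same `client.beta.threads.runs.retrieve` call used in handle_chat; the timeout guard and terminal-status check are illustrative additions, not part of this commit:

import time

def wait_for_run(client, thread_id, run_id, timeout=60):
    # Poll once per second, as handle_chat does, but stop on terminal
    # states or once the deadline passes instead of spinning forever.
    deadline = time.time() + timeout
    while time.time() < deadline:
        status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
        if status.status == "completed":
            return status
        if status.status in ("failed", "cancelled", "expired"):
            raise RuntimeError(f"run ended with status {status.status}")
        time.sleep(1)
    raise TimeoutError("run did not complete before the timeout")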
 
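The UI bindings reference send_audio and clear_transcript, which fall outside these hunks. From the wiring (clear_btn.click(fn=clear_transcript, inputs=[client_id], outputs=voice_transcript)) and the transcript reset the deleted maybe_send_transcript performed, the clear handler plausibly looks like this; a sketch inferred from the bindings, not code shown in this commit:

def clear_transcript(client_id):
    # Reset the stored transcript for this client's realtime session
    # and return an empty string to blank the Transcript textbox.
    if client_id in connections:
        connections[client_id].transcript = ""
    return ""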