IAMTFRMZA committed on
Commit
f383782
·
verified ·
1 Parent(s): b74ae51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -19
app.py CHANGED
@@ -20,7 +20,7 @@ if not OPENAI_API_KEY or not ASSISTANT_ID:
20
 
21
  client = OpenAI(api_key=OPENAI_API_KEY)
22
 
23
- # ------------------ Chat Logic ------------------
24
  session_threads = {}
25
 
26
  def reset_session():
@@ -71,7 +71,12 @@ def extract_image_url(text):
71
  )
72
  return match.group(0) if match else None
73
 
74
- # ------------------ Transcription Logic ------------------
 
 
 
 
 
75
  def create_websocket_client():
76
  client_id = str(uuid.uuid4())
77
  connections[client_id] = WebSocketClient(WEBSOCKET_URI, WEBSOCKET_HEADERS, client_id)
@@ -90,42 +95,30 @@ def send_audio_chunk(audio, client_id):
90
  connections[client_id].enqueue_audio_chunk(sr, y)
91
  return connections[client_id].transcript
92
 
93
- # ------------------ Gradio App ------------------
94
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
95
  gr.Markdown("# 🧠 Document AI + 🎙️ Voice Assistant")
96
 
97
  session_id = gr.State(value=reset_session())
98
  client_id = gr.State()
99
- image_url = gr.State(value=None)
100
 
101
  with gr.Row():
102
  with gr.Column(scale=1):
103
  image_display = gr.Image(label="📑 Extracted Document Image", show_label=True, height=400)
104
  with gr.Column(scale=2):
105
  chatbot = gr.ChatInterface(
106
- fn=lambda message, history, session_id: process_chat(message, history, session_id),
107
  additional_inputs=[session_id],
 
108
  examples=[
109
  ["What does clause 3.2 mean?"],
110
  ["Summarize the timeline from the image."]
111
  ],
112
  title="💬 Document Assistant"
113
  )
 
114
 
115
- # Inject logic to extract image when assistant replies
116
- def handle_reply_and_update_image(message, history, session_id):
117
- response = process_chat(message, history, session_id)
118
- url = extract_image_url(response)
119
- return response, url
120
-
121
- chatbot.fn = lambda message, history, session_id: handle_reply_and_update_image(message, history, session_id)[0]
122
- chatbot.chatbot.change(
123
- fn=lambda m, h, s: handle_reply_and_update_image(m, h, s)[1],
124
- inputs=[chatbot.input, chatbot.chatbot, session_id],
125
- outputs=image_display
126
- )
127
-
128
- # ------------------ Voice Transcription ------------------
129
  gr.Markdown("## 🎙️ Realtime Voice Transcription")
130
 
131
  with gr.Row():
 
20
 
21
  client = OpenAI(api_key=OPENAI_API_KEY)
22
 
23
+ # ------------------ Chat Session Logic ------------------
24
  session_threads = {}
25
 
26
  def reset_session():
 
71
  )
72
  return match.group(0) if match else None
73
 
74
+ def chat_with_image(message, history, session_id):
75
+ reply = process_chat(message, history, session_id)
76
+ image_url = extract_image_url(reply)
77
+ return reply, image_url
78
+
79
+ # ------------------ Voice Transcription ------------------
80
  def create_websocket_client():
81
  client_id = str(uuid.uuid4())
82
  connections[client_id] = WebSocketClient(WEBSOCKET_URI, WEBSOCKET_HEADERS, client_id)
 
95
  connections[client_id].enqueue_audio_chunk(sr, y)
96
  return connections[client_id].transcript
97
 
98
+ # ------------------ Gradio UI ------------------
99
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
100
  gr.Markdown("# 🧠 Document AI + 🎙️ Voice Assistant")
101
 
102
  session_id = gr.State(value=reset_session())
103
  client_id = gr.State()
 
104
 
105
  with gr.Row():
106
  with gr.Column(scale=1):
107
  image_display = gr.Image(label="📑 Extracted Document Image", show_label=True, height=400)
108
  with gr.Column(scale=2):
109
  chatbot = gr.ChatInterface(
110
+ fn=chat_with_image,
111
  additional_inputs=[session_id],
112
+ outputs=["text", "image"],
113
  examples=[
114
  ["What does clause 3.2 mean?"],
115
  ["Summarize the timeline from the image."]
116
  ],
117
  title="💬 Document Assistant"
118
  )
119
+ chatbot.render()
120
 
121
+ # ------------------ Transcription Section ------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  gr.Markdown("## 🎙️ Realtime Voice Transcription")
123
 
124
  with gr.Row():