IAMTFRMZA committed
Commit 004ead9 · verified · 1 Parent(s): ef80150

Update app.py

Files changed (1)
  1. app.py +81 -85
app.py CHANGED
@@ -1,105 +1,101 @@
  import gradio as gr
  import os
- import json
  import uuid
- import threading
- import time
- import re
- from dotenv import load_dotenv
  from openai import OpenAI
  from realtime_transcriber import WebSocketClient, connections, WEBSOCKET_URI, WEBSOCKET_HEADERS

- # ------------------ Load Secrets ------------------
- load_dotenv()
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
- ASSISTANT_ID = os.getenv("ASSISTANT_ID")
-
- if not OPENAI_API_KEY or not ASSISTANT_ID:
-     raise ValueError("Missing OPENAI_API_KEY or ASSISTANT_ID")
-
  client = OpenAI(api_key=OPENAI_API_KEY)
- session_threads = {}
-
- # ------------------ Chat Logic ------------------
- def reset_session():
-     session_id = str(uuid.uuid4())
-     session_threads[session_id] = client.beta.threads.create().id
-     return session_id
-
- def process_chat(message, history, session_id):
-     thread_id = session_threads.get(session_id)
-     if not thread_id:
-         thread_id = client.beta.threads.create().id
-         session_threads[session_id] = thread_id
-
-     client.beta.threads.messages.create(thread_id=thread_id, role="user", content=message)
-     run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=ASSISTANT_ID)
-
-     while client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id).status != "completed":
-         time.sleep(1)
-
-     messages = client.beta.threads.messages.list(thread_id=thread_id)
-     for msg in reversed(messages.data):
-         if msg.role == "assistant":
-             return msg.content[0].text.value
-     return "⚠️ Assistant did not respond."

- def extract_image_url(text):
-     match = re.search(r'https://raw\.githubusercontent\.com/[^\s"]+\.png', text)
-     return match.group(0) if match else None
-
- def handle_chat(message, history, session_id):
-     response = process_chat(message, history, session_id)
-     history.append((message, response))
-     image = extract_image_url(response)
-     return history, image

- # ------------------ Voice Logic ------------------
- def create_websocket_client():
-     client_id = str(uuid.uuid4())
-     connections[client_id] = WebSocketClient(WEBSOCKET_URI, WEBSOCKET_HEADERS, client_id)
-     threading.Thread(target=connections[client_id].run, daemon=True).start()
-     return client_id

- def clear_transcript(client_id):
-     if client_id in connections:
-         connections[client_id].transcript = ""
-     return ""

- def send_audio_chunk(audio, client_id):
-     if client_id not in connections:
-         return "Initializing connection..."
-     sr, y = audio
-     connections[client_id].enqueue_audio_chunk(sr, y)
-     return connections[client_id].transcript

- # ------------------ UI ------------------
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
      gr.Markdown("# 🧠 Document AI + 🎙️ Voice Assistant")
-
-     session_id = gr.State(value=reset_session())
-     client_id = gr.State()

      with gr.Row():
-         image_display = gr.Image(label="📑 Extracted Document Image", show_label=True, height=480, width=400)
-         with gr.Column():
-             chatbot = gr.Chatbot(label="💬 Document Assistant", height=480)
-             text_input = gr.Textbox(label="Ask about the document", placeholder="e.g. What is clause 3.2?")
-             send_btn = gr.Button("Send")

-     send_btn.click(handle_chat, inputs=[text_input, chatbot, session_id], outputs=[chatbot, image_display])
-     text_input.submit(handle_chat, inputs=[text_input, chatbot, session_id], outputs=[chatbot, image_display])

-     # Toggle Section
-     with gr.Accordion("🎤 Or Use Voice Instead", open=False):
-         with gr.Row():
-             transcript_box = gr.Textbox(label="Live Transcript", lines=7, interactive=False, autoscroll=True)
          with gr.Row():
-             mic_input = gr.Audio(streaming=True)
-             clear_button = gr.Button("Clear Transcript")
-
-         mic_input.stream(fn=send_audio_chunk, inputs=[mic_input, client_id], outputs=transcript_box)
-         clear_button.click(fn=clear_transcript, inputs=[client_id], outputs=transcript_box)
-     demo.load(fn=create_websocket_client, outputs=client_id)

  demo.launch()
 
  import gradio as gr
  import os
  import uuid
  from openai import OpenAI
  from realtime_transcriber import WebSocketClient, connections, WEBSOCKET_URI, WEBSOCKET_HEADERS

+ # Load OpenAI API key
+ OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+ if not OPENAI_API_KEY:
+     raise ValueError("OPENAI_API_KEY environment variable must be set")
  client = OpenAI(api_key=OPENAI_API_KEY)

+ # Session state
+ session_id = str(uuid.uuid4())
+ if session_id not in connections:
+     connections[session_id] = WebSocketClient(WEBSOCKET_URI, WEBSOCKET_HEADERS, session_id)
+     connections[session_id].start()
+
+ # Functions for Document Assistant
+ def process_user_input(message, history):
+     if not message:
+         return "Please enter a message.", history
+
+     try:
+         thread = client.beta.threads.create()
+         client.beta.threads.messages.create(
+             thread_id=thread.id,
+             role="user",
+             content=message
+         )
+         run = client.beta.threads.runs.create(
+             thread_id=thread.id,
+             assistant_id=os.environ.get("ASSISTANT_ID")
+         )
+         while True:
+             run_status = client.beta.threads.runs.retrieve(
+                 thread_id=thread.id,
+                 run_id=run.id
+             )
+             if run_status.status == "completed":
+                 break
+         messages = client.beta.threads.messages.list(thread_id=thread.id)
+         assistant_reply = next((m.content[0].text.value for m in reversed(messages.data) if m.role == "assistant"), "No response.")
+         history.append((message, assistant_reply))
+         return "", history
+     except Exception as e:
+         return f"❌ Error: {str(e)}", history
+
+ # Functions for Realtime Voice Transcription
+ def send_audio_chunk_realtime(mic_chunk):
+     if session_id not in connections:
+         return "Initializing voice session..."
+     if mic_chunk is not None:
+         sr, y = mic_chunk
+         connections[session_id].enqueue_audio_chunk(sr, y)
+     return connections[session_id].transcript
+
+ def clear_transcript():
+     if session_id in connections:
+         connections[session_id].transcript = ""
+     return ""

+ # Gradio UI Components
+ doc_image = gr.Image(label="📘 Extracted Document Image", show_label=True, elem_id="docimg", height=500, width=360)
+ chatbot = gr.Chatbot(label="🧠 Document Assistant", elem_id="chatbox", bubble_full_width=False)
+ prompt = gr.Textbox(placeholder="Ask about the document...", label="Ask about the document")
+ send_btn = gr.Button("Send")

+ # Voice Section
+ audio_in = gr.Audio(label="🎵 Audio", type="numpy", streaming=True)
+ live_transcript = gr.Textbox(label="Live Transcript", lines=6)
+ clear_btn = gr.Button("Clear Transcript")

+ with gr.Blocks(theme=gr.themes.Base(), css="""
+ #docimg img { object-fit: contain !important; }
+ #chatbox { height: 500px; }
+ .gr-box { border-radius: 12px; }
+ """) as demo:

      gr.Markdown("# 🧠 Document AI + 🎙️ Voice Assistant")
+     with gr.Row():
+         with gr.Column(scale=1):
+             doc_image.render()
+         with gr.Column(scale=2):
+             chatbot.render()

      with gr.Row():
+         prompt.render()
+         send_btn.render()

+     send_btn.click(fn=process_user_input, inputs=[prompt, chatbot], outputs=[prompt, chatbot])

+     with gr.Accordion("🎙️ Or Use Voice Instead", open=False):
+         live_transcript.render()
          with gr.Row():
+             audio_in.render()
+             clear_btn.render()
+         audio_in.stream(fn=send_audio_chunk_realtime, inputs=audio_in, outputs=live_transcript)
+         clear_btn.click(fn=clear_transcript, outputs=live_transcript)

  demo.launch()
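
For readers skimming the diff, the new process_user_input follows the usual Assistants pattern: create a thread, post the user message, start a run, poll the run until it finishes, then read back the latest assistant reply. Below is a minimal standalone sketch of that flow; the sleep interval, timeout, and the extra terminal statuses are illustrative assumptions and not part of this commit.

import os
import time
from openai import OpenAI

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

def ask_assistant(question: str, poll_interval: float = 1.0, timeout: float = 60.0) -> str:
    """Send one question to an Assistant and return its text reply."""
    thread = client.beta.threads.create()
    client.beta.threads.messages.create(thread_id=thread.id, role="user", content=question)
    run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=os.environ["ASSISTANT_ID"],
    )

    # Poll until the run reaches a terminal state; the sleep keeps the loop
    # from hammering the API (interval/timeout values here are assumptions).
    deadline = time.monotonic() + timeout
    while True:
        run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
        if run.status in ("completed", "failed", "cancelled", "expired"):
            break
        if time.monotonic() > deadline:
            raise TimeoutError("Assistant run did not finish in time")
        time.sleep(poll_interval)

    if run.status != "completed":
        return f"Run ended with status: {run.status}"

    # Messages are returned newest first; take the latest assistant reply.
    messages = client.beta.threads.messages.list(thread_id=thread.id)
    for msg in messages.data:
        if msg.role == "assistant":
            return msg.content[0].text.value
    return "No response."

if __name__ == "__main__":
    print(ask_assistant("What is clause 3.2?"))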
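
The streaming microphone path works the same way in both versions: Gradio hands each chunk to the stream callback as a (sample_rate, samples) tuple, which send_audio_chunk_realtime unpacks before forwarding to the WebSocket client. A tiny self-contained sketch of that wiring follows, with a plain list standing in for WebSocketClient (the accumulator and labels are stand-ins, not the real realtime_transcriber module).

import gradio as gr
import numpy as np

received = []  # stand-in for WebSocketClient's transcript buffer

def on_audio_chunk(chunk):
    # Gradio streams microphone audio as (sample_rate, numpy samples) tuples;
    # chunk can be None before recording starts.
    if chunk is not None:
        sr, samples = chunk
        received.append(f"{np.asarray(samples).shape[0]} samples @ {sr} Hz")
    return "\n".join(received)

with gr.Blocks() as demo:
    mic = gr.Audio(type="numpy", streaming=True, label="Audio")
    out = gr.Textbox(label="Chunks received", lines=6)
    mic.stream(fn=on_audio_chunk, inputs=mic, outputs=out)

demo.launch()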