IAMTFRMZA committed on
Commit
4102a15
·
verified ·
1 Parent(s): df3e42c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -27
app.py CHANGED
@@ -3,6 +3,11 @@ import os
3
  import time
4
  import re
5
  from openai import OpenAI
 
 
 
 
 
6
 
7
  # ------------------ App Configuration ------------------
8
  st.set_page_config(page_title="Document AI Assistant", layout="wide")
@@ -29,6 +34,8 @@ if "image_url" not in st.session_state:
29
  st.session_state.image_url = None
30
  if "image_updated" not in st.session_state:
31
  st.session_state.image_updated = False
 
 
32
 
33
  # ------------------ Sidebar Controls ------------------
34
  st.sidebar.header("πŸ”§ Settings")
@@ -37,59 +44,101 @@ if st.sidebar.button("πŸ”„ Clear Chat"):
37
  st.session_state.thread_id = None
38
  st.session_state.image_url = None
39
  st.session_state.image_updated = False
 
40
  st.rerun()
41
 
42
  show_image = st.sidebar.checkbox("πŸ“– Show Document Image", value=True)
 
43
 
44
  # ------------------ Split Layout ------------------
45
- col1, col2 = st.columns([1, 2]) # Adjust ratio as needed
46
 
47
  # ------------------ Image Panel (Left) ------------------
48
  with col1:
49
  if show_image and st.session_state.image_url:
50
  st.image(st.session_state.image_url, caption="πŸ“‘ Extracted Page", use_container_width=True)
51
- st.session_state.image_updated = False # Reset flag after rendering
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  # ------------------ Chat Panel (Right) ------------------
54
  with col2:
55
- # πŸ”Ό Chat input stays at the top
56
  if prompt := st.chat_input("Type your question about the document..."):
57
  st.session_state.messages.append({"role": "user", "content": prompt})
58
  st.chat_message("user").write(prompt)
59
 
60
  try:
61
- # Initialize thread if needed
62
  if st.session_state.thread_id is None:
63
  thread = client.beta.threads.create()
64
  st.session_state.thread_id = thread.id
65
 
66
  thread_id = st.session_state.thread_id
 
 
67
 
68
- # Send message to assistant
69
- client.beta.threads.messages.create(
70
- thread_id=thread_id,
71
- role="user",
72
- content=prompt
73
- )
74
-
75
- # Run assistant
76
- run = client.beta.threads.runs.create(
77
- thread_id=thread_id,
78
- assistant_id=ASSISTANT_ID
79
- )
80
-
81
- # Wait for assistant response
82
  with st.spinner("πŸ€– Assistant is thinking..."):
83
  while True:
84
- run_status = client.beta.threads.runs.retrieve(
85
- thread_id=thread_id,
86
- run_id=run.id
87
- )
88
  if run_status.status == "completed":
89
  break
90
  time.sleep(1)
91
 
92
- # Get assistant response
93
  messages = client.beta.threads.messages.list(thread_id=thread_id)
94
  assistant_message = None
95
  for message in reversed(messages.data):
@@ -100,9 +149,8 @@ with col2:
100
  st.chat_message("assistant").write(assistant_message)
101
  st.session_state.messages.append({"role": "assistant", "content": assistant_message})
102
 
103
- # Extract GitHub image URL if present
104
  image_match = re.search(
105
- r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
106
  assistant_message
107
  )
108
  if image_match:
@@ -113,8 +161,6 @@ with col2:
113
  except Exception as e:
114
  st.error(f"❌ Error: {str(e)}")
115
 
116
- # πŸ”½ Show previous messages below the input
117
  for message in reversed(st.session_state.messages):
118
  role, content = message["role"], message["content"]
119
  st.chat_message(role).write(content)
120
-
 
3
  import time
4
  import re
5
  from openai import OpenAI
6
+ from streamlit_webrtc import webrtc_streamer, AudioProcessorBase
7
+ import av
8
+ import numpy as np
9
+ import tempfile
10
+ import soundfile as sf
11
 
12
  # ------------------ App Configuration ------------------
13
  st.set_page_config(page_title="Document AI Assistant", layout="wide")
 
34
  st.session_state.image_url = None
35
  if "image_updated" not in st.session_state:
36
  st.session_state.image_updated = False
37
+ if "transcript_preview" not in st.session_state:
38
+ st.session_state.transcript_preview = None
39
 
40
  # ------------------ Sidebar Controls ------------------
41
  st.sidebar.header("πŸ”§ Settings")
 
44
  st.session_state.thread_id = None
45
  st.session_state.image_url = None
46
  st.session_state.image_updated = False
47
+ st.session_state.transcript_preview = None
48
  st.rerun()
49
 
50
  show_image = st.sidebar.checkbox("πŸ“– Show Document Image", value=True)
51
+ preview_toggle = st.sidebar.checkbox("πŸ“ Preview transcription before sending", value=True)
52
 
53
  # ------------------ Split Layout ------------------
54
+ col1, col2 = st.columns([1, 2])
55
 
56
  # ------------------ Image Panel (Left) ------------------
57
  with col1:
58
  if show_image and st.session_state.image_url:
59
  st.image(st.session_state.image_url, caption="πŸ“‘ Extracted Page", use_container_width=True)
60
+ st.session_state.image_updated = False
61
+
62
+ # ------------------ Voice Input Processing ------------------
63
+ class AudioRecorder(AudioProcessorBase):
64
+ def __init__(self):
65
+ self.recorded_frames = []
66
+
67
+ def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
68
+ self.recorded_frames.append(frame)
69
+ return frame
70
+
71
+ with col2:
72
+ st.markdown("### πŸŽ™οΈ Voice Input (Optional)")
73
+ webrtc_ctx = webrtc_streamer(
74
+ key="voice-input",
75
+ mode="SENDONLY",
76
+ audio_processor_factory=AudioRecorder,
77
+ media_stream_constraints={"audio": True, "video": False},
78
+ async_processing=True,
79
+ )
80
+
81
+ if webrtc_ctx.audio_processor and not webrtc_ctx.state.playing and webrtc_ctx.audio_processor.recorded_frames:
82
+ st.info("Transcribing your voice...")
83
+ wav_path = tempfile.mktemp(suffix=".wav")
84
+ with open(wav_path, "wb") as f:
85
+ frames = webrtc_ctx.audio_processor.recorded_frames
86
+ audio = frames[0].to_ndarray()
87
+ for frame in frames[1:]:
88
+ audio = np.concatenate((audio, frame.to_ndarray()), axis=1)
89
+ sf.write(f, audio.T, samplerate=frames[0].sample_rate, format="WAV")
90
+
91
+ audio_file = open(wav_path, "rb")
92
+ try:
93
+ whisper_result = client.audio.transcriptions.create(model="whisper-1", file=audio_file, response_format="json")
94
+ transcript = whisper_result.text.strip()
95
+ confidence = whisper_result.get("confidence", "N/A")
96
+
97
+ if transcript:
98
+ st.success(f"Recognized: {transcript}")
99
+ st.caption(f"🧠 Confidence: {confidence}")
100
+ if preview_toggle:
101
+ st.session_state.transcript_preview = transcript
102
+ else:
103
+ st.session_state.messages.append({"role": "user", "content": transcript})
104
+ st.rerun()
105
+ except Exception as e:
106
+ st.error(f"❌ Transcription failed: {str(e)}")
107
+
108
+ if st.session_state.transcript_preview:
109
+ st.markdown("---")
110
+ st.markdown("### πŸ“ Transcription Preview")
111
+ st.markdown(f"> {st.session_state.transcript_preview}")
112
+ if st.button("βœ… Send to Assistant"):
113
+ st.session_state.messages.append({"role": "user", "content": st.session_state.transcript_preview})
114
+ st.session_state.transcript_preview = None
115
+ st.rerun()
116
+ if st.button("❌ Discard"):
117
+ st.session_state.transcript_preview = None
118
+ st.rerun()
119
 
120
  # ------------------ Chat Panel (Right) ------------------
121
  with col2:
 
122
  if prompt := st.chat_input("Type your question about the document..."):
123
  st.session_state.messages.append({"role": "user", "content": prompt})
124
  st.chat_message("user").write(prompt)
125
 
126
  try:
 
127
  if st.session_state.thread_id is None:
128
  thread = client.beta.threads.create()
129
  st.session_state.thread_id = thread.id
130
 
131
  thread_id = st.session_state.thread_id
132
+ client.beta.threads.messages.create(thread_id=thread_id, role="user", content=prompt)
133
+ run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=ASSISTANT_ID)
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  with st.spinner("πŸ€– Assistant is thinking..."):
136
  while True:
137
+ run_status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
 
 
 
138
  if run_status.status == "completed":
139
  break
140
  time.sleep(1)
141
 
 
142
  messages = client.beta.threads.messages.list(thread_id=thread_id)
143
  assistant_message = None
144
  for message in reversed(messages.data):
 
149
  st.chat_message("assistant").write(assistant_message)
150
  st.session_state.messages.append({"role": "assistant", "content": assistant_message})
151
 
 
152
  image_match = re.search(
153
+ r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',  # single backslashes: in a raw string a doubled backslash matches a literal "\", so the URL would never match
154
  assistant_message
155
  )
156
  if image_match:
 
161
  except Exception as e:
162
  st.error(f"❌ Error: {str(e)}")
163
 
 
164
  for message in reversed(st.session_state.messages):
165
  role, content = message["role"], message["content"]
166
  st.chat_message(role).write(content)