IAMTFRMZA committed on
Commit e760d91 · verified · 1 Parent(s): 7783ebe

Update app.py

Files changed (1):
  1. app.py +163 -3
app.py CHANGED
@@ -1,10 +1,41 @@
-import base64
+import streamlit as st
+import os
+import time
+import re
 import requests
 import tempfile
-import streamlit as st
 from openai import OpenAI
+from streamlit_webrtc import webrtc_streamer, WebRtcMode, ClientSettings
+import av
+import numpy as np
+import wave
+
+# ------------------ Configuration ------------------
+st.set_page_config(page_title="Document AI Assistant", layout="wide")
+st.title("📄 Document AI Assistant")
+st.caption("Chat with an AI Assistant on your medical/pathology documents")
+
+# ------------------ Secrets ------------------
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+ASSISTANT_ID = os.environ.get("ASSISTANT_ID")
 
-# Whisper transcription function
+if not OPENAI_API_KEY or not ASSISTANT_ID:
+    st.error("❌ Missing secrets. Please set both OPENAI_API_KEY and ASSISTANT_ID in your Hugging Face Space settings.")
+    st.stop()
+
+client = OpenAI(api_key=OPENAI_API_KEY)
+
+# ------------------ Session State ------------------
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+if "thread_id" not in st.session_state:
+    st.session_state.thread_id = None
+if "image_url" not in st.session_state:
+    st.session_state.image_url = None
+if "audio_buffer" not in st.session_state:
+    st.session_state.audio_buffer = []
+
+# ------------------ Whisper Transcription ------------------
 def transcribe_audio(file_path, api_key):
     with open(file_path, "rb") as f:
         response = requests.post(
@@ -14,3 +45,132 @@ def transcribe_audio(file_path, api_key):
             data={"model": "whisper-1"}
         )
     return response.json().get("text", None)
+
+# ------------------ Audio Recorder ------------------
+class AudioProcessor:
+    def __init__(self):
+        self.frames = []
+
+    def recv(self, frame):
+        audio = frame.to_ndarray()
+        self.frames.append(audio)
+        return av.AudioFrame.from_ndarray(audio, layout="mono")
+
+def save_wav(frames, path, rate=48000):
+    audio_data = np.concatenate(frames)
+    with wave.open(path, 'wb') as wf:
+        wf.setnchannels(1)
+        wf.setsampwidth(2)
+        wf.setframerate(rate)
+        wf.writeframes(audio_data.tobytes())
+
+# ------------------ Sidebar & Image Panel ------------------
+st.sidebar.header("🔧 Settings")
+if st.sidebar.button("🔄 Clear Chat"):
+    st.session_state.messages = []
+    st.session_state.thread_id = None
+    st.session_state.image_url = None
+    st.rerun()
+
+show_image = st.sidebar.checkbox("📖 Show Document Image", value=True)
+col1, col2 = st.columns([1, 2])
+
+with col1:
+    if show_image and st.session_state.image_url:
+        st.image(st.session_state.image_url, caption="📑 Extracted Page", use_container_width=True)
+
+# ------------------ Chat & Voice Panel ------------------
+with col2:
+    # Display previous messages
+    for message in st.session_state.messages:
+        st.chat_message(message["role"]).write(message["content"])
+
+    # 🎤 Real-time voice recorder
+    st.subheader("🎙️ Ask with your voice")
+    audio_ctx = webrtc_streamer(
+        key="speech",
+        mode=WebRtcMode.SENDONLY,
+        in_audio_enabled=True,
+        audio_receiver_size=256,
+        client_settings=ClientSettings(
+            media_stream_constraints={"audio": True, "video": False},
+            rtc_configuration={"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]},
+        ),
+    )
+
+    if audio_ctx.audio_receiver:
+        audio_processor = AudioProcessor()
+        result = audio_ctx.audio_receiver.recv()
+        audio_data = result.to_ndarray()
+        st.session_state.audio_buffer.append(audio_data)
+
+        # ⏱️ Auto stop after short time
+        if len(st.session_state.audio_buffer) > 30:  # about 3s
+            tmp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
+            save_wav(st.session_state.audio_buffer, tmp_path)
+            st.session_state.audio_buffer = []
+
+            with st.spinner("🧠 Transcribing..."):
+                transcript = transcribe_audio(tmp_path, OPENAI_API_KEY)
+
+            if transcript:
+                st.success("📝 " + transcript)
+                st.session_state.messages.append({"role": "user", "content": transcript})
+                st.chat_message("user").write(transcript)
+                prompt = transcript
+
+                # ---- Assistant interaction ----
+                try:
+                    if st.session_state.thread_id is None:
+                        thread = client.beta.threads.create()
+                        st.session_state.thread_id = thread.id
+
+                    thread_id = st.session_state.thread_id
+
+                    client.beta.threads.messages.create(
+                        thread_id=thread_id,
+                        role="user",
+                        content=prompt
+                    )
+
+                    run = client.beta.threads.runs.create(
+                        thread_id=thread_id,
+                        assistant_id=ASSISTANT_ID
+                    )
+
+                    with st.spinner("Assistant is thinking..."):
+                        while True:
+                            run_status = client.beta.threads.runs.retrieve(
+                                thread_id=thread_id,
+                                run_id=run.id
+                            )
+                            if run_status.status == "completed":
+                                break
+                            time.sleep(1)
+
+                    messages = client.beta.threads.messages.list(thread_id=thread_id)
+                    assistant_message = None
+                    for message in reversed(messages.data):
+                        if message.role == "assistant":
+                            assistant_message = message.content[0].text.value
+                            break
+
+                    st.chat_message("assistant").write(assistant_message)
+                    st.session_state.messages.append({"role": "assistant", "content": assistant_message})
+
+                    # Image link extract
+                    image_match = re.search(
+                        r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
+                        assistant_message
+                    )
+                    if image_match:
+                        st.session_state.image_url = image_match.group(0)
+
+                except Exception as e:
+                    st.error(f"❌ Error: {str(e)}")
+
+    # Fallback text input
+    if prompt := st.chat_input("💬 Or type your question..."):
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        st.chat_message("user").write(prompt)
+        # Send prompt to assistant logic follows same flow above (you can wrap in a function)
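
Note: the diff's last line leaves the typed-input path unfinished and suggests wrapping the assistant round-trip in a function. A minimal sketch of that refactor, assuming the module-level `client`, `ASSISTANT_ID`, and `st.session_state` defined earlier in app.py; the helper name `ask_assistant` is hypothetical and not part of this commit:

import time

def ask_assistant(client, assistant_id, thread_id, prompt):
    """Send one user prompt to the Assistant; return (reply_text, thread_id)."""
    # Create a thread on first use; reuse it on later turns
    if thread_id is None:
        thread_id = client.beta.threads.create().id

    client.beta.threads.messages.create(thread_id=thread_id, role="user", content=prompt)
    run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=assistant_id)

    # Poll until the run completes, mirroring the loop in the diff above
    while client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id).status != "completed":
        time.sleep(1)

    # Scan oldest-first and return the first assistant reply, as the diff does
    messages = client.beta.threads.messages.list(thread_id=thread_id)
    for message in reversed(messages.data):
        if message.role == "assistant":
            return message.content[0].text.value, thread_id
    return None, thread_id

Both the voice branch and the text branch could then call
assistant_message, st.session_state.thread_id = ask_assistant(client, ASSISTANT_ID, st.session_state.thread_id, prompt)
before rendering the reply.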