IAMTFRMZA committed
Commit 1c29e60 · verified · 1 Parent(s): a0f6b11

Update app.py

Files changed (1):
  app.py +30 -46
app.py CHANGED
@@ -1,6 +1,4 @@
  import streamlit as st
- from streamlit_webrtc import webrtc_streamer, AudioProcessorBase
- import av
  import numpy as np
  import tempfile
  import soundfile as sf
@@ -8,15 +6,7 @@ import os
  import time
  import re
  from openai import OpenAI
-
- # ------------------ Audio Processor ------------------
- class AudioRecorder(AudioProcessorBase):
-     def __init__(self):
-         self.recorded_frames = []
-
-     def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
-         self.recorded_frames.append(frame)
-         return frame
+ from streamlit_audio_recorder import audio_recorder

  # ------------------ App Configuration ------------------
  st.set_page_config(page_title="Document AI Assistant", layout="wide")
@@ -71,40 +61,34 @@ with col1:
  # ------------------ Voice Input Processing ------------------
  with col2:
      st.markdown("### 🎙️ Voice Input (Optional)")
-     webrtc_ctx = webrtc_streamer(
-         key="voice-input",
-         mode="SENDONLY",
-         audio_processor_factory=AudioRecorder,
-         media_stream_constraints={"audio": True, "video": False},
-         async_processing=True,
-     )
-
-     if webrtc_ctx.audio_processor and not webrtc_ctx.state.playing and webrtc_ctx.audio_processor.recorded_frames:
+     audio_bytes = audio_recorder(pause_threshold=2.0)
+
+     if audio_bytes:
          st.info("Transcribing your voice...")
-         wav_path = tempfile.mktemp(suffix=".wav")
-         with open(wav_path, "wb") as f:
-             frames = webrtc_ctx.audio_processor.recorded_frames
-             audio = frames[0].to_ndarray()
-             for frame in frames[1:]:
-                 audio = np.concatenate((audio, frame.to_ndarray()), axis=1)
-             sf.write(f, audio.T, samplerate=frames[0].sample_rate, format="WAV")
-
-         audio_file = open(wav_path, "rb")
-         try:
-             whisper_result = client.audio.transcriptions.create(model="whisper-1", file=audio_file, response_format="json")
-             transcript = whisper_result.text.strip()
-             confidence = whisper_result.get("confidence", "N/A")
-
-             if transcript:
-                 st.success(f"Recognized: {transcript}")
-                 st.caption(f"🧠 Confidence: {confidence}")
-                 if preview_toggle:
-                     st.session_state.transcript_preview = transcript
-                 else:
-                     st.session_state.messages.append({"role": "user", "content": transcript})
-                     st.rerun()
-         except Exception as e:
-             st.error(f"❌ Transcription failed: {str(e)}")
+         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+             tmp.write(audio_bytes)
+             tmp_path = tmp.name
+
+         with open(tmp_path, "rb") as audio_file:
+             try:
+                 whisper_result = client.audio.transcriptions.create(
+                     model="whisper-1",
+                     file=audio_file,
+                     response_format="json"
+                 )
+                 transcript = whisper_result.text.strip()
+                 confidence = whisper_result.get("confidence", "N/A")
+
+                 if transcript:
+                     st.success(f"Recognized: {transcript}")
+                     st.caption(f"🧠 Confidence: {confidence}")
+                     if preview_toggle:
+                         st.session_state.transcript_preview = transcript
+                     else:
+                         st.session_state.messages.append({"role": "user", "content": transcript})
+                         st.rerun()
+             except Exception as e:
+                 st.error(f"❌ Transcription failed: {str(e)}")

      if st.session_state.transcript_preview:
          st.markdown("---")
@@ -151,7 +135,7 @@ with col2:
      st.session_state.messages.append({"role": "assistant", "content": assistant_message})

      image_match = re.search(
-         r'https://raw\\.githubusercontent\\.com/AndrewLORTech/surgical-pathology-manual/main/[\\w\\-/]*\\.png',
+         r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
          assistant_message
      )
      if image_match:
@@ -164,4 +148,4 @@ with col2:

  for message in reversed(st.session_state.messages):
      role, content = message["role"], message["content"]
-     st.chat_message(role).write(content)
+     st.chat_message(role).write(content)
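
For anyone who wants to exercise the new voice path outside the full app, a minimal sketch follows. It assumes the recorder component is the PyPI package audio-recorder-streamlit, whose documented import is audio_recorder_streamlit (the commit's streamlit_audio_recorder import would need to match whatever package is actually installed), and that OPENAI_API_KEY is set in the environment; the pause_threshold=2.0 call matches the committed code.

# Minimal sketch of the new voice-input path (assumptions: the
# audio-recorder-streamlit package, OPENAI_API_KEY in the environment).
import tempfile

import streamlit as st
from audio_recorder_streamlit import audio_recorder
from openai import OpenAI

client = OpenAI()  # picks up OPENAI_API_KEY

# Returns the recording as WAV bytes once the speaker pauses ~2 s, else None.
audio_bytes = audio_recorder(pause_threshold=2.0)

if audio_bytes:
    # delete=False keeps the file on disk after the with-block closes it,
    # so it can be reopened by path for the upload.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(audio_bytes)
        tmp_path = tmp.name

    with open(tmp_path, "rb") as audio_file:
        result = client.audio.transcriptions.create(
            model="whisper-1", file=audio_file, response_format="json"
        )
    st.write(result.text.strip())

One caveat carried over from the old code: whisper_result.get("confidence", "N/A") will raise AttributeError, because the OpenAI SDK returns a Transcription model object rather than a dict, and the whisper-1 json response does not include a confidence field in any case. getattr(whisper_result, "confidence", "N/A") would at least degrade gracefully to the "N/A" caption.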
 
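A smaller hygiene point in the same hunk: the removed code built its path with tempfile.mktemp, which only returns a name and is deprecated as race-prone, while the replacement's NamedTemporaryFile(delete=False) creates and opens the file atomically. The commit never deletes the file afterwards, though; a sketch of the same pattern with explicit cleanup (the placeholder bytes are invented for illustration):

import os
import tempfile

audio_bytes = b"RIFF....WAVEfmt "  # placeholder bytes, illustration only

# NamedTemporaryFile creates and opens the file in one step; delete=False
# keeps it on disk after the with-block so it can be reopened by path.
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
    tmp.write(audio_bytes)
    tmp_path = tmp.name

try:
    with open(tmp_path, "rb") as f:
        f.read()  # e.g. hand the open handle to the transcription API here
finally:
    os.remove(tmp_path)  # cleanup step the committed code omits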
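The regex hunk fixes doubled escapes: inside a raw string, \\. already means "a literal backslash, then any character", so the old pattern demanded backslashes no URL contains, and its character class [\\w\\-/] does not even compile in Python (the - forms an invalid range from \ to /). A quick check, with a .png path invented purely for illustration:

import re

text = "See https://raw.githubusercontent.com/AndrewLORTech/surgical-pathology-manual/main/fig1.png"

# Old pattern: each r'\\' is an escaped backslash, so the pattern requires
# literal backslashes, and the class [\\w\\-/] raises a bad-range error.
try:
    re.search(r'https://raw\\.githubusercontent\\.com/[\\w\\-/]*\\.png', text)
except re.error as exc:
    print("old pattern:", exc)

# New pattern: r'\.' escapes the dot itself, matching a literal "." in the URL.
match = re.search(r'https://raw\.githubusercontent\.com/[\w\-/]*\.png', text)
print("new pattern:", match.group(0) if match else None)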