siddhartharyaai commited on
Commit
71678c7
·
verified ·
1 Parent(s): 15d59d2

Update qa.py

Browse files
Files changed (1) hide show
  1. qa.py +26 -16
qa.py CHANGED
@@ -1,40 +1,33 @@
1
  # qa.py
2
 
3
  import os
 
4
  import json
5
  import tempfile
6
  import streamlit as st
7
- from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration, AudioProcessorBase
8
 
9
  from utils import generate_audio_mp3, call_groq_api_for_qa
10
-
11
  import av
12
  import pydub
13
- import wave
14
  import numpy as np
15
 
16
- # For streaming from the mic, we need some RTC configuration
17
- RTC_CONFIGURATION = RTCConfiguration(
18
- {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
19
- )
20
 
21
  class AudioBufferProcessor(AudioProcessorBase):
22
  """
23
  A custom audio processor that accumulates raw audio frames in memory.
24
- When the user stops, we can finalize them into a single WAV for STT.
25
  """
26
  def __init__(self) -> None:
27
  self.frames = []
 
28
 
29
  def recv_audio(self, frame: av.AudioFrame) -> av.AudioFrame:
30
- # Convert the audio frame to a pydub AudioSegment
 
31
  pcm = frame.to_ndarray()
32
- # The shape is (channels, samples)
33
- # We'll assume single channel or handle the first channel
34
  if pcm.ndim == 2 and pcm.shape[0] > 1:
35
- # If stereo, just take the first channel for STT
36
  pcm = pcm[0, :]
37
-
38
  sample_rate = frame.sample_rate
39
  samples = pcm.astype(np.int16).tobytes()
40
  segment = pydub.AudioSegment(
@@ -48,8 +41,7 @@ class AudioBufferProcessor(AudioProcessorBase):
48
 
49
  def finalize_wav(self) -> str:
50
  """
51
- Once the user stops recording, combine frames into a single WAV file.
52
- Returns path to the wav file.
53
  """
54
  if not self.frames:
55
  return ""
@@ -58,6 +50,25 @@ class AudioBufferProcessor(AudioProcessorBase):
58
  combined.export(tmp_wav.name, format="wav")
59
  return tmp_wav.name
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  def handle_qa_exchange(conversation_so_far: str, user_question: str) -> (bytes, str):
63
  """
@@ -86,7 +97,6 @@ def handle_qa_exchange(conversation_so_far: str, user_question: str) -> (bytes,
86
  if not answer_text.strip():
87
  return (None, "")
88
 
89
- # TTS
90
  audio_file_path = generate_audio_mp3(answer_text, "John")
91
  with open(audio_file_path, "rb") as f:
92
  audio_bytes = f.read()
 
1
  # qa.py
2
 
3
  import os
4
+ import requests
5
  import json
6
  import tempfile
7
  import streamlit as st
 
8
 
9
  from utils import generate_audio_mp3, call_groq_api_for_qa
 
10
  import av
11
  import pydub
 
12
  import numpy as np
13
 
14
+ from streamlit_webrtc import AudioProcessorBase
 
 
 
15
 
16
  class AudioBufferProcessor(AudioProcessorBase):
17
  """
18
  A custom audio processor that accumulates raw audio frames in memory.
19
+ Now we also count frames for debugging.
20
  """
21
  def __init__(self) -> None:
22
  self.frames = []
23
+ self.frame_count = 0
24
 
25
  def recv_audio(self, frame: av.AudioFrame) -> av.AudioFrame:
26
+ self.frame_count += 1 # debug increment
27
+
28
  pcm = frame.to_ndarray()
 
 
29
  if pcm.ndim == 2 and pcm.shape[0] > 1:
 
30
  pcm = pcm[0, :]
 
31
  sample_rate = frame.sample_rate
32
  samples = pcm.astype(np.int16).tobytes()
33
  segment = pydub.AudioSegment(
 
41
 
42
  def finalize_wav(self) -> str:
43
  """
44
+ Combine frames into one WAV file if we have them.
 
45
  """
46
  if not self.frames:
47
  return ""
 
50
  combined.export(tmp_wav.name, format="wav")
51
  return tmp_wav.name
52
 
53
def transcribe_audio_deepgram(local_audio_path: str) -> str:
    """
    Send a local WAV file to Deepgram STT and return the transcript text.

    Parameters
    ----------
    local_audio_path : str
        Path to an audio file on disk (expected to be WAV, per the
        Content-Type sent below).

    Returns
    -------
    str
        The transcript of the first alternative of the first channel,
        or "" if Deepgram returned no transcript field.

    Raises
    ------
    ValueError
        If DEEPGRAM_API_KEY is not set in the environment.
    requests.HTTPError
        If Deepgram responds with a non-2xx status.
    requests.Timeout
        If the request exceeds the timeout below.
    KeyError / IndexError
        If the response JSON does not have the expected shape.
    """
    DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
    if not DEEPGRAM_API_KEY:
        raise ValueError("Deepgram API key not found in environment variables.")

    url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
    headers = {
        "Authorization": f"Token {DEEPGRAM_API_KEY}",
        "Content-Type": "audio/wav"
    }
    # Stream the file object directly as the request body (no full read into
    # memory). A timeout is essential here: requests has no default timeout,
    # and a stalled STT endpoint would otherwise hang the Streamlit app.
    with open(local_audio_path, "rb") as f:
        response = requests.post(url, headers=headers, data=f, timeout=60)
    response.raise_for_status()
    data = response.json()
    transcript = data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
    return transcript
72
 
73
  def handle_qa_exchange(conversation_so_far: str, user_question: str) -> (bytes, str):
74
  """
 
97
  if not answer_text.strip():
98
  return (None, "")
99
 
 
100
  audio_file_path = generate_audio_mp3(answer_text, "John")
101
  with open(audio_file_path, "rb") as f:
102
  audio_bytes = f.read()