MyPod_10

Running

App Files Community

siddhartharyaai committed on Jan 14

Commit

71678c7

verified ·

1 Parent(s): 15d59d2

Update qa.py

Browse files

Files changed (1) hide show

qa.py +26 -16

qa.py CHANGED Viewed

@@ -1,40 +1,33 @@
 # qa.py
 import os
 import json
 import tempfile
 import streamlit as st
-from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration, AudioProcessorBase
 from utils import generate_audio_mp3, call_groq_api_for_qa
 import av
 import pydub
-import wave
 import numpy as np
-# For streaming from the mic, we need some RTC configuration
-RTC_CONFIGURATION = RTCConfiguration(
-    {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
-)
 class AudioBufferProcessor(AudioProcessorBase):
     """
     A custom audio processor that accumulates raw audio frames in memory.
-    When the user stops, we can finalize them into a single WAV for STT.
     """
     def __init__(self) -> None:
         self.frames = []
     def recv_audio(self, frame: av.AudioFrame) -> av.AudioFrame:
-        # Convert the audio frame to a pydub AudioSegment
         pcm = frame.to_ndarray()
-        # The shape is (channels, samples)
-        # We'll assume single channel or handle the first channel
         if pcm.ndim == 2 and pcm.shape[0] > 1:
-            # If stereo, just take the first channel for STT
             pcm = pcm[0, :]
         sample_rate = frame.sample_rate
         samples = pcm.astype(np.int16).tobytes()
         segment = pydub.AudioSegment(
@@ -48,8 +41,7 @@ class AudioBufferProcessor(AudioProcessorBase):
     def finalize_wav(self) -> str:
         """
-        Once the user stops recording, combine frames into a single WAV file.
-        Returns path to the wav file.
         """
         if not self.frames:
             return ""
@@ -58,6 +50,25 @@ class AudioBufferProcessor(AudioProcessorBase):
             combined.export(tmp_wav.name, format="wav")
             return tmp_wav.name
 def handle_qa_exchange(conversation_so_far: str, user_question: str) -> (bytes, str):
     """
@@ -86,7 +97,6 @@ def handle_qa_exchange(conversation_so_far: str, user_question: str) -> (bytes,
     if not answer_text.strip():
         return (None, "")
-    # TTS
     audio_file_path = generate_audio_mp3(answer_text, "John")
     with open(audio_file_path, "rb") as f:
         audio_bytes = f.read()

 # qa.py
 import os
+import requests
 import json
 import tempfile
 import streamlit as st
 from utils import generate_audio_mp3, call_groq_api_for_qa
 import av
 import pydub
 import numpy as np
+from streamlit_webrtc import AudioProcessorBase
 class AudioBufferProcessor(AudioProcessorBase):
     """
     A custom audio processor that accumulates raw audio frames in memory.
+    Now we also count frames for debugging.
     """
     def __init__(self) -> None:
         self.frames = []
+        self.frame_count = 0
     def recv_audio(self, frame: av.AudioFrame) -> av.AudioFrame:
+        self.frame_count += 1  # debug increment
         pcm = frame.to_ndarray()
         if pcm.ndim == 2 and pcm.shape[0] > 1:
             pcm = pcm[0, :]
         sample_rate = frame.sample_rate
         samples = pcm.astype(np.int16).tobytes()
         segment = pydub.AudioSegment(
     def finalize_wav(self) -> str:
         """
+        Combine frames into one WAV file if we have them.
         """
         if not self.frames:
             return ""
             combined.export(tmp_wav.name, format="wav")
             return tmp_wav.name
def transcribe_audio_deepgram(local_audio_path: str, timeout: float = 60.0) -> str:
    """
    Send a local WAV file to Deepgram's speech-to-text endpoint and return the transcript.

    Args:
        local_audio_path: Path to the WAV file to upload.
        timeout: Seconds to wait for Deepgram before giving up. Without a
            timeout, `requests` waits forever on a stalled connection.

    Returns:
        The transcript of the first alternative of the first channel, or an
        empty string when the response carries no transcript.

    Raises:
        ValueError: If the DEEPGRAM_API_KEY environment variable is unset or empty.
        OSError: If the audio file cannot be opened.
        requests.RequestException: On network failure, timeout, or a non-2xx
            response (via `raise_for_status`).
    """
    api_key = os.environ.get("DEEPGRAM_API_KEY")
    if not api_key:
        raise ValueError("Deepgram API key not found in environment variables.")

    url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
    headers = {
        "Authorization": f"Token {api_key}",
        "Content-Type": "audio/wav",
    }

    # Pass the open file object so requests streams the upload instead of
    # loading the whole recording into memory first.
    with open(local_audio_path, "rb") as audio_file:
        response = requests.post(
            url, headers=headers, data=audio_file, timeout=timeout
        )
    response.raise_for_status()

    payload = response.json()
    try:
        best_alternative = payload["results"]["channels"][0]["alternatives"][0]
        transcript = best_alternative.get("transcript", "")
    except (KeyError, IndexError, TypeError, AttributeError):
        # A response without channels/alternatives (e.g. silent audio) means
        # "nothing was transcribed", not a failure worth crashing the app over.
        return ""
    return transcript or ""
 def handle_qa_exchange(conversation_so_far: str, user_question: str) -> (bytes, str):
     """
     if not answer_text.strip():
         return (None, "")
     audio_file_path = generate_audio_mp3(answer_text, "John")
     with open(audio_file_path, "rb") as f:
         audio_bytes = f.read()