MyPod_10

Running

App Files Community

siddhartharyaai committed on Jan 14

Commit

38c419f

verified ·

1 Parent(s): dd7ace6

Update qa.py

Browse files

Files changed (1) hide show

qa.py +34 -50

qa.py CHANGED Viewed

@@ -6,76 +6,38 @@ import json
 import tempfile
 import streamlit as st
-from utils import generate_audio_mp3, call_groq_api_for_qa
-import av
-import pydub
-import numpy as np
-from streamlit_webrtc import AudioProcessorBase
-class AudioBufferProcessor(AudioProcessorBase):
-    """
-    A custom audio processor that accumulates raw audio frames in memory.
-    Now we also count frames for debugging.
-    """
-    def __init__(self) -> None:
-        self.frames = []
-        self.frame_count = 0
-    def recv_audio(self, frame: av.AudioFrame) -> av.AudioFrame:
-        self.frame_count += 1  # debug increment
-        pcm = frame.to_ndarray()
-        if pcm.ndim == 2 and pcm.shape[0] > 1:
-            pcm = pcm[0, :]
-        sample_rate = frame.sample_rate
-        samples = pcm.astype(np.int16).tobytes()
-        segment = pydub.AudioSegment(
-            data=samples,
-            sample_width=2,  # int16
-            frame_rate=sample_rate,
-            channels=1
-        )
-        self.frames.append(segment)
-        return frame
-    def finalize_wav(self) -> str:
-        """
-        Combine frames into one WAV file if we have them.
-        """
-        if not self.frames:
-            return ""
-        combined = sum(self.frames)
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_wav:
-            combined.export(tmp_wav.name, format="wav")
-            return tmp_wav.name
 def transcribe_audio_deepgram(local_audio_path: str) -> str:
     """
-    Sends a local audio file to Deepgram STT, returns the transcript text if successful.
     """
     DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
     if not DEEPGRAM_API_KEY:
         raise ValueError("Deepgram API key not found in environment variables.")
     url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
     headers = {
         "Authorization": f"Token {DEEPGRAM_API_KEY}",
         "Content-Type": "audio/wav"
     }
     with open(local_audio_path, "rb") as f:
         response = requests.post(url, headers=headers, data=f)
     response.raise_for_status()
     data = response.json()
     transcript = data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
     return transcript
-def handle_qa_exchange(conversation_so_far: str, user_question: str) -> (bytes, str):
     """
-    1) Build system prompt from conversation_so_far + user_question
-    2) Call the LLM to get short JSON
-    3) TTS the answer
-    4) Return (audio_bytes, answer_text)
     """
     system_prompt = f"""
     You are John, the guest speaker. The user is asking a follow-up question.
@@ -89,15 +51,37 @@ def handle_qa_exchange(conversation_so_far: str, user_question: str) -> (bytes,
     {{ "speaker": "John", "text": "Sure, here's my answer..." }}
     """
     raw_json_response = call_groq_api_for_qa(system_prompt)
     response_dict = json.loads(raw_json_response)
     answer_text = response_dict.get("text", "")
     speaker = response_dict.get("speaker", "John")
     if not answer_text.strip():
         return (None, "")
-    audio_file_path = generate_audio_mp3(answer_text, "John")
     with open(audio_file_path, "rb") as f:
         audio_bytes = f.read()

 import tempfile
 import streamlit as st
+from utils import generate_audio_mp3  # Reuse your existing TTS function
 def transcribe_audio_deepgram(local_audio_path: str) -> str:
     """
+    Sends a local audio file to Deepgram for STT.
+    Returns the transcript text if successful, or raises an error if failed.
     """
     DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
     if not DEEPGRAM_API_KEY:
         raise ValueError("Deepgram API key not found in environment variables.")
     url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
+    # For WAV -> "audio/wav". If user uploads MP3, you'd use "audio/mpeg".
     headers = {
         "Authorization": f"Token {DEEPGRAM_API_KEY}",
         "Content-Type": "audio/wav"
     }
     with open(local_audio_path, "rb") as f:
         response = requests.post(url, headers=headers, data=f)
     response.raise_for_status()
     data = response.json()
+    # Extract the transcript
     transcript = data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
     return transcript
+def call_llm_for_qa(conversation_so_far: str, user_question: str) -> dict:
     """
+    Minimal function that calls your LLM (Groq) to answer a follow-up question.
+    Returns a Python dict, e.g.: {"speaker": "John", "text": "..."}
     """
     system_prompt = f"""
     You are John, the guest speaker. The user is asking a follow-up question.
     {{ "speaker": "John", "text": "Sure, here's my answer..." }}
     """
+    from utils import call_groq_api_for_qa
     raw_json_response = call_groq_api_for_qa(system_prompt)
+    # Expect a JSON string: {"speaker": "John", "text": "some short answer"}
     response_dict = json.loads(raw_json_response)
+    return response_dict
+def handle_qa_exchange(user_question: str) -> (bytes, str):
+    """
+    1) Read conversation_so_far from session_state
+    2) Call the LLM for a short follow-up answer
+    3) Generate TTS audio
+    4) Return (audio_bytes, answer_text)
+    """
+    conversation_so_far = st.session_state.get("conversation_history", "")
+    # Ask the LLM
+    response_dict = call_llm_for_qa(conversation_so_far, user_question)
     answer_text = response_dict.get("text", "")
     speaker = response_dict.get("speaker", "John")
+    # Update conversation
+    new_history = conversation_so_far + f"\nUser: {user_question}\n{speaker}: {answer_text}\n"
+    st.session_state["conversation_history"] = new_history
     if not answer_text.strip():
         return (None, "")
+    # TTS
+    audio_file_path = generate_audio_mp3(answer_text, "John")  # always John
     with open(audio_file_path, "rb") as f:
         audio_bytes = f.read()