SearchPod1.0

Sleeping

App Files Community

siddhartharyaai committed on Feb 9

Commit

905e8ce

verified ·

1 Parent(s): f867612

Update qa.py

Browse files

Files changed (1) hide show

qa.py +31 -16

qa.py CHANGED Viewed

@@ -1,18 +1,24 @@
 import os
 import requests
 import json
 import streamlit as st
-from utils import generate_audio_mp3
 def transcribe_audio_deepgram(local_audio_path: str) -> str:
     """
-    Sends a local audio file to Deepgram for STT (Speech-to-Text).
     """
     DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
     if not DEEPGRAM_API_KEY:
         raise ValueError("Deepgram API key not found in environment variables.")
     url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
     headers = {
         "Authorization": f"Token {DEEPGRAM_API_KEY}",
         "Content-Type": "audio/wav"
@@ -23,51 +29,60 @@ def transcribe_audio_deepgram(local_audio_path: str) -> str:
     response.raise_for_status()
     data = response.json()
-    return data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
 def call_llm_for_qa(conversation_so_far: str, user_question: str) -> dict:
     """
-    Calls LLM (Groq API) to generate a structured response to a follow-up question.
     """
     system_prompt = f"""
-    You are an expert guest in a podcast session. The user is asking a follow-up question.
-    Previous conversation:
     {conversation_so_far}
     New user question:
     {user_question}
     Please respond in JSON with keys "speaker" and "text", e.g.:
-    {{ "speaker": "Guest", "text": "Sure, here's my answer..." }}
     """
-    from utils import query_llm_for_additional_info
-    raw_json_response = query_llm_for_additional_info(user_question, conversation_so_far)
     response_dict = json.loads(raw_json_response)
     return response_dict
 def handle_qa_exchange(user_question: str) -> (bytes, str):
     """
-    1) Reads conversation_so_far from session_state
-    2) Calls the LLM for a follow-up answer
-    3) Generates TTS audio
-    4) Returns (audio_bytes, answer_text)
     """
     conversation_so_far = st.session_state.get("conversation_history", "")
     response_dict = call_llm_for_qa(conversation_so_far, user_question)
     answer_text = response_dict.get("text", "")
-    speaker = response_dict.get("speaker", "Guest")
     new_history = conversation_so_far + f"\nUser: {user_question}\n{speaker}: {answer_text}\n"
     st.session_state["conversation_history"] = new_history
     if not answer_text.strip():
         return (None, "")
-    audio_file_path = generate_audio_mp3(answer_text, speaker)
     with open(audio_file_path, "rb") as f:
         audio_bytes = f.read()
-    return (audio_bytes, answer_text)

+# qa.py
 import os
 import requests
 import json
+import tempfile
 import streamlit as st
+from utils import generate_audio_mp3  # Reuse your existing TTS function
 def transcribe_audio_deepgram(local_audio_path: str) -> str:
     """
+    Sends a local audio file to Deepgram for STT.
+    Returns the transcript text if successful, or raises an error if failed.
     """
     DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
     if not DEEPGRAM_API_KEY:
         raise ValueError("Deepgram API key not found in environment variables.")
     url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
+    # For WAV -> "audio/wav". If user uploads MP3, you'd use "audio/mpeg".
     headers = {
         "Authorization": f"Token {DEEPGRAM_API_KEY}",
         "Content-Type": "audio/wav"
     response.raise_for_status()
     data = response.json()
+    # Extract the transcript
+    transcript = data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
+    return transcript
 def call_llm_for_qa(conversation_so_far: str, user_question: str) -> dict:
     """
+    Minimal function that calls your LLM (Groq) to answer a follow-up question.
+    Returns a Python dict, e.g.: {"speaker": "John", "text": "..."}
     """
     system_prompt = f"""
+    You are John, the guest speaker. The user is asking a follow-up question.
+    Conversation so far:
     {conversation_so_far}
     New user question:
     {user_question}
     Please respond in JSON with keys "speaker" and "text", e.g.:
+    {{ "speaker": "John", "text": "Sure, here's my answer..." }}
     """
+    from utils import call_groq_api_for_qa
+    raw_json_response = call_groq_api_for_qa(system_prompt)
+    # Expect a JSON string: {"speaker": "John", "text": "some short answer"}
     response_dict = json.loads(raw_json_response)
     return response_dict
 def handle_qa_exchange(user_question: str) -> (bytes, str):
     """
+    1) Read conversation_so_far from session_state
+    2) Call the LLM for a short follow-up answer
+    3) Generate TTS audio
+    4) Return (audio_bytes, answer_text)
     """
     conversation_so_far = st.session_state.get("conversation_history", "")
+    # Ask the LLM
     response_dict = call_llm_for_qa(conversation_so_far, user_question)
     answer_text = response_dict.get("text", "")
+    speaker = response_dict.get("speaker", "John")
+    # Update conversation
     new_history = conversation_so_far + f"\nUser: {user_question}\n{speaker}: {answer_text}\n"
     st.session_state["conversation_history"] = new_history
     if not answer_text.strip():
         return (None, "")
+    # TTS
+    audio_file_path = generate_audio_mp3(answer_text, "John")  # always John
     with open(audio_file_path, "rb") as f:
         audio_bytes = f.read()
+    return (audio_bytes, answer_text)