MyPod_10

Running

App Files Files Community

siddhartharyaai committed on Jan 14

Commit

70347ba

verified ·

1 Parent(s): aacfe72

Create qa.py

Browse files

Files changed (1) hide show

qa.py +93 -0

qa.py ADDED Viewed

	@@ -0,0 +1,93 @@

+# qa.py
+import os
+import requests
+import tempfile
+import json
+import streamlit as st
+from utils import generate_audio_mp3  # We'll reuse your existing TTS function
+def transcribe_audio_deepgram(local_audio_path: str) -> str:
+    """
+    Sends a local audio file to Deepgram for STT.
+    Returns the transcript text if successful, or raises an error if failed.
+    """
+    DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
+    if not DEEPGRAM_API_KEY:
+        raise ValueError("Deepgram API key not found in environment variables.")
+    url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
+    # For WAV, set Content-Type to audio/wav. For MP3, use audio/mpeg, etc.
+    headers = {
+        "Authorization": f"Token {DEEPGRAM_API_KEY}",
+        "Content-Type": "audio/wav",
+    }
+    with open(local_audio_path, "rb") as f:
+        response = requests.post(url, headers=headers, data=f)
+    response.raise_for_status()
+    data = response.json()
+    # Extract the transcript
+    transcript = data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
+    return transcript
+def call_llm_for_qa(conversation_so_far: str, user_question: str) -> dict:
+    """
+    Minimal function that calls your LLM (Groq) to answer a follow-up question.
+    Returns a Python dict, e.g.: {"speaker": "John", "text": "..."}
+    """
+    # Example system prompt:
+    system_prompt = f"""
+    You are John, the guest on this podcast. The user is asking a follow-up question.
+    Conversation so far:
+    {conversation_so_far}
+    New user question:
+    {user_question}
+    Please respond in JSON with keys "speaker" and "text", e.g.:
+    {{ "speaker": "John", "text": "Sure, here's my answer..." }}
+    """
+    # Here, you'd do something like your existing generate_script or direct Groq call
+    # For demonstration, let's do a placeholder
+    # This should be replaced by your real LLM call, e.g., call_groq_api(system_prompt)
+    # and parse out the JSON response.
+    # Example pseudo-code:
+    from utils import call_groq_api_for_qa  # Suppose you define this in utils
+    raw_json_response = call_groq_api_for_qa(system_prompt)
+    # Expect something like: {"speaker": "John", "text": "Here's an answer."}
+    response_dict = json.loads(raw_json_response)
+    return response_dict
+def handle_qa_exchange(user_question: str) -> (bytes, str):
+    """
+    1) Read conversation_so_far from session_state
+    2) Call the LLM for a short follow-up answer
+    3) Generate TTS audio
+    4) Return (audio_bytes, answer_text)
+    """
+    conversation_so_far = st.session_state.get("conversation_history", "")
+    # Ask LLM
+    response_dict = call_llm_for_qa(conversation_so_far, user_question)
+    answer_text = response_dict.get("text", "")
+    speaker = response_dict.get("speaker", "John")
+    # Update conversation
+    # We store: "User: question" and "John: answer"
+    new_history = conversation_so_far + f"\nUser: {user_question}\n{speaker}: {answer_text}\n"
+    st.session_state["conversation_history"] = new_history
+    if not answer_text.strip():
+        return (None, "")
+    # TTS
+    audio_file_path = generate_audio_mp3(answer_text, "John")  # using John voice
+    with open(audio_file_path, "rb") as f:
+        audio_bytes = f.read()
+    return (audio_bytes, answer_text)