siddhartharyaai commited on
Commit
905e8ce
·
verified ·
1 Parent(s): f867612

Update qa.py

Browse files
Files changed (1) hide show
  1. qa.py +31 -16
qa.py CHANGED
@@ -1,18 +1,24 @@
 
 
1
  import os
2
  import requests
3
  import json
 
4
  import streamlit as st
5
- from utils import generate_audio_mp3
 
6
 
7
  def transcribe_audio_deepgram(local_audio_path: str) -> str:
8
  """
9
- Sends a local audio file to Deepgram for STT (Speech-to-Text).
 
10
  """
11
  DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
12
  if not DEEPGRAM_API_KEY:
13
  raise ValueError("Deepgram API key not found in environment variables.")
14
 
15
  url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
 
16
  headers = {
17
  "Authorization": f"Token {DEEPGRAM_API_KEY}",
18
  "Content-Type": "audio/wav"
@@ -23,51 +29,60 @@ def transcribe_audio_deepgram(local_audio_path: str) -> str:
23
  response.raise_for_status()
24
 
25
  data = response.json()
26
- return data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
 
 
 
27
 
28
def call_llm_for_qa(conversation_so_far: str, user_question: str) -> dict:
    """
    Call the LLM (Groq API) for a structured answer to a follow-up question.

    Parameters
    ----------
    conversation_so_far : str
        The transcript accumulated so far.
    user_question : str
        The new follow-up question from the user.

    Returns
    -------
    dict
        Parsed JSON reply, expected to carry "speaker" and "text" keys.
        Raises json.JSONDecodeError if the LLM reply is not valid JSON.
    """
    system_prompt = f"""
    You are an expert guest in a podcast session. The user is asking a follow-up question.
    Previous conversation:
    {conversation_so_far}

    New user question:
    {user_question}

    Please respond in JSON with keys "speaker" and "text", e.g.:
    {{ "speaker": "Guest", "text": "Sure, here's my answer..." }}
    """

    # Lazy import, as in the original, to avoid a hard module-load dependency.
    from utils import query_llm_for_additional_info

    # NOTE(review): system_prompt is constructed above but never forwarded to
    # the helper — the helper receives the raw question/history instead.
    # Confirm whether that is intentional.
    raw_reply = query_llm_for_additional_info(user_question, conversation_so_far)
    return json.loads(raw_reply)
49
 
 
50
def handle_qa_exchange(user_question: str) -> (bytes, str):
    """
    Run one Q&A turn:
    1) Reads conversation_so_far from session_state
    2) Calls the LLM for a follow-up answer
    3) Generates TTS audio
    4) Returns (audio_bytes, answer_text)

    Returns (None, "") when the LLM produced no usable text. In that case the
    conversation history is left untouched (fix: previously the empty exchange
    was appended to history before the emptiness check, polluting the
    transcript with a blank guest turn).
    """
    conversation_so_far = st.session_state.get("conversation_history", "")

    response_dict = call_llm_for_qa(conversation_so_far, user_question)
    answer_text = response_dict.get("text", "")
    speaker = response_dict.get("speaker", "Guest")

    # Bail out BEFORE touching session history so a failed exchange does not
    # record an empty answer.
    if not answer_text.strip():
        return (None, "")

    new_history = conversation_so_far + f"\nUser: {user_question}\n{speaker}: {answer_text}\n"
    st.session_state["conversation_history"] = new_history

    # TTS: generate_audio_mp3 returns a path to the rendered file.
    audio_file_path = generate_audio_mp3(answer_text, speaker)
    with open(audio_file_path, "rb") as f:
        audio_bytes = f.read()

    return (audio_bytes, answer_text)
 
1
+ # qa.py
2
+
3
  import os
4
  import requests
5
  import json
6
+ import tempfile
7
  import streamlit as st
8
+
9
+ from utils import generate_audio_mp3 # Reuse your existing TTS function
10
 
11
  def transcribe_audio_deepgram(local_audio_path: str) -> str:
12
  """
13
+ Sends a local audio file to Deepgram for STT.
14
+ Returns the transcript text if successful, or raises an error if failed.
15
  """
16
  DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
17
  if not DEEPGRAM_API_KEY:
18
  raise ValueError("Deepgram API key not found in environment variables.")
19
 
20
  url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
21
+ # For WAV -> "audio/wav". If user uploads MP3, you'd use "audio/mpeg".
22
  headers = {
23
  "Authorization": f"Token {DEEPGRAM_API_KEY}",
24
  "Content-Type": "audio/wav"
 
29
  response.raise_for_status()
30
 
31
  data = response.json()
32
+ # Extract the transcript
33
+ transcript = data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
34
+ return transcript
35
+
36
 
37
def call_llm_for_qa(conversation_so_far: str, user_question: str) -> dict:
    """
    Ask the Groq-backed LLM to answer a follow-up question as "John".

    Parameters
    ----------
    conversation_so_far : str
        Transcript accumulated so far.
    user_question : str
        The new follow-up question.

    Returns
    -------
    dict
        Parsed JSON reply, e.g. {"speaker": "John", "text": "..."}.
        Raises json.JSONDecodeError if the reply is not valid JSON.
    """
    system_prompt = f"""
    You are John, the guest speaker. The user is asking a follow-up question.
    Conversation so far:
    {conversation_so_far}

    New user question:
    {user_question}

    Please respond in JSON with keys "speaker" and "text", e.g.:
    {{ "speaker": "John", "text": "Sure, here's my answer..." }}
    """

    # Lazy import, as in the original, to avoid a hard module-load dependency.
    from utils import call_groq_api_for_qa

    # The helper is expected to return a JSON string such as
    # {"speaker": "John", "text": "some short answer"}.
    llm_reply = call_groq_api_for_qa(system_prompt)
    return json.loads(llm_reply)
60
 
61
+
62
def handle_qa_exchange(user_question: str) -> (bytes, str):
    """
    Run one Q&A turn:
    1) Read conversation_so_far from session_state
    2) Call the LLM for a short follow-up answer
    3) Generate TTS audio
    4) Return (audio_bytes, answer_text)

    Returns (None, "") when the LLM produced no usable text. In that case the
    conversation history is left untouched (fix: previously the empty exchange
    was appended to history before the emptiness check, polluting the
    transcript with a blank guest turn).
    """
    conversation_so_far = st.session_state.get("conversation_history", "")

    # Ask the LLM
    response_dict = call_llm_for_qa(conversation_so_far, user_question)
    answer_text = response_dict.get("text", "")
    speaker = response_dict.get("speaker", "John")

    # Bail out BEFORE touching session history so a failed exchange does not
    # record an empty answer.
    if not answer_text.strip():
        return (None, "")

    # Update conversation only once we know we have a real answer.
    new_history = conversation_so_far + f"\nUser: {user_question}\n{speaker}: {answer_text}\n"
    st.session_state["conversation_history"] = new_history

    # TTS — the voice is deliberately pinned to "John" regardless of the
    # speaker name the LLM returned (original behavior, kept as-is).
    audio_file_path = generate_audio_mp3(answer_text, "John")  # always John
    with open(audio_file_path, "rb") as f:
        audio_bytes = f.read()

    return (audio_bytes, answer_text)