siddhartharyaai commited on
Commit
38c419f
·
verified ·
1 Parent(s): dd7ace6

Update qa.py

Browse files
Files changed (1) hide show
  1. qa.py +34 -50
qa.py CHANGED
@@ -6,76 +6,38 @@ import json
6
  import tempfile
7
  import streamlit as st
8
 
9
- from utils import generate_audio_mp3, call_groq_api_for_qa
10
- import av
11
- import pydub
12
- import numpy as np
13
-
14
- from streamlit_webrtc import AudioProcessorBase
15
-
16
class AudioBufferProcessor(AudioProcessorBase):
    """
    Accumulate raw WebRTC audio frames in memory for later export.

    Each frame delivered to ``recv_audio`` is down-mixed to a single
    channel of int16 PCM and buffered as a ``pydub.AudioSegment``;
    ``finalize_wav`` concatenates the buffer into one temporary WAV file.
    ``frame_count`` exists purely for debugging how many frames arrived.
    """

    def __init__(self) -> None:
        self.frames = []       # buffered mono pydub.AudioSegment chunks
        self.frame_count = 0   # debug: number of frames received so far

    def recv_audio(self, frame: av.AudioFrame) -> av.AudioFrame:
        """Convert one ``av.AudioFrame`` to a mono int16 segment and buffer it."""
        self.frame_count += 1  # debug increment

        pcm = frame.to_ndarray()
        # Multi-channel frames arrive as (channels, samples); keep channel 0.
        if pcm.ndim == 2 and pcm.shape[0] > 1:
            pcm = pcm[0, :]
        sample_rate = frame.sample_rate
        samples = pcm.astype(np.int16).tobytes()
        segment = pydub.AudioSegment(
            data=samples,
            sample_width=2,  # int16 -> 2 bytes per sample
            frame_rate=sample_rate,
            channels=1,
        )
        self.frames.append(segment)
        return frame

    def finalize_wav(self) -> str:
        """
        Concatenate all buffered segments into a temporary WAV file.

        Returns the temp-file path, or "" when no audio was captured.
        The file is created with ``delete=False``; the caller owns cleanup.
        """
        if not self.frames:
            return ""
        # Fold seeded from the first segment: sum() would start from the
        # int 0, and 0 + AudioSegment is not a supported pydub operation.
        combined = self.frames[0]
        for seg in self.frames[1:]:
            combined = combined + seg
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_wav:
            combined.export(tmp_wav.name, format="wav")
        return tmp_wav.name
52
 
53
def transcribe_audio_deepgram(local_audio_path: str) -> str:
    """
    Send a local WAV file to Deepgram STT and return the transcript text.

    Parameters
    ----------
    local_audio_path : str
        Path to a WAV file on disk (Content-Type is hard-coded to audio/wav).

    Returns
    -------
    str
        The transcript ("" when Deepgram returns no transcript field).

    Raises
    ------
    ValueError
        If DEEPGRAM_API_KEY is not set in the environment.
    requests.HTTPError
        If Deepgram responds with a non-2xx status.
    """
    DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
    if not DEEPGRAM_API_KEY:
        raise ValueError("Deepgram API key not found in environment variables.")

    url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
    headers = {
        "Authorization": f"Token {DEEPGRAM_API_KEY}",
        "Content-Type": "audio/wav"
    }

    # Stream the file body. The explicit timeout prevents an unresponsive
    # API from hanging the app forever — requests has NO default timeout.
    with open(local_audio_path, "rb") as f:
        response = requests.post(url, headers=headers, data=f, timeout=300)
    response.raise_for_status()

    data = response.json()
    # First channel's best alternative carries the transcript.
    transcript = data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
    return transcript
72
 
73
- def handle_qa_exchange(conversation_so_far: str, user_question: str) -> (bytes, str):
 
74
  """
75
- 1) Build system prompt from conversation_so_far + user_question
76
- 2) Call the LLM to get short JSON
77
- 3) TTS the answer
78
- 4) Return (audio_bytes, answer_text)
79
  """
80
  system_prompt = f"""
81
  You are John, the guest speaker. The user is asking a follow-up question.
@@ -89,15 +51,37 @@ def handle_qa_exchange(conversation_so_far: str, user_question: str) -> (bytes,
89
  {{ "speaker": "John", "text": "Sure, here's my answer..." }}
90
  """
91
 
 
 
92
  raw_json_response = call_groq_api_for_qa(system_prompt)
 
93
  response_dict = json.loads(raw_json_response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  answer_text = response_dict.get("text", "")
95
  speaker = response_dict.get("speaker", "John")
96
 
 
 
 
 
97
  if not answer_text.strip():
98
  return (None, "")
99
 
100
- audio_file_path = generate_audio_mp3(answer_text, "John")
 
101
  with open(audio_file_path, "rb") as f:
102
  audio_bytes = f.read()
103
 
 
6
  import tempfile
7
  import streamlit as st
8
 
9
+ from utils import generate_audio_mp3 # Reuse your existing TTS function
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
def transcribe_audio_deepgram(local_audio_path: str) -> str:
    """
    Sends a local audio file to Deepgram for STT.
    Returns the transcript text if successful, or raises an error if failed.

    Raises ValueError when DEEPGRAM_API_KEY is missing, and
    requests.HTTPError on a non-2xx Deepgram response.
    """
    DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
    if not DEEPGRAM_API_KEY:
        raise ValueError("Deepgram API key not found in environment variables.")

    url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
    # For WAV -> "audio/wav". If user uploads MP3, you'd use "audio/mpeg".
    headers = {
        "Authorization": f"Token {DEEPGRAM_API_KEY}",
        "Content-Type": "audio/wav"
    }

    # Stream the file body. An explicit timeout keeps a stalled Deepgram
    # endpoint from blocking the Streamlit app — requests has no default.
    with open(local_audio_path, "rb") as f:
        response = requests.post(url, headers=headers, data=f, timeout=300)
    response.raise_for_status()

    data = response.json()
    # Extract the transcript from the first channel's best alternative.
    transcript = data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
    return transcript
35
 
36
+
37
+ def call_llm_for_qa(conversation_so_far: str, user_question: str) -> dict:
38
  """
39
+ Minimal function that calls your LLM (Groq) to answer a follow-up question.
40
+ Returns a Python dict, e.g.: {"speaker": "John", "text": "..."}
 
 
41
  """
42
  system_prompt = f"""
43
  You are John, the guest speaker. The user is asking a follow-up question.
 
51
  {{ "speaker": "John", "text": "Sure, here's my answer..." }}
52
  """
53
 
54
+ from utils import call_groq_api_for_qa
55
+
56
  raw_json_response = call_groq_api_for_qa(system_prompt)
57
+ # Expect a JSON string: {"speaker": "John", "text": "some short answer"}
58
  response_dict = json.loads(raw_json_response)
59
+ return response_dict
60
+
61
+
62
+ def handle_qa_exchange(user_question: str) -> (bytes, str):
63
+ """
64
+ 1) Read conversation_so_far from session_state
65
+ 2) Call the LLM for a short follow-up answer
66
+ 3) Generate TTS audio
67
+ 4) Return (audio_bytes, answer_text)
68
+ """
69
+ conversation_so_far = st.session_state.get("conversation_history", "")
70
+
71
+ # Ask the LLM
72
+ response_dict = call_llm_for_qa(conversation_so_far, user_question)
73
  answer_text = response_dict.get("text", "")
74
  speaker = response_dict.get("speaker", "John")
75
 
76
+ # Update conversation
77
+ new_history = conversation_so_far + f"\nUser: {user_question}\n{speaker}: {answer_text}\n"
78
+ st.session_state["conversation_history"] = new_history
79
+
80
  if not answer_text.strip():
81
  return (None, "")
82
 
83
+ # TTS
84
+ audio_file_path = generate_audio_mp3(answer_text, "John") # always John
85
  with open(audio_file_path, "rb") as f:
86
  audio_bytes = f.read()
87