# qa.py
import os
import json
import tempfile
import wave
from typing import Optional, Tuple

import av
import numpy as np
import pydub
import streamlit as st
from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration, AudioProcessorBase

from utils import generate_audio_mp3, call_groq_api_for_qa

# For streaming from the mic, we need an RTC configuration with a STUN server.
RTC_CONFIGURATION = RTCConfiguration(
    {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
)


class AudioBufferProcessor(AudioProcessorBase):
    """
    A custom audio processor that accumulates raw audio frames in memory.
    When the user stops recording, the frames are finalized into a single
    WAV file for STT.
    """

    def __init__(self) -> None:
        self.frames = []

    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
        # streamlit_webrtc invokes recv() once per incoming audio frame.
        pcm = frame.to_ndarray()
        # For planar formats the shape is (channels, samples); we assume a
        # single channel, or keep only the first channel for STT.
        if pcm.ndim == 2 and pcm.shape[0] > 1:
            pcm = pcm[0, :]
        sample_rate = frame.sample_rate
        # Assumes 16-bit PCM input; astype() casts without rescaling.
        samples = pcm.astype(np.int16).tobytes()
        segment = pydub.AudioSegment(
            data=samples,
            sample_width=2,  # int16
            frame_rate=sample_rate,
            channels=1,
        )
        self.frames.append(segment)
        return frame

    def finalize_wav(self) -> str:
        """
        Once the user stops recording, combine the buffered frames into a
        single WAV file. Returns the path to that file, or an empty string
        if nothing was recorded.
        """
        if not self.frames:
            return ""
        # pydub's AudioSegment supports sum() for concatenating segments.
        combined = sum(self.frames)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_wav:
            combined.export(tmp_wav.name, format="wav")
        return tmp_wav.name
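

# A minimal sketch (not wired into the app here) of how AudioBufferProcessor
# is meant to plug into streamlit_webrtc. The widget key and button label are
# illustrative assumptions, not names from the real app.
def _example_recorder_ui() -> None:
    ctx = webrtc_streamer(
        key="qa-audio",  # illustrative key
        mode=WebRtcMode.SENDONLY,
        rtc_configuration=RTC_CONFIGURATION,
        media_stream_constraints={"audio": True, "video": False},
        audio_processor_factory=AudioBufferProcessor,
    )
    # Once the user stops talking, collapse the buffered frames to one WAV
    # that can be handed to the STT step.
    if ctx.audio_processor and st.button("Stop & transcribe"):
        wav_path = ctx.audio_processor.finalize_wav()
        if wav_path:
            st.write(f"Recorded audio saved to {wav_path}")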


def handle_qa_exchange(conversation_so_far: str, user_question: str) -> Tuple[Optional[bytes], str]:
    """
    1) Build the system prompt from conversation_so_far + user_question
    2) Call the LLM for a short JSON reply
    3) Run TTS on the answer
    4) Return (audio_bytes, answer_text), or (None, "") if there is no usable answer
    """
    system_prompt = f"""
    You are John, the guest speaker. The user is asking a follow-up question.
    Conversation so far:
    {conversation_so_far}
    New user question:
    {user_question}
    Please respond in JSON with keys "speaker" and "text", e.g.:
    {{ "speaker": "John", "text": "Sure, here's my answer..." }}
    """

    raw_json_response = call_groq_api_for_qa(system_prompt)
    try:
        response_dict = json.loads(raw_json_response)
    except json.JSONDecodeError:
        # The model occasionally returns malformed JSON; treat it as no answer.
        return (None, "")

    answer_text = response_dict.get("text", "")
    speaker = response_dict.get("speaker", "John")
    if not answer_text.strip():
        return (None, "")

    # TTS the answer in the responding speaker's voice.
    audio_file_path = generate_audio_mp3(answer_text, speaker)
    with open(audio_file_path, "rb") as f:
        audio_bytes = f.read()
    return (audio_bytes, answer_text)
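

# A minimal sketch (not wired into the app here) of a single Q&A turn using
# handle_qa_exchange. The session-state key and widget labels are illustrative
# assumptions; the real page layout may differ. Assumes generate_audio_mp3
# produces MP3, as its name suggests.
def _example_qa_turn() -> None:
    conversation = st.session_state.get("conversation_so_far", "")
    question = st.text_input("Ask John a follow-up question")
    if question:
        audio_bytes, answer_text = handle_qa_exchange(conversation, question)
        if audio_bytes:
            st.audio(audio_bytes, format="audio/mp3")
            st.markdown(f"**John:** {answer_text}")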