siddhartharyaai committed (verified)
Commit 70347ba · Parent(s): aacfe72

Create qa.py

Files changed (1):
  1. qa.py +93 -0
qa.py ADDED
# qa.py

import json
import os
from typing import Optional, Tuple

import requests
import streamlit as st

from utils import generate_audio_mp3  # We'll reuse your existing TTS function


def transcribe_audio_deepgram(local_audio_path: str) -> str:
    """
    Sends a local audio file to Deepgram for STT.
    Returns the transcript text if successful, or raises an error if it failed.
    """
    DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
    if not DEEPGRAM_API_KEY:
        raise ValueError("Deepgram API key not found in environment variables.")

    url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
    # For WAV, set Content-Type to audio/wav. For MP3, use audio/mpeg, etc.
    headers = {
        "Authorization": f"Token {DEEPGRAM_API_KEY}",
        "Content-Type": "audio/wav",
    }
    with open(local_audio_path, "rb") as f:
        response = requests.post(url, headers=headers, data=f)
    response.raise_for_status()

    data = response.json()
    # Deepgram nests the transcript under results -> channels -> alternatives.
    transcript = data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
    return transcript


def call_llm_for_qa(conversation_so_far: str, user_question: str) -> dict:
    """
    Minimal function that calls your LLM (Groq) to answer a follow-up question.
    Returns a Python dict, e.g.: {"speaker": "John", "text": "..."}
    """
    system_prompt = f"""
You are John, the guest on this podcast. The user is asking a follow-up question.
Conversation so far:
{conversation_so_far}

New user question:
{user_question}

Please respond in JSON with keys "speaker" and "text", e.g.:
{{ "speaker": "John", "text": "Sure, here's my answer..." }}
"""

    # call_groq_api_for_qa is assumed to live in utils and return the model's
    # raw JSON reply as a string; a hedged sketch of it follows this listing.
    from utils import call_groq_api_for_qa

    raw_json_response = call_groq_api_for_qa(system_prompt)
    try:
        response_dict = json.loads(raw_json_response)
    except json.JSONDecodeError:
        # LLMs occasionally return malformed JSON; fall back to the raw text.
        response_dict = {"speaker": "John", "text": raw_json_response.strip()}
    return response_dict


def handle_qa_exchange(user_question: str) -> Tuple[Optional[bytes], str]:
    """
    1) Read conversation_so_far from session_state
    2) Call the LLM for a short follow-up answer
    3) Generate TTS audio
    4) Return (audio_bytes, answer_text); audio_bytes is None if there is no answer
    """
    conversation_so_far = st.session_state.get("conversation_history", "")

    # Ask the LLM
    response_dict = call_llm_for_qa(conversation_so_far, user_question)
    answer_text = response_dict.get("text", "")
    speaker = response_dict.get("speaker", "John")

    # Update the conversation: we store "User: question" and "John: answer"
    new_history = conversation_so_far + f"\nUser: {user_question}\n{speaker}: {answer_text}\n"
    st.session_state["conversation_history"] = new_history

    if not answer_text.strip():
        return (None, "")

    # TTS
    audio_file_path = generate_audio_mp3(answer_text, "John")  # using the John voice
    with open(audio_file_path, "rb") as f:
        audio_bytes = f.read()

    return (audio_bytes, answer_text)
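
qa.py imports call_groq_api_for_qa from utils, but that helper is not part of this commit. Below is a minimal sketch of what it could look like, assuming Groq's OpenAI-compatible chat-completions endpoint; the GROQ_API_KEY variable name, the model name, and the JSON-mode response_format are placeholder assumptions, not something this commit specifies:

# utils.py (sketch) -- a possible call_groq_api_for_qa
import os

import requests


def call_groq_api_for_qa(system_prompt: str) -> str:
    """Send the prompt to Groq's OpenAI-compatible endpoint; return the raw reply text."""
    api_key = os.environ.get("GROQ_API_KEY")  # assumed env var name
    if not api_key:
        raise ValueError("Groq API key not found in environment variables.")

    response = requests.post(
        "https://api.groq.com/openai/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
        json={
            "model": "llama-3.3-70b-versatile",  # assumed model; use whichever you run
            "messages": [{"role": "system", "content": system_prompt}],
            "response_format": {"type": "json_object"},  # JSON mode; drop if unsupported
        },
        timeout=60,
    )
    response.raise_for_status()
    # OpenAI-compatible shape: the text lives at choices[0].message.content.
    return response.json()["choices"][0]["message"]["content"]

Requesting a JSON object here keeps json.loads in call_llm_for_qa from tripping over prose the model might otherwise wrap around the answer.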
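And a sketch of how the two entry points might be wired into the Streamlit app; the app.py placement, the widget layout, and the temp-file handling are assumptions, only handle_qa_exchange and transcribe_audio_deepgram come from this commit:

# app.py (sketch) -- a follow-up Q&A section
import tempfile

import streamlit as st

from qa import handle_qa_exchange, transcribe_audio_deepgram

st.subheader("Ask a follow-up question")

# Accept a typed question, or an uploaded voice question transcribed via Deepgram.
typed_question = st.text_input("Type your question")
voice_question = st.file_uploader("...or upload a WAV recording", type=["wav"])

question = typed_question
if voice_question is not None:
    # transcribe_audio_deepgram reads from disk, so persist the upload first.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(voice_question.read())
    question = transcribe_audio_deepgram(tmp.name)

if st.button("Ask") and question.strip():
    audio_bytes, answer_text = handle_qa_exchange(question)
    st.markdown(f"**John:** {answer_text}")
    if audio_bytes:  # None when the LLM produced no usable answer
        st.audio(audio_bytes, format="audio/mp3")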