Update app.py
app.py CHANGED
@@ -21,22 +21,13 @@ from utils import (
 )
 from prompts import SYSTEM_PROMPT
 
-#
-from qa import
+# The new Q&A with mic
+from qa import AudioBufferProcessor, handle_qa_exchange
+from streamlit_webrtc import webrtc_streamer, WebRtcMode
 
-MAX_QA_QUESTIONS = 5
+MAX_QA_QUESTIONS = 5
 
 def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
-    """
-    Looks for lines like:
-      **Angela**: Hello
-      **Dimitris**: Great topic...
-    We treat 'Angela' as the raw display_speaker, 'Hello' as text.
-    Then we map 'Angela' -> speaker='Jane' (if it matches host_name),
-    'Dimitris' -> speaker='John' (if it matches guest_name), etc.
-
-    Returns a list of DialogueItem.
-    """
     pattern = r"\*\*(.+?)\*\*:\s*(.+)"
     matches = re.findall(pattern, edited_text)
 
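The new imports pull AudioBufferProcessor and handle_qa_exchange from qa.py, which this diff does not show. The rest of the change only relies on the processor exposing a frames list and a finalize_wav() method that returns a file path (or None). A minimal sketch of such a class, assuming streamlit-webrtc's AudioProcessorBase, PyAV frames, and 16-bit mono PCM audio; the real implementation in qa.py may differ:

# Hypothetical sketch of the AudioBufferProcessor imported above (qa.py is not shown).
import tempfile
import wave

import av
import numpy as np
from streamlit_webrtc import AudioProcessorBase


class AudioBufferProcessor(AudioProcessorBase):
    def __init__(self) -> None:
        self.frames = []  # every av.AudioFrame received while recording

    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
        self.frames.append(frame)  # buffer the frame, pass it through unchanged
        return frame

    def finalize_wav(self):
        """Write the buffered frames to a temp WAV file and return its path."""
        if not self.frames:
            return None
        samples = np.concatenate(
            [f.to_ndarray().astype(np.int16).flatten() for f in self.frames]
        )
        wav_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
        with wave.open(wav_path, "wb") as wf:
            wf.setnchannels(1)                           # assume mono capture
            wf.setsampwidth(2)                           # 16-bit samples
            wf.setframerate(self.frames[0].sample_rate)
            wf.writeframes(samples.tobytes())
        return wav_path

Any buffering strategy with that same surface (frames plus finalize_wav) would satisfy the code added later in this diff.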
@@ -71,14 +62,9 @@ def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: s
     return items
 
 def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
-    """
-    Re-generates multi-speaker audio from user-edited DialogueItems,
-    then mixes with background music or custom music.
-    Returns (audio_bytes, transcript_str).
-    """
     audio_segments = []
     transcript = ""
-    crossfade_duration = 50
+    crossfade_duration = 50
 
     for item in dialogue_items:
         audio_file = generate_audio_mp3(item.text, item.speaker)
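The crossfade_duration kept inside regenerate_audio_from_dialogue is a value in milliseconds if the segments are pydub AudioSegment objects, which the later final_mix.export(..., format="mp3") call suggests. The loop body that uses it falls outside this hunk; a minimal sketch of the usual stitching pattern, assuming pydub and an illustrative stitch_segments helper:

# Illustrative only; the real loop in app.py is not shown in this hunk.
from pydub import AudioSegment

def stitch_segments(audio_segments: list, crossfade_duration: int = 50) -> AudioSegment:
    """Concatenate spoken pydub segments with a short crossfade (milliseconds)."""
    combined = audio_segments[0]
    for seg in audio_segments[1:]:
        # overlap the tail of the previous clip with the head of the next one
        # to avoid audible clicks between speakers
        combined = combined.append(seg, crossfade=crossfade_duration)
    return combined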
@@ -122,12 +108,6 @@ def generate_podcast(
     sponsor_style,
     custom_bg_music_path
 ):
-    """
-    Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
-    Ensures female voice for host (Jane), male voice for guest (John).
-    Sponsor content is either separate or blended based on sponsor_style.
-    Returns (audio_bytes, transcript_str).
-    """
     sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
     if sum(sources) > 1:
         return None, "Provide only one input (PDF, URL, YouTube, or Topic)."
@@ -165,7 +145,6 @@ def generate_podcast(
     except Exception as e:
         return None, f"Error researching topic: {str(e)}"
 
-    from utils import truncate_text
     text = truncate_text(text)
 
     extra_instructions = []
@@ -183,12 +162,12 @@ def generate_podcast(
         )
 
     from prompts import SYSTEM_PROMPT
+    from utils import generate_script, generate_audio_mp3, mix_with_bg_music
    combined_instructions = "\n\n".join(extra_instructions).strip()
     full_prompt = SYSTEM_PROMPT
     if combined_instructions:
         full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
 
-    from utils import generate_script, generate_audio_mp3, mix_with_bg_music
     try:
         script = generate_script(
             full_prompt,
@@ -223,6 +202,7 @@ def generate_podcast(
 
     final_mix = mix_with_bg_music(combined_spoken, custom_bg_music_path)
 
+    import tempfile
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
         final_mix.export(temp_audio.name, format="mp3")
         final_mp3_path = temp_audio.name
@@ -316,6 +296,7 @@ def main():
         st.session_state["transcript"] = None
     if "transcript_original" not in st.session_state:
         st.session_state["transcript_original"] = None
+
    # For Q&A
     if "qa_count" not in st.session_state:
         st.session_state["qa_count"] = 0
@@ -351,6 +332,7 @@ def main():
             progress_bar.progress(75)
             time.sleep(1.0)
 
+            from utils import truncate_text
            audio_bytes, transcript = generate_podcast(
                 file,
                 url,
@@ -381,7 +363,6 @@ def main():
             st.session_state["audio_bytes"] = audio_bytes
             st.session_state["transcript"] = transcript
             st.session_state["transcript_original"] = transcript
-            # Reset Q&A
             st.session_state["qa_count"] = 0
             st.session_state["conversation_history"] = ""
 
@@ -401,12 +382,25 @@ def main():
             height=300
         )
 
+        from difflib import SequenceMatcher
+        def highlight_differences(original: str, edited: str) -> str:
+            matcher = SequenceMatcher(None, original.split(), edited.split())
+            highlighted = []
+            for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
+                if opcode == 'equal':
+                    highlighted.extend(original.split()[i1:i2])
+                elif opcode in ('replace', 'insert'):
+                    added_words = edited.split()[j1:j2]
+                    highlighted.extend([f'<span style="color:red">{word}</span>' for word in added_words])
+                elif opcode == 'delete':
+                    pass
+            return ' '.join(highlighted)
+
        if st.session_state["transcript_original"]:
             highlighted_transcript = highlight_differences(
                 st.session_state["transcript_original"],
                 edited_text
             )
-
             st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
             st.markdown(highlighted_transcript, unsafe_allow_html=True)
 
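For a quick sense of the new highlight_differences helper, run as a standalone illustration: words that were added or replaced in the edited transcript come back wrapped in a red span, deleted words are silently dropped, and unchanged words pass through.

original = "**Jane**: Welcome to the show"
edited = "**Jane**: Welcome back to the show"
print(highlight_differences(original, edited))
# **Jane**: Welcome <span style="color:red">back</span> to the show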
@@ -454,45 +448,70 @@ def main():
         st.markdown("### Updated Transcript")
         st.markdown(new_transcript)
 
-    #
-
-
-    st.markdown("## Post-Podcast Q&A")
+    # ----------- POST-PODCAST Q&A with Microphone -----------
+    st.markdown("## Post-Podcast Q&A (Using Microphone)")
+
    used_questions = st.session_state["qa_count"]
     remaining = MAX_QA_QUESTIONS - used_questions
 
     if remaining > 0:
         st.write(f"You can ask up to {remaining} more question(s).")
 
-
-
+        st.write("### Record Your Follow-Up Question:")
+        # Use streamlit-webrtc
+        from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration
+        from qa import AudioBufferProcessor
+        RTC_CONFIGURATION = {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
+
+        webrtc_ctx = webrtc_streamer(
+            key="qna-audio-stream",
+            mode=WebRtcMode.SENDONLY,
+            rtc_configuration=RTC_CONFIGURATION,
+            media_stream_constraints={"audio": True, "video": False},
+            audio_processor_factory=AudioBufferProcessor
+        )
+
+        if "audio-processor" not in st.session_state:
+            st.session_state["audio-processor"] = None
+
+        if webrtc_ctx.state.playing and webrtc_ctx.audio_processor:
+            st.session_state["audio-processor"] = webrtc_ctx.audio_processor
+
+        # Once the user clicks "Stop", we can finalize the frames
+        if webrtc_ctx.state.status == webrtc_ctx.state.STATUS.DISCONNECTED:
+            st.write("Recording Stopped. You may now submit your question.")
 
        if st.button("Submit Q&A"):
             if used_questions >= MAX_QA_QUESTIONS:
                 st.warning("You have reached the Q&A limit.")
             else:
-
-
-
-
-                    tmp.write(audio_q.read())
-                    local_audio_path = tmp.name
-                st.write("Transcribing your audio question...")
-                audio_transcript = transcribe_audio_deepgram(local_audio_path)
-                if audio_transcript:
-                    question_text = audio_transcript
-
-                if not question_text:
-                    st.warning("No question found (text or audio).")
+                # 1) Finalize WAV
+                processor = st.session_state.get("audio-processor")
+                if not processor or not getattr(processor, "frames", None):
+                    st.warning("No recorded audio found. Please record your question first.")
                else:
-
-
-
-                    st.audio(ans_audio, format="audio/mp3")
-                    st.markdown(f"**John**: {ans_text}")
-                    st.session_state["qa_count"] += 1
+                    local_wav_path = processor.finalize_wav()
+                    if not local_wav_path:
+                        st.warning("No audio frames found. Please record again.")
                    else:
-
+                        # 2) Transcribe with Deepgram (same logic as your old approach)
+                        from qa import transcribe_audio_deepgram
+                        st.write("Transcribing your voice question via Deepgram...")
+                        question_text = transcribe_audio_deepgram(local_wav_path)
+                        if not question_text.strip():
+                            st.warning("No transcript found. Please try again.")
+                        else:
+                            st.write(f"**You asked**: {question_text}")
+
+                            # 3) Generate an LLM answer
+                            conversation_so_far = st.session_state["conversation_history"]
+                            ans_audio, ans_text = handle_qa_exchange(conversation_so_far, question_text)
+                            if ans_audio:
+                                st.audio(ans_audio, format="audio/mp3")
+                                st.markdown(f"**John**: {ans_text}")
+                                st.session_state["qa_count"] += 1
+                            else:
+                                st.warning("No response could be generated.")
    else:
         st.write("You have used all 5 Q&A opportunities.")
 
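Both the removed upload-based flow and the new microphone flow hand the recorded file to transcribe_audio_deepgram from qa.py, which this diff never shows. For orientation, a minimal sketch of such a helper against Deepgram's prerecorded /v1/listen endpoint, assuming a DEEPGRAM_API_KEY environment variable and a WAV input; the real helper may use the Deepgram SDK or different options.

# Hypothetical sketch (qa.py is not part of this diff).
import os
import requests

def transcribe_audio_deepgram(local_audio_path: str) -> str:
    """Send a local WAV file to Deepgram and return the first transcript."""
    with open(local_audio_path, "rb") as f:
        resp = requests.post(
            "https://api.deepgram.com/v1/listen",
            headers={
                "Authorization": f"Token {os.environ['DEEPGRAM_API_KEY']}",
                "Content-Type": "audio/wav",
            },
            data=f,
        )
    resp.raise_for_status()
    payload = resp.json()
    # First channel, first alternative carries the transcript text
    return payload["results"]["channels"][0]["alternatives"][0]["transcript"]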
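handle_qa_exchange, also imported from qa.py, is only visible here through its call site: it takes the running conversation history plus the transcribed question and returns (ans_audio, ans_text), where ans_audio is playable by st.audio. A sketch of that shape, assuming the app's existing generate_audio_mp3 TTS helper and a purely illustrative ask_llm call; the actual implementation may differ.

# Hypothetical sketch (qa.py is not part of this diff).
def handle_qa_exchange(conversation_so_far: str, question_text: str):
    """Return (mp3_bytes_or_None, answer_text) for a follow-up question."""
    prompt = (
        f"{conversation_so_far}\n\n"
        f"Listener question: {question_text}\n"
        "Answer briefly as John, the podcast guest."
    )
    ans_text = ask_llm(prompt)  # ask_llm is illustrative, not a real helper in this repo
    if not ans_text:
        return None, ""
    mp3_path = generate_audio_mp3(ans_text, "John")  # reuse the app's TTS helper
    with open(mp3_path, "rb") as f:
        ans_audio = f.read()
    return ans_audio, ans_text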