Update app.py
app.py CHANGED
@@ -21,12 +21,10 @@ from utils import (
 )
 from prompts import SYSTEM_PROMPT
 
-#
-from qa import AudioBufferProcessor, handle_qa_exchange, transcribe_audio_deepgram
+# We are no longer importing streamlit-webrtc or mic-based Q&A
+# from qa import AudioBufferProcessor, handle_qa_exchange, transcribe_audio_deepgram
 
-
-
-MAX_QA_QUESTIONS = 5
+MAX_QA_QUESTIONS = 5  # Up to 5 typed follow-up questions
 
 def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
     pattern = r"\*\*(.+?)\*\*:\s*(.+)"
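
Note: this commit drops the streamlit-webrtc mic flow and keeps a cap of five typed follow-ups. For reference, the session-state gating pattern the cap relies on, as a minimal self-contained sketch (not the full app.py flow):

```python
import streamlit as st

MAX_QA_QUESTIONS = 5  # same cap as in app.py

# st.session_state persists across Streamlit reruns within one browser
# session, so the counter survives each button click.
if "qa_count" not in st.session_state:
    st.session_state["qa_count"] = 0

remaining = MAX_QA_QUESTIONS - st.session_state["qa_count"]
if remaining > 0:
    typed_q = st.text_input("Type your follow-up question here:")
    if st.button("Submit Q&A") and typed_q.strip():
        st.write(f"**You asked**: {typed_q.strip()}")
        st.session_state["qa_count"] += 1
else:
    st.write("You have used all 5 Q&A opportunities.")
```
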
@@ -65,7 +63,7 @@ def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: s
 def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
     audio_segments = []
     transcript = ""
-    crossfade_duration = 50
+    crossfade_duration = 50  # ms
 
     for item in dialogue_items:
         audio_file = generate_audio_mp3(item.text, item.speaker)
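
Note: the new `# ms` comment pins down the crossfade unit. Assuming the segments are pydub `AudioSegment`s (which the later `export(..., format="mp3")` call suggests), a 50 ms crossfade between consecutive dialogue lines would look roughly like this sketch; `mp3_paths` is a stand-in for whatever `generate_audio_mp3` returns:

```python
from pydub import AudioSegment

crossfade_duration = 50  # ms, matching the constant in app.py

def stitch_dialogue(mp3_paths: list[str]) -> AudioSegment:
    """Concatenate spoken segments with a short crossfade to avoid clicks."""
    combined = AudioSegment.from_file(mp3_paths[0], format="mp3")
    for path in mp3_paths[1:]:
        seg = AudioSegment.from_file(path, format="mp3")
        # append() overlaps the tail of `combined` with the head of `seg`
        # by `crossfade` milliseconds
        combined = combined.append(seg, crossfade=crossfade_duration)
    return combined
```
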
@@ -146,7 +144,6 @@ def generate_podcast(
     except Exception as e:
         return None, f"Error researching topic: {str(e)}"
 
-    from utils import truncate_text
     text = truncate_text(text)
 
     extra_instructions = []
@@ -164,10 +161,9 @@ def generate_podcast(
     )
 
     from prompts import SYSTEM_PROMPT
-    from utils import generate_script, generate_audio_mp3, mix_with_bg_music
-    combined_instructions = "\n\n".join(extra_instructions).strip()
     full_prompt = SYSTEM_PROMPT
-    if combined_instructions:
+    if extra_instructions:
+        combined_instructions = "\n\n".join(extra_instructions).strip()
         full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
 
     try:
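
Note: the guard now tests the list directly before joining, so an empty `extra_instructions` adds no "Additional Instructions" section at all. A tiny standalone check of the new behavior (SYSTEM_PROMPT stubbed out here):

```python
SYSTEM_PROMPT = "You are a podcast script writer."  # stub for prompts.SYSTEM_PROMPT

def build_prompt(extra_instructions: list[str]) -> str:
    full_prompt = SYSTEM_PROMPT
    if extra_instructions:  # skip the section entirely when there is nothing to add
        combined_instructions = "\n\n".join(extra_instructions).strip()
        full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
    return full_prompt

assert build_prompt([]) == SYSTEM_PROMPT  # no dangling empty section
assert "Additional Instructions" in build_prompt(["Keep it under 5 minutes."])
```
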
@@ -185,7 +181,7 @@ def generate_podcast(
 
     audio_segments = []
     transcript = ""
-    crossfade_duration = 50
+    crossfade_duration = 50  # ms
 
     try:
         for item in script.dialogue:
@@ -204,7 +200,6 @@ def generate_podcast(
 
     final_mix = mix_with_bg_music(combined_spoken, custom_bg_music_path)
 
-    import tempfile
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
         final_mix.export(temp_audio.name, format="mp3")
         final_mp3_path = temp_audio.name
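
Note: moving `import tempfile` to module scope leaves the export itself unchanged. For reference, the pattern in isolation (a sketch; `final_mix` is any pydub segment), including the cleanup the caller eventually owes because of `delete=False`:

```python
import tempfile

from pydub import AudioSegment

def export_to_temp_mp3(final_mix: AudioSegment) -> str:
    """Write a pydub segment to a temporary .mp3 and return its path."""
    # delete=False keeps the file on disk after the handle closes,
    # so Streamlit can read it back for st.audio / st.download_button.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
        final_mix.export(temp_audio.name, format="mp3")
        return temp_audio.name

# The caller should os.unlink(path) once the bytes have been read.
```
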
@@ -299,7 +294,6 @@ def main():
     if "transcript_original" not in st.session_state:
         st.session_state["transcript_original"] = None
 
-    # For Q&A
     if "qa_count" not in st.session_state:
         st.session_state["qa_count"] = 0
     if "conversation_history" not in st.session_state:
@@ -367,6 +361,7 @@ def main():
     st.session_state["qa_count"] = 0
     st.session_state["conversation_history"] = ""
 
+    # Display generated audio and transcript if present
     if st.session_state["audio_bytes"]:
         st.audio(st.session_state["audio_bytes"], format='audio/mp3')
         st.download_button(
@@ -383,9 +378,8 @@ def main():
         height=300
     )
 
-    from difflib import SequenceMatcher
     def highlight_differences(original: str, edited: str) -> str:
-        matcher = SequenceMatcher(None, original.split(), edited.split())
+        matcher = difflib.SequenceMatcher(None, original.split(), edited.split())
        highlighted = []
         for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
             if opcode == 'equal':
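
Note: the hunk shows only the `'equal'` branch of `highlight_differences`; a complete, self-contained version using the same word-level `difflib.SequenceMatcher` approach is sketched below. The red `<span>` styling for changed words is an assumption, since the diff does not show the non-equal branches:

```python
import difflib

def highlight_differences(original: str, edited: str) -> str:
    """Word-level diff, returning HTML for st.markdown(..., unsafe_allow_html=True)."""
    orig_words, edit_words = original.split(), edited.split()
    matcher = difflib.SequenceMatcher(None, orig_words, edit_words)
    highlighted = []
    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
        if opcode == 'equal':
            highlighted.extend(edit_words[j1:j2])
        elif opcode in ('replace', 'insert'):
            # Mark words that are new or changed in the edited transcript
            highlighted.extend(
                f"<span style='color:red'>{word}</span>" for word in edit_words[j1:j2]
            )
        # 'delete': the words exist only in the original, so nothing to show
    return " ".join(highlighted)
```
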
@@ -449,8 +443,8 @@ def main():
     st.markdown("### Updated Transcript")
     st.markdown(new_transcript)
 
-    # ----------- POST-PODCAST Q&A
-    st.markdown("## Post-Podcast Q&A (
+    # ----------- POST-PODCAST Q&A (Text-based) -----------
+    st.markdown("## Post-Podcast Q&A (Text-based)")
 
     used_questions = st.session_state["qa_count"]
     remaining = MAX_QA_QUESTIONS - used_questions
@@ -458,66 +452,27 @@ def main():
     if remaining > 0:
         st.write(f"You can ask up to {remaining} more question(s).")
 
-        st.
-
-        # EXPLICIT STUN SERVER
-        # So we can confirm ICE candidates are gathered
-        new_rtc_config = RTCConfiguration(
-            {
-                "iceServers": [
-                    {"urls": ["stun:stun.l.google.com:19302"]}
-                ]
-            }
-        )
-
-        webrtc_ctx = webrtc_streamer(
-            key="qna-audio-stream",
-            mode=WebRtcMode.SENDONLY,
-            rtc_configuration=new_rtc_config,  # <--- STUN server explicitly set
-            media_stream_constraints={"audio": True, "video": False},
-            audio_processor_factory=AudioBufferProcessor
-        )
-
-        if "audio-processor" not in st.session_state:
-            st.session_state["audio-processor"] = None
-
-        # If the stream is currently playing, store the processor
-        if webrtc_ctx.state.playing and webrtc_ctx.audio_processor:
-            st.session_state["audio-processor"] = webrtc_ctx.audio_processor
-
-            # Debug print: how many frames have arrived?
-            st.write("Frames so far:", webrtc_ctx.audio_processor.frame_count)
-
-        if not webrtc_ctx.state.playing:
-            st.write("Recording Stopped. You may now submit your question.")
+        typed_q = st.text_input("Type your follow-up question here:")
 
         if st.button("Submit Q&A"):
             if used_questions >= MAX_QA_QUESTIONS:
                 st.warning("You have reached the Q&A limit.")
             else:
-
-                if not
-                    st.warning("No
+                question_text = typed_q.strip()
+                if not question_text:
+                    st.warning("No question found. Please type something.")
                 else:
-
-
-
-
-
-
-
-
-
-
-
-                    conversation_so_far = st.session_state["conversation_history"]
-                    ans_audio, ans_text = handle_qa_exchange(conversation_so_far, question_text)
-                    if ans_audio:
-                        st.audio(ans_audio, format="audio/mp3")
-                        st.markdown(f"**John**: {ans_text}")
-                        st.session_state["qa_count"] += 1
-                    else:
-                        st.warning("No response could be generated.")
+                    st.write(f"**You asked**: {question_text}")
+
+                    # We'll just store the question + a mock response for now
+                    # or you can do an LLM call
+                    # For example, let's do a minimal approach:
+                    fake_answer = "That's a great question! I'd love to answer, but I'm currently text-based only."
+                    st.write(f"**John**: {fake_answer}")
+
+                    # Update conversation
+                    st.session_state["conversation_history"] += f"\nUser: {question_text}\nJohn: {fake_answer}\n"
+                    st.session_state["qa_count"] += 1
     else:
         st.write("You have used all 5 Q&A opportunities.")
 
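
Note: the new flow answers with a hard-coded `fake_answer`, and the inline comments already note that an LLM call could go here. Since `handle_qa_exchange` still lives in qa.py (only its import was commented out), restoring a real answer might look like the sketch below, assuming it keeps the `(conversation_history, question) -> (audio_bytes, text)` signature visible in the removed code:

```python
import streamlit as st

from qa import handle_qa_exchange  # only the import was commented out in app.py

def answer_typed_question(question_text: str) -> None:
    """Swap the mock reply for a real LLM answer plus TTS audio."""
    conversation_so_far = st.session_state["conversation_history"]
    ans_audio, ans_text = handle_qa_exchange(conversation_so_far, question_text)
    if ans_audio:
        st.audio(ans_audio, format="audio/mp3")
        st.markdown(f"**John**: {ans_text}")
        st.session_state["conversation_history"] += (
            f"\nUser: {question_text}\nJohn: {ans_text}\n"
        )
        st.session_state["qa_count"] += 1
    else:
        st.warning("No response could be generated.")
```
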