siddhartharyaai committed (verified)
Commit 073fcba · 1 Parent(s): bc65678

Update app.py

Files changed (1): app.py (+26 -71)
app.py CHANGED
@@ -21,12 +21,10 @@ from utils import (
 )
 from prompts import SYSTEM_PROMPT
 
-# The new Q&A with mic
-from qa import AudioBufferProcessor, handle_qa_exchange, transcribe_audio_deepgram
-
-from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration
-
-MAX_QA_QUESTIONS = 5
+# We are no longer importing streamlit-webrtc or mic-based Q&A
+# from qa import AudioBufferProcessor, handle_qa_exchange, transcribe_audio_deepgram
+
+MAX_QA_QUESTIONS = 5  # Up to 5 typed follow-up questions
 
 def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
     pattern = r"\*\*(.+?)\*\*:\s*(.+)"
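A quick illustration of what that transcript pattern matches (the speaker line here is made up for the example):

```python
import re

# Lines in the editable transcript look like "**Speaker**: text".
pattern = r"\*\*(.+?)\*\*:\s*(.+)"

m = re.match(pattern, "**John**: Welcome back to the show.")
if m:
    speaker, text = m.group(1), m.group(2)  # "John", "Welcome back to the show."
```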
@@ -65,7 +63,7 @@ def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
 def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
     audio_segments = []
     transcript = ""
-    crossfade_duration = 50
+    crossfade_duration = 50  # ms
 
     for item in dialogue_items:
         audio_file = generate_audio_mp3(item.text, item.speaker)
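The `crossfade_duration` is in milliseconds; presumably it is handed to pydub when the per-line clips are stitched together. A minimal sketch of that pattern (file names are placeholders):

```python
from pydub import AudioSegment

crossfade_duration = 50  # ms; must be shorter than either adjoining clip

# Stitch per-line MP3 clips with a short crossfade between speakers.
segments = [AudioSegment.from_mp3(p) for p in ["line1.mp3", "line2.mp3"]]
combined = segments[0]
for seg in segments[1:]:
    combined = combined.append(seg, crossfade=crossfade_duration)
```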
@@ -146,7 +144,6 @@ def generate_podcast(
     except Exception as e:
         return None, f"Error researching topic: {str(e)}"
 
-    from utils import truncate_text
     text = truncate_text(text)
 
     extra_instructions = []
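`truncate_text` now comes from the top-level `utils` import instead of a redundant local one. Its body isn't shown in this diff; purely as an illustration, a helper like it might cap the research text at a word budget:

```python
def truncate_text(text: str, max_words: int = 1000) -> str:
    # Hypothetical stand-in for utils.truncate_text: keep the first
    # max_words words so the script prompt stays within context limits.
    words = text.split()
    return text if len(words) <= max_words else " ".join(words[:max_words])
```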
@@ -164,10 +161,9 @@ def generate_podcast(
     )
 
     from prompts import SYSTEM_PROMPT
-    from utils import generate_script, generate_audio_mp3, mix_with_bg_music
-    combined_instructions = "\n\n".join(extra_instructions).strip()
     full_prompt = SYSTEM_PROMPT
-    if combined_instructions:
+    if extra_instructions:
+        combined_instructions = "\n\n".join(extra_instructions).strip()
         full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
 
     try:
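The reworked guard only builds `combined_instructions` (and appends the header) when `extra_instructions` is non-empty, so an empty "# Additional Instructions" section can no longer sneak into the prompt. For reference (the instruction strings below are made up):

```python
extra_instructions = []
assert "\n\n".join(extra_instructions) == ""  # joining an empty list yields ""

extra_instructions = ["Focus on recent developments.", "Keep the tone light."]
combined = "\n\n".join(extra_instructions).strip()
# combined == "Focus on recent developments.\n\nKeep the tone light."
```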
@@ -185,7 +181,7 @@ def generate_podcast(
 
     audio_segments = []
     transcript = ""
-    crossfade_duration = 50
+    crossfade_duration = 50  # ms
 
     try:
         for item in script.dialogue:
@@ -204,7 +200,6 @@ def generate_podcast(
 
     final_mix = mix_with_bg_music(combined_spoken, custom_bg_music_path)
 
-    import tempfile
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
         final_mix.export(temp_audio.name, format="mp3")
         final_mp3_path = temp_audio.name
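`delete=False` keeps the exported MP3 on disk after the `with` block closes so the path can be read back later, which also means something eventually has to remove the file. A minimal sketch of the round-trip (the silent segment stands in for the real mix):

```python
import os
import tempfile

from pydub import AudioSegment

final_mix = AudioSegment.silent(duration=1000)  # stand-in for the real mix

with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
    final_mix.export(temp_audio.name, format="mp3")
    final_mp3_path = temp_audio.name

with open(final_mp3_path, "rb") as f:
    audio_bytes = f.read()  # e.g. for st.audio / st.download_button
os.remove(final_mp3_path)  # clean up once the bytes are in memory
```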
@@ -299,7 +294,6 @@ def main():
     if "transcript_original" not in st.session_state:
         st.session_state["transcript_original"] = None
 
-    # For Q&A
     if "qa_count" not in st.session_state:
         st.session_state["qa_count"] = 0
     if "conversation_history" not in st.session_state:
@@ -367,6 +361,7 @@ def main():
         st.session_state["qa_count"] = 0
         st.session_state["conversation_history"] = ""
 
+    # Display generated audio and transcript if present
     if st.session_state["audio_bytes"]:
         st.audio(st.session_state["audio_bytes"], format='audio/mp3')
         st.download_button(
@@ -383,9 +378,8 @@ def main():
         height=300
     )
 
-    from difflib import SequenceMatcher
     def highlight_differences(original: str, edited: str) -> str:
-        matcher = SequenceMatcher(None, original.split(), edited.split())
+        matcher = difflib.SequenceMatcher(None, original.split(), edited.split())
         highlighted = []
         for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
             if opcode == 'equal':
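`difflib.SequenceMatcher` compares the two word lists and yields opcodes (`equal`, `replace`, `delete`, `insert`) with index ranges into each list; a quick demonstration with made-up inputs:

```python
import difflib

original = "the quick brown fox".split()
edited = "the slow brown fox jumps".split()

matcher = difflib.SequenceMatcher(None, original, edited)
for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
    print(opcode, original[i1:i2], edited[j1:j2])
# equal   ['the']          ['the']
# replace ['quick']        ['slow']
# equal   ['brown', 'fox'] ['brown', 'fox']
# insert  []               ['jumps']
```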
@@ -449,8 +443,8 @@ def main():
     st.markdown("### Updated Transcript")
     st.markdown(new_transcript)
 
-    # ----------- POST-PODCAST Q&A with Microphone -----------
-    st.markdown("## Post-Podcast Q&A (Using Microphone)")
+    # ----------- POST-PODCAST Q&A (Text-based) -----------
+    st.markdown("## Post-Podcast Q&A (Text-based)")
 
     used_questions = st.session_state["qa_count"]
     remaining = MAX_QA_QUESTIONS - used_questions
@@ -458,66 +452,27 @@
     if remaining > 0:
         st.write(f"You can ask up to {remaining} more question(s).")
 
-        st.write("### Record Your Follow-Up Question:")
-
-        # EXPLICIT STUN SERVER
-        # So we can confirm ICE candidates are gathered
-        new_rtc_config = RTCConfiguration(
-            {
-                "iceServers": [
-                    {"urls": ["stun:stun.l.google.com:19302"]}
-                ]
-            }
-        )
-
-        webrtc_ctx = webrtc_streamer(
-            key="qna-audio-stream",
-            mode=WebRtcMode.SENDONLY,
-            rtc_configuration=new_rtc_config,  # <--- STUN server explicitly set
-            media_stream_constraints={"audio": True, "video": False},
-            audio_processor_factory=AudioBufferProcessor
-        )
-
-        if "audio-processor" not in st.session_state:
-            st.session_state["audio-processor"] = None
-
-        # If the stream is currently playing, store the processor
-        if webrtc_ctx.state.playing and webrtc_ctx.audio_processor:
-            st.session_state["audio-processor"] = webrtc_ctx.audio_processor
-
-            # Debug print: how many frames have arrived?
-            st.write("Frames so far:", webrtc_ctx.audio_processor.frame_count)
-
-        if not webrtc_ctx.state.playing:
-            st.write("Recording Stopped. You may now submit your question.")
+        typed_q = st.text_input("Type your follow-up question here:")
 
         if st.button("Submit Q&A"):
             if used_questions >= MAX_QA_QUESTIONS:
                 st.warning("You have reached the Q&A limit.")
             else:
-                processor = st.session_state.get("audio-processor")
-                if not processor or not getattr(processor, "frames", None):
-                    st.warning("No recorded audio found. Please record your question first.")
+                question_text = typed_q.strip()
+                if not question_text:
+                    st.warning("No question found. Please type something.")
                 else:
-                    local_wav_path = processor.finalize_wav()
-                    if not local_wav_path:
-                        st.warning("No audio frames found. Please record again.")
-                    else:
-                        st.write("Transcribing your voice question via Deepgram...")
-                        question_text = transcribe_audio_deepgram(local_wav_path)
-                        if not question_text.strip():
-                            st.warning("No transcript found. Please try again.")
-                        else:
-                            st.write(f"**You asked**: {question_text}")
-
-                            conversation_so_far = st.session_state["conversation_history"]
-                            ans_audio, ans_text = handle_qa_exchange(conversation_so_far, question_text)
-                            if ans_audio:
-                                st.audio(ans_audio, format="audio/mp3")
-                                st.markdown(f"**John**: {ans_text}")
-                                st.session_state["qa_count"] += 1
-                            else:
-                                st.warning("No response could be generated.")
+                    st.write(f"**You asked**: {question_text}")
+
+                    # For now we just store the question plus a mock response;
+                    # this could be swapped for an LLM call.
+                    fake_answer = "That's a great question! I'd love to answer, but I'm currently text-based only."
+                    st.write(f"**John**: {fake_answer}")
+
+                    # Update conversation history
+                    st.session_state["conversation_history"] += f"\nUser: {question_text}\nJohn: {fake_answer}\n"
+                    st.session_state["qa_count"] += 1
     else:
         st.write("You have used all 5 Q&A opportunities.")
 
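If a real answer is wanted instead of the placeholder, the `handle_qa_exchange(conversation_history, question)` helper that the mic path used (see the removed lines above) returned `(answer_audio, answer_text)` and could be reused for typed questions. A sketch, assuming `qa.py` is untouched (this commit only changes app.py):

```python
import streamlit as st

from qa import handle_qa_exchange  # only the import was dropped, not the module

question_text = "What was the main takeaway?"  # would come from the st.text_input above
conversation_so_far = st.session_state["conversation_history"]
ans_audio, ans_text = handle_qa_exchange(conversation_so_far, question_text)
if ans_audio:
    st.audio(ans_audio, format="audio/mp3")
    st.markdown(f"**John**: {ans_text}")
    st.session_state["conversation_history"] += f"\nUser: {question_text}\nJohn: {ans_text}\n"
    st.session_state["qa_count"] += 1
else:
    st.warning("No response could be generated.")
```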