siddhartharyaai committed
Commit d4f0b3c · verified · 1 Parent(s): b0e78f5

Update app.py

Files changed (1)
  1. app.py +74 -55
app.py CHANGED
@@ -21,22 +21,13 @@ from utils import (
 )
 from prompts import SYSTEM_PROMPT
 
-# NEW: For Q&A
-from qa import transcribe_audio_deepgram, handle_qa_exchange
+# The new Q&A with mic
+from qa import AudioBufferProcessor, handle_qa_exchange
+from streamlit_webrtc import webrtc_streamer, WebRtcMode
 
-MAX_QA_QUESTIONS = 5  # up to 5 voice/text questions
+MAX_QA_QUESTIONS = 5
 
 def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
-    """
-    Looks for lines like:
-      **Angela**: Hello
-      **Dimitris**: Great topic...
-    We treat 'Angela' as the raw display_speaker, 'Hello' as text.
-    Then we map 'Angela' -> speaker='Jane' (if it matches host_name),
-    'Dimitris' -> speaker='John' (if it matches guest_name), etc.
-
-    Returns a list of DialogueItem.
-    """
     pattern = r"\*\*(.+?)\*\*:\s*(.+)"
     matches = re.findall(pattern, edited_text)
 
@@ -71,14 +62,9 @@ def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
     return items
 
 def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
-    """
-    Re-generates multi-speaker audio from user-edited DialogueItems,
-    then mixes with background music or custom music.
-    Returns (audio_bytes, transcript_str).
-    """
     audio_segments = []
     transcript = ""
-    crossfade_duration = 50  # ms
+    crossfade_duration = 50
 
     for item in dialogue_items:
         audio_file = generate_audio_mp3(item.text, item.speaker)
@@ -122,12 +108,6 @@ def generate_podcast(
     sponsor_style,
     custom_bg_music_path
 ):
-    """
-    Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
-    Ensures female voice for host (Jane), male voice for guest (John).
-    Sponsor content is either separate or blended based on sponsor_style.
-    Returns (audio_bytes, transcript_str).
-    """
     sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
     if sum(sources) > 1:
         return None, "Provide only one input (PDF, URL, YouTube, or Topic)."
@@ -165,7 +145,6 @@
     except Exception as e:
         return None, f"Error researching topic: {str(e)}"
 
-    from utils import truncate_text
     text = truncate_text(text)
 
     extra_instructions = []
@@ -183,12 +162,12 @@
     )
 
     from prompts import SYSTEM_PROMPT
+    from utils import generate_script, generate_audio_mp3, mix_with_bg_music
    combined_instructions = "\n\n".join(extra_instructions).strip()
     full_prompt = SYSTEM_PROMPT
     if combined_instructions:
         full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
 
-    from utils import generate_script, generate_audio_mp3, mix_with_bg_music
     try:
         script = generate_script(
             full_prompt,
@@ -223,6 +202,7 @@
 
     final_mix = mix_with_bg_music(combined_spoken, custom_bg_music_path)
 
+    import tempfile
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
         final_mix.export(temp_audio.name, format="mp3")
         final_mp3_path = temp_audio.name
@@ -316,6 +296,7 @@ def main():
         st.session_state["transcript"] = None
     if "transcript_original" not in st.session_state:
         st.session_state["transcript_original"] = None
+
     # For Q&A
     if "qa_count" not in st.session_state:
         st.session_state["qa_count"] = 0
@@ -351,6 +332,7 @@
         progress_bar.progress(75)
         time.sleep(1.0)
 
+        from utils import truncate_text
        audio_bytes, transcript = generate_podcast(
             file,
             url,
@@ -381,7 +363,6 @@
         st.session_state["audio_bytes"] = audio_bytes
         st.session_state["transcript"] = transcript
         st.session_state["transcript_original"] = transcript
-        # Reset Q&A
         st.session_state["qa_count"] = 0
         st.session_state["conversation_history"] = ""
 
@@ -401,12 +382,25 @@
             height=300
         )
 
+        from difflib import SequenceMatcher
+        def highlight_differences(original: str, edited: str) -> str:
+            matcher = SequenceMatcher(None, original.split(), edited.split())
+            highlighted = []
+            for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
+                if opcode == 'equal':
+                    highlighted.extend(original.split()[i1:i2])
+                elif opcode in ('replace', 'insert'):
+                    added_words = edited.split()[j1:j2]
+                    highlighted.extend([f'<span style="color:red">{word}</span>' for word in added_words])
+                elif opcode == 'delete':
+                    pass
+            return ' '.join(highlighted)
+
         if st.session_state["transcript_original"]:
             highlighted_transcript = highlight_differences(
                 st.session_state["transcript_original"],
                 edited_text
             )
-
             st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
             st.markdown(highlighted_transcript, unsafe_allow_html=True)
 
@@ -454,45 +448,70 @@
         st.markdown("### Updated Transcript")
         st.markdown(new_transcript)
 
-    # -----------------------
-    # POST-PODCAST Q&A Logic
-    # -----------------------
-    st.markdown("## Post-Podcast Q&A")
+    # ----------- POST-PODCAST Q&A with Microphone -----------
+    st.markdown("## Post-Podcast Q&A (Using Microphone)")
+
     used_questions = st.session_state["qa_count"]
     remaining = MAX_QA_QUESTIONS - used_questions
 
     if remaining > 0:
         st.write(f"You can ask up to {remaining} more question(s).")
 
-        typed_q = st.text_input("Type your follow-up question:")
-        audio_q = st.file_uploader("Or upload an audio question (WAV, MP3)")
+        st.write("### Record Your Follow-Up Question:")
+        # Use streamlit-webrtc
+        from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration
+        from qa import AudioBufferProcessor
+        RTC_CONFIGURATION = {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
+
+        webrtc_ctx = webrtc_streamer(
+            key="qna-audio-stream",
+            mode=WebRtcMode.SENDONLY,
+            rtc_configuration=RTC_CONFIGURATION,
+            media_stream_constraints={"audio": True, "video": False},
+            audio_processor_factory=AudioBufferProcessor
+        )
+
+        if "audio-processor" not in st.session_state:
+            st.session_state["audio-processor"] = None
+
+        if webrtc_ctx.state.playing and webrtc_ctx.audio_processor:
+            st.session_state["audio-processor"] = webrtc_ctx.audio_processor
+
+        # Once the user clicks "Stop", we can finalize the frames
+        if webrtc_ctx.state.status == webrtc_ctx.state.STATUS.DISCONNECTED:
+            st.write("Recording Stopped. You may now submit your question.")
 
         if st.button("Submit Q&A"):
             if used_questions >= MAX_QA_QUESTIONS:
                 st.warning("You have reached the Q&A limit.")
             else:
-                question_text = typed_q.strip()
-                if audio_q is not None:
-                    suffix = ".wav"
-                    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
-                        tmp.write(audio_q.read())
-                        local_audio_path = tmp.name
-                    st.write("Transcribing your audio question...")
-                    audio_transcript = transcribe_audio_deepgram(local_audio_path)
-                    if audio_transcript:
-                        question_text = audio_transcript
-
-                if not question_text:
-                    st.warning("No question found (text or audio).")
+                # 1) Finalize WAV
+                processor = st.session_state.get("audio-processor")
+                if not processor or not getattr(processor, "frames", None):
+                    st.warning("No recorded audio found. Please record your question first.")
                 else:
-                    st.write("Generating an answer...")
-                    ans_audio, ans_text = handle_qa_exchange(question_text)
-                    if ans_audio:
-                        st.audio(ans_audio, format="audio/mp3")
-                        st.markdown(f"**John**: {ans_text}")
-                        st.session_state["qa_count"] += 1
+                    local_wav_path = processor.finalize_wav()
+                    if not local_wav_path:
+                        st.warning("No audio frames found. Please record again.")
                     else:
-                        st.warning("No response could be generated.")
+                        # 2) Transcribe with Deepgram (same logic as your old approach)
+                        from qa import transcribe_audio_deepgram
+                        st.write("Transcribing your voice question via Deepgram...")
+                        question_text = transcribe_audio_deepgram(local_wav_path)
+                        if not question_text.strip():
+                            st.warning("No transcript found. Please try again.")
+                        else:
+                            st.write(f"**You asked**: {question_text}")
+
+                            # 3) Generate an LLM answer
+                            conversation_so_far = st.session_state["conversation_history"]
+                            ans_audio, ans_text = handle_qa_exchange(conversation_so_far, question_text)
+                            if ans_audio:
+                                st.audio(ans_audio, format="audio/mp3")
+                                st.markdown(f"**John**: {ans_text}")
+                                st.session_state["qa_count"] += 1
+                            else:
+                                st.warning("No response could be generated.")
    else:
         st.write("You have used all 5 Q&A opportunities.")
 
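A note on the parser the commit leaves untouched: the removed docstring was documentation only, and parse_user_edited_transcript still keys off the same bold-speaker pattern. A quick standalone check of that regex, using the sample lines from the deleted docstring:

import re

# Same pattern parse_user_edited_transcript uses in app.py.
pattern = r"\*\*(.+?)\*\*:\s*(.+)"

edited_text = "**Angela**: Hello\n**Dimitris**: Great topic..."
print(re.findall(pattern, edited_text))
# [('Angela', 'Hello'), ('Dimitris', 'Great topic...')]

Each match yields a (display_speaker, text) pair; per the removed docstring, the app then maps the display name onto the host or guest speaker when it matches host_name or guest_name.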
 
 
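The new highlight_differences helper builds the word-level view shown under "Edited Transcript Highlights" by walking difflib.SequenceMatcher opcodes: 'equal' words pass through, words from 'replace' and 'insert' spans get a red <span>, and 'delete' spans are dropped. A standalone trace of the opcodes it consumes:

from difflib import SequenceMatcher

matcher = SequenceMatcher(None, "hello world today".split(), "hello brave new world".split())
for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
    print(opcode, i1, i2, j1, j2)
# equal  0 1 0 1  -> "hello" kept as-is
# insert 1 1 1 3  -> "brave new" wrapped in red spans
# equal  1 2 3 4  -> "world" kept as-is
# delete 2 3 4 4  -> "today" dropped from the rendered output

One consequence of ignoring 'delete' opcodes is that pure deletions leave no visible trace in the highlighted transcript; only added or replaced words show up in red.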
 
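qa.py itself is not part of this commit, so the shape of AudioBufferProcessor is not visible here; the call sites above only require a .frames buffer and a .finalize_wav() method that returns a file path, or None when nothing was captured. A minimal sketch of a compatible processor against streamlit-webrtc's AudioProcessorBase interface, under those assumptions:

# Hypothetical sketch of qa.AudioBufferProcessor; the real file is not in this diff.
import tempfile
import wave
from typing import List, Optional

import av
import numpy as np
from streamlit_webrtc import AudioProcessorBase


class AudioBufferProcessor(AudioProcessorBase):
    def __init__(self) -> None:
        self.frames: List[av.AudioFrame] = []

    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
        # streamlit-webrtc calls this for every captured audio frame;
        # buffer it and pass it through unchanged.
        self.frames.append(frame)
        return frame

    def finalize_wav(self) -> Optional[str]:
        # Concatenate the buffered PCM and write it to a temp WAV file,
        # returning the path app.py hands to the transcriber (None if empty).
        if not self.frames:
            return None
        first = self.frames[0]
        channels = len(first.layout.channels)
        # Assumes the packed s16 frames aiortc delivers by default, where
        # to_ndarray() yields interleaved int16 samples of shape (1, n).
        pcm = np.concatenate([f.to_ndarray() for f in self.frames], axis=1)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            wav_path = tmp.name
        with wave.open(wav_path, "wb") as wf:
            wf.setnchannels(channels)
            wf.setsampwidth(2)  # 16-bit PCM
            wf.setframerate(first.sample_rate)
            wf.writeframes(pcm.astype(np.int16).tobytes())
        return wav_path

For packed s16 audio the interleaved bytes round-trip correctly with setnchannels(); planar formats would need an explicit interleave step before writing.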
 
 
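One caveat in the stop-detection the commit adds: as of current streamlit-webrtc releases, webrtc_ctx.state is a small state object whose public fields are the booleans playing and signalling; it has no status attribute or STATUS enum, so the line checking webrtc_ctx.state.status == webrtc_ctx.state.STATUS.DISCONNECTED would raise AttributeError once it executes. A sketch of the same "recording stopped" message using only those documented flags:

import streamlit as st
from streamlit_webrtc import WebRtcMode, webrtc_streamer

webrtc_ctx = webrtc_streamer(
    key="qna-audio-stream",
    mode=WebRtcMode.SENDONLY,
    media_stream_constraints={"audio": True, "video": False},
)

# The stream is no longer playing, but a processor with captured frames
# was stashed in session state on an earlier rerun: treat that as "stopped".
if not webrtc_ctx.state.playing and st.session_state.get("audio-processor"):
    st.write("Recording Stopped. You may now submit your question.")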