siddhartharyaai committed on
Commit
c2b1adf
·
verified ·
1 Parent(s): 073fcba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -37
app.py CHANGED
@@ -21,12 +21,22 @@ from utils import (
21
  )
22
  from prompts import SYSTEM_PROMPT
23
 
24
- # We are no longer importing streamlit-webrtc or mic-based Q&A
25
- # from qa import AudioBufferProcessor, handle_qa_exchange, transcribe_audio_deepgram
26
 
27
- MAX_QA_QUESTIONS = 5 # Up to 5 typed follow-up questions
28
 
29
  def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
 
 
 
 
 
 
 
 
 
 
30
  pattern = r"\*\*(.+?)\*\*:\s*(.+)"
31
  matches = re.findall(pattern, edited_text)
32
 
@@ -61,6 +71,11 @@ def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: s
61
  return items
62
 
63
  def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
 
 
 
 
 
64
  audio_segments = []
65
  transcript = ""
66
  crossfade_duration = 50 # ms
@@ -107,6 +122,12 @@ def generate_podcast(
107
  sponsor_style,
108
  custom_bg_music_path
109
  ):
 
 
 
 
 
 
110
  sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
111
  if sum(sources) > 1:
112
  return None, "Provide only one input (PDF, URL, YouTube, or Topic)."
@@ -144,6 +165,7 @@ def generate_podcast(
144
  except Exception as e:
145
  return None, f"Error researching topic: {str(e)}"
146
 
 
147
  text = truncate_text(text)
148
 
149
  extra_instructions = []
@@ -161,11 +183,12 @@ def generate_podcast(
161
  )
162
 
163
  from prompts import SYSTEM_PROMPT
 
164
  full_prompt = SYSTEM_PROMPT
165
- if extra_instructions:
166
- combined_instructions = "\n\n".join(extra_instructions).strip()
167
  full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
168
 
 
169
  try:
170
  script = generate_script(
171
  full_prompt,
@@ -181,7 +204,7 @@ def generate_podcast(
181
 
182
  audio_segments = []
183
  transcript = ""
184
- crossfade_duration = 50 # ms
185
 
186
  try:
187
  for item in script.dialogue:
@@ -293,7 +316,7 @@ def main():
293
  st.session_state["transcript"] = None
294
  if "transcript_original" not in st.session_state:
295
  st.session_state["transcript_original"] = None
296
-
297
  if "qa_count" not in st.session_state:
298
  st.session_state["qa_count"] = 0
299
  if "conversation_history" not in st.session_state:
@@ -358,10 +381,10 @@ def main():
358
  st.session_state["audio_bytes"] = audio_bytes
359
  st.session_state["transcript"] = transcript
360
  st.session_state["transcript_original"] = transcript
 
361
  st.session_state["qa_count"] = 0
362
  st.session_state["conversation_history"] = ""
363
 
364
- # Display generated audio and transcript if present
365
  if st.session_state["audio_bytes"]:
366
  st.audio(st.session_state["audio_bytes"], format='audio/mp3')
367
  st.download_button(
@@ -378,24 +401,12 @@ def main():
378
  height=300
379
  )
380
 
381
- def highlight_differences(original: str, edited: str) -> str:
382
- matcher = difflib.SequenceMatcher(None, original.split(), edited.split())
383
- highlighted = []
384
- for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
385
- if opcode == 'equal':
386
- highlighted.extend(original.split()[i1:i2])
387
- elif opcode in ('replace', 'insert'):
388
- added_words = edited.split()[j1:j2]
389
- highlighted.extend([f'<span style="color:red">{word}</span>' for word in added_words])
390
- elif opcode == 'delete':
391
- pass
392
- return ' '.join(highlighted)
393
-
394
  if st.session_state["transcript_original"]:
395
  highlighted_transcript = highlight_differences(
396
  st.session_state["transcript_original"],
397
  edited_text
398
  )
 
399
  st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
400
  st.markdown(highlighted_transcript, unsafe_allow_html=True)
401
 
@@ -443,36 +454,45 @@ def main():
443
  st.markdown("### Updated Transcript")
444
  st.markdown(new_transcript)
445
 
446
- # ----------- POST-PODCAST Q&A (Text-based) -----------
447
- st.markdown("## Post-Podcast Q&A (Text-based)")
448
-
 
449
  used_questions = st.session_state["qa_count"]
450
  remaining = MAX_QA_QUESTIONS - used_questions
451
 
452
  if remaining > 0:
453
  st.write(f"You can ask up to {remaining} more question(s).")
454
 
455
- typed_q = st.text_input("Type your follow-up question here:")
 
456
 
457
  if st.button("Submit Q&A"):
458
  if used_questions >= MAX_QA_QUESTIONS:
459
  st.warning("You have reached the Q&A limit.")
460
  else:
461
  question_text = typed_q.strip()
 
 
 
 
 
 
 
 
 
 
462
  if not question_text:
463
- st.warning("No question found. Please type something.")
464
  else:
465
- st.write(f"**You asked**: {question_text}")
466
-
467
- # We'll just store the question + a mock response for now
468
- # or you can do an LLM call
469
- # For example, let's do a minimal approach:
470
- fake_answer = "That's a great question! I'd love to answer, but I'm currently text-based only."
471
- st.write(f"**John**: {fake_answer}")
472
-
473
- # Update conversation
474
- st.session_state["conversation_history"] += f"\nUser: {question_text}\nJohn: {fake_answer}\n"
475
- st.session_state["qa_count"] += 1
476
  else:
477
  st.write("You have used all 5 Q&A opportunities.")
478
 
 
21
  )
22
  from prompts import SYSTEM_PROMPT
23
 
24
+ # NEW: For Q&A
25
+ from qa import transcribe_audio_deepgram, handle_qa_exchange
26
 
27
+ MAX_QA_QUESTIONS = 5 # up to 5 voice/text questions
28
 
29
  def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
30
+ """
31
+ Looks for lines like:
32
+ **Angela**: Hello
33
+ **Dimitris**: Great topic...
34
+ We treat 'Angela' as the raw display_speaker, 'Hello' as text.
35
+ Then we map 'Angela' -> speaker='Jane' (if it matches host_name),
36
+ 'Dimitris' -> speaker='John' (if it matches guest_name), etc.
37
+
38
+ Returns a list of DialogueItem.
39
+ """
40
  pattern = r"\*\*(.+?)\*\*:\s*(.+)"
41
  matches = re.findall(pattern, edited_text)
42
 
 
71
  return items
72
 
73
  def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
74
+ """
75
+ Re-generates multi-speaker audio from user-edited DialogueItems,
76
+ then mixes with background music or custom music.
77
+ Returns (audio_bytes, transcript_str).
78
+ """
79
  audio_segments = []
80
  transcript = ""
81
  crossfade_duration = 50 # ms
 
122
  sponsor_style,
123
  custom_bg_music_path
124
  ):
125
+ """
126
+ Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
127
+ Ensures female voice for host (Jane), male voice for guest (John).
128
+ Sponsor content is either separate or blended based on sponsor_style.
129
+ Returns (audio_bytes, transcript_str).
130
+ """
131
  sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
132
  if sum(sources) > 1:
133
  return None, "Provide only one input (PDF, URL, YouTube, or Topic)."
 
165
  except Exception as e:
166
  return None, f"Error researching topic: {str(e)}"
167
 
168
+ from utils import truncate_text
169
  text = truncate_text(text)
170
 
171
  extra_instructions = []
 
183
  )
184
 
185
  from prompts import SYSTEM_PROMPT
186
+ combined_instructions = "\n\n".join(extra_instructions).strip()
187
  full_prompt = SYSTEM_PROMPT
188
+ if combined_instructions:
 
189
  full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
190
 
191
+ from utils import generate_script, generate_audio_mp3, mix_with_bg_music
192
  try:
193
  script = generate_script(
194
  full_prompt,
 
204
 
205
  audio_segments = []
206
  transcript = ""
207
+ crossfade_duration = 50
208
 
209
  try:
210
  for item in script.dialogue:
 
316
  st.session_state["transcript"] = None
317
  if "transcript_original" not in st.session_state:
318
  st.session_state["transcript_original"] = None
319
+ # For Q&A
320
  if "qa_count" not in st.session_state:
321
  st.session_state["qa_count"] = 0
322
  if "conversation_history" not in st.session_state:
 
381
  st.session_state["audio_bytes"] = audio_bytes
382
  st.session_state["transcript"] = transcript
383
  st.session_state["transcript_original"] = transcript
384
+ # Reset Q&A
385
  st.session_state["qa_count"] = 0
386
  st.session_state["conversation_history"] = ""
387
 
 
388
  if st.session_state["audio_bytes"]:
389
  st.audio(st.session_state["audio_bytes"], format='audio/mp3')
390
  st.download_button(
 
401
  height=300
402
  )
403
 
 
 
 
 
 
 
 
 
 
 
 
 
 
404
  if st.session_state["transcript_original"]:
405
  highlighted_transcript = highlight_differences(
406
  st.session_state["transcript_original"],
407
  edited_text
408
  )
409
+
410
  st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
411
  st.markdown(highlighted_transcript, unsafe_allow_html=True)
412
 
 
454
  st.markdown("### Updated Transcript")
455
  st.markdown(new_transcript)
456
 
457
+ # -----------------------
458
+ # POST-PODCAST Q&A Logic
459
+ # -----------------------
460
+ st.markdown("## Post-Podcast Q&A")
461
  used_questions = st.session_state["qa_count"]
462
  remaining = MAX_QA_QUESTIONS - used_questions
463
 
464
  if remaining > 0:
465
  st.write(f"You can ask up to {remaining} more question(s).")
466
 
467
+ typed_q = st.text_input("Type your follow-up question:")
468
+ audio_q = st.file_uploader("Or upload an audio question (WAV, MP3)")
469
 
470
  if st.button("Submit Q&A"):
471
  if used_questions >= MAX_QA_QUESTIONS:
472
  st.warning("You have reached the Q&A limit.")
473
  else:
474
  question_text = typed_q.strip()
475
+ if audio_q is not None:
476
+ suffix = ".wav"
477
+ with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
478
+ tmp.write(audio_q.read())
479
+ local_audio_path = tmp.name
480
+ st.write("Transcribing your audio question...")
481
+ audio_transcript = transcribe_audio_deepgram(local_audio_path)
482
+ if audio_transcript:
483
+ question_text = audio_transcript
484
+
485
  if not question_text:
486
+ st.warning("No question found (text or audio).")
487
  else:
488
+ st.write("Generating an answer...")
489
+ ans_audio, ans_text = handle_qa_exchange(question_text)
490
+ if ans_audio:
491
+ st.audio(ans_audio, format="audio/mp3")
492
+ st.markdown(f"**John**: {ans_text}")
493
+ st.session_state["qa_count"] += 1
494
+ else:
495
+ st.warning("No response could be generated.")
 
 
 
496
  else:
497
  st.write("You have used all 5 Q&A opportunities.")
498