siddhartharyaai committed on
Commit
f87dac8
·
verified ·
1 Parent(s): 625fef7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -46
app.py CHANGED
@@ -20,23 +20,12 @@ from utils import (
20
  DialogueItem
21
  )
22
  from prompts import SYSTEM_PROMPT
23
-
24
- # NEW: For Q&A
25
  from qa import transcribe_audio_deepgram, handle_qa_exchange
26
 
27
  MAX_QA_QUESTIONS = 5
28
 
29
  def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
30
- """
31
- Looks for lines like:
32
- **Angela**: Hello
33
- **Dimitris**: Great topic...
34
- We treat 'Angela' as the raw display_speaker, 'Hello' as text.
35
- Then we map 'Angela' -> speaker='Jane' (if it matches host_name),
36
- 'Dimitris' -> speaker='John' (if it matches guest_name), etc.
37
-
38
- Returns a list of DialogueItem.
39
- """
40
  pattern = r"\*\*(.+?)\*\*:\s*(.+)"
41
  matches = re.findall(pattern, edited_text)
42
 
@@ -71,17 +60,11 @@ def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: s
71
  return items
72
 
73
  def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
74
- """
75
- Re-generates multi-speaker audio from user-edited DialogueItems,
76
- then mixes with background music or custom music.
77
- Returns (audio_bytes, transcript_str).
78
- """
79
  audio_segments = []
80
  transcript = ""
81
- crossfade_duration = 50 # ms
82
 
83
  for item in dialogue_items:
84
- # Generate TTS for each line
85
  audio_file = generate_audio_mp3(item.text, item.speaker)
86
  seg = AudioSegment.from_file(audio_file, format="mp3")
87
  audio_segments.append(seg)
@@ -123,12 +106,6 @@ def generate_podcast(
123
  sponsor_style,
124
  custom_bg_music_path
125
  ):
126
- """
127
- Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
128
- Ensures female voice for host (Jane), male voice for guest (John).
129
- If sponsor_content is empty, we skip sponsor instructions entirely.
130
- Returns (audio_bytes, transcript_str).
131
- """
132
  sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
133
  if sum(sources) > 1:
134
  return None, "Provide only one input (PDF, URL, YouTube, or Topic)."
@@ -166,7 +143,6 @@ def generate_podcast(
166
  except Exception as e:
167
  return None, f"Error researching topic: {str(e)}"
168
 
169
- from utils import truncate_text
170
  text = truncate_text(text)
171
 
172
  extra_instructions = []
@@ -178,7 +154,7 @@ def generate_podcast(
178
  if user_specs.strip():
179
  extra_instructions.append(f"Additional User Instructions: {user_specs}")
180
 
181
- # If sponsor_content is blank, skip sponsor instructions
182
  sponsor_instructions_present = False
183
  if sponsor_content.strip():
184
  extra_instructions.append(
@@ -189,14 +165,11 @@ def generate_podcast(
189
  from prompts import SYSTEM_PROMPT
190
  combined_instructions = "\n\n".join(extra_instructions).strip()
191
  full_prompt = SYSTEM_PROMPT
192
-
193
  if combined_instructions:
194
  full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
195
 
196
  from utils import generate_script, generate_audio_mp3, mix_with_bg_music
197
  try:
198
- # If sponsor is empty, we pass the sponsor_style anyway,
199
- # but the model won't see sponsor instructions (since none were appended).
200
  script = generate_script(
201
  full_prompt,
202
  text,
@@ -204,7 +177,7 @@ def generate_podcast(
204
  f"{length_minutes} Mins",
205
  host_name=host_name or "Jane",
206
  guest_name=guest_name or "John",
207
- sponsor_style=sponsor_style
208
  )
209
  except Exception as e:
210
  return None, f"Error generating script: {str(e)}"
@@ -273,7 +246,7 @@ def main():
273
  "conversational podcast.\n"
274
  "Select a tone and a duration range. The script will be on-topic, concise, and respect your chosen length.\n\n"
275
  "### How to use:\n"
276
- "1. **Provide one source:** PDF Files, Website URL, YouTube videos, or a Topic to Research.\n"
277
  "2. **Choose the tone and the target duration.**\n"
278
  "3. **Click 'Generate Podcast'** to produce your podcast. After the audio is generated, "
279
  " you can edit the transcript and re-generate the audio with your edits if needed.\n\n"
@@ -305,10 +278,7 @@ def main():
305
 
306
  user_specs = st.text_area("Any special instructions or prompts for the script? (Optional)", "")
307
  sponsor_content = st.text_area("Sponsored Content / Ad (Optional)", "")
308
- sponsor_style = st.selectbox(
309
- "Sponsor Integration Style",
310
- ["Separate Break", "Blended"]
311
- )
312
 
313
  custom_bg_music_file = st.file_uploader("Upload Custom Background Music (Optional)", type=["mp3", "wav"])
314
  custom_bg_music_path = None
@@ -411,7 +381,6 @@ def main():
411
  st.session_state["transcript_original"],
412
  edited_text
413
  )
414
-
415
  st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
416
  st.markdown(highlighted_transcript, unsafe_allow_html=True)
417
 
@@ -459,9 +428,9 @@ def main():
459
  st.markdown("### Updated Transcript")
460
  st.markdown(new_transcript)
461
 
462
- # -----------------------
463
- # POST-PODCAST Q&A Logic
464
- # -----------------------
465
  st.markdown("## Post-Podcast Q&A")
466
  used_questions = st.session_state["qa_count"]
467
  remaining = MAX_QA_QUESTIONS - used_questions
@@ -470,8 +439,8 @@ def main():
470
  st.write(f"You can ask up to {remaining} more question(s).")
471
 
472
  typed_q = st.text_input("Type your follow-up question:")
473
- # If on Streamlit >= 1.41.0, you could do st.audio_input for direct mic recordings
474
- audio_q = st.file_uploader("Or upload an audio question (WAV, MP3)")
475
 
476
  if st.button("Submit Q&A"):
477
  if used_questions >= MAX_QA_QUESTIONS:
@@ -479,11 +448,10 @@ def main():
479
  else:
480
  question_text = typed_q.strip()
481
  if audio_q is not None:
482
- suffix = ".wav"
483
- with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
484
- tmp.write(audio_q.read())
485
  local_audio_path = tmp.name
486
- st.write("Transcribing your audio question...")
487
  audio_transcript = transcribe_audio_deepgram(local_audio_path)
488
  if audio_transcript:
489
  question_text = audio_transcript
 
20
  DialogueItem
21
  )
22
  from prompts import SYSTEM_PROMPT
23
+ # Q&A
 
24
  from qa import transcribe_audio_deepgram, handle_qa_exchange
25
 
26
  MAX_QA_QUESTIONS = 5
27
 
28
  def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
 
 
 
 
 
 
 
 
 
 
29
  pattern = r"\*\*(.+?)\*\*:\s*(.+)"
30
  matches = re.findall(pattern, edited_text)
31
 
 
60
  return items
61
 
62
  def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
 
 
 
 
 
63
  audio_segments = []
64
  transcript = ""
65
+ crossfade_duration = 50
66
 
67
  for item in dialogue_items:
 
68
  audio_file = generate_audio_mp3(item.text, item.speaker)
69
  seg = AudioSegment.from_file(audio_file, format="mp3")
70
  audio_segments.append(seg)
 
106
  sponsor_style,
107
  custom_bg_music_path
108
  ):
 
 
 
 
 
 
109
  sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
110
  if sum(sources) > 1:
111
  return None, "Provide only one input (PDF, URL, YouTube, or Topic)."
 
143
  except Exception as e:
144
  return None, f"Error researching topic: {str(e)}"
145
 
 
146
  text = truncate_text(text)
147
 
148
  extra_instructions = []
 
154
  if user_specs.strip():
155
  extra_instructions.append(f"Additional User Instructions: {user_specs}")
156
 
157
+ # If user provided sponsor content, we pass it along; otherwise skip
158
  sponsor_instructions_present = False
159
  if sponsor_content.strip():
160
  extra_instructions.append(
 
165
  from prompts import SYSTEM_PROMPT
166
  combined_instructions = "\n\n".join(extra_instructions).strip()
167
  full_prompt = SYSTEM_PROMPT
 
168
  if combined_instructions:
169
  full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
170
 
171
  from utils import generate_script, generate_audio_mp3, mix_with_bg_music
172
  try:
 
 
173
  script = generate_script(
174
  full_prompt,
175
  text,
 
177
  f"{length_minutes} Mins",
178
  host_name=host_name or "Jane",
179
  guest_name=guest_name or "John",
180
+ sponsor_style=sponsor_style # If sponsor is empty, no sponsor lines appended
181
  )
182
  except Exception as e:
183
  return None, f"Error generating script: {str(e)}"
 
246
  "conversational podcast.\n"
247
  "Select a tone and a duration range. The script will be on-topic, concise, and respect your chosen length.\n\n"
248
  "### How to use:\n"
249
+ "1. **Provide one source:** PDF Files, Website URL, YouTube link or a Topic to Research.\n"
250
  "2. **Choose the tone and the target duration.**\n"
251
  "3. **Click 'Generate Podcast'** to produce your podcast. After the audio is generated, "
252
  " you can edit the transcript and re-generate the audio with your edits if needed.\n\n"
 
278
 
279
  user_specs = st.text_area("Any special instructions or prompts for the script? (Optional)", "")
280
  sponsor_content = st.text_area("Sponsored Content / Ad (Optional)", "")
281
+ sponsor_style = st.selectbox("Sponsor Integration Style", ["Separate Break", "Blended"])
 
 
 
282
 
283
  custom_bg_music_file = st.file_uploader("Upload Custom Background Music (Optional)", type=["mp3", "wav"])
284
  custom_bg_music_path = None
 
381
  st.session_state["transcript_original"],
382
  edited_text
383
  )
 
384
  st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
385
  st.markdown(highlighted_transcript, unsafe_allow_html=True)
386
 
 
428
  st.markdown("### Updated Transcript")
429
  st.markdown(new_transcript)
430
 
431
+ # -------------------------------------------
432
+ # Post-Podcast Q&A using st.audio_input():
433
+ # -------------------------------------------
434
  st.markdown("## Post-Podcast Q&A")
435
  used_questions = st.session_state["qa_count"]
436
  remaining = MAX_QA_QUESTIONS - used_questions
 
439
  st.write(f"You can ask up to {remaining} more question(s).")
440
 
441
  typed_q = st.text_input("Type your follow-up question:")
442
+ # Replacing file_uploader with st.audio_input (Streamlit >= 1.41)
443
+ audio_q = st.audio_input("Or record your voice question")
444
 
445
  if st.button("Submit Q&A"):
446
  if used_questions >= MAX_QA_QUESTIONS:
 
448
  else:
449
  question_text = typed_q.strip()
450
  if audio_q is not None:
451
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
452
+ tmp.write(audio_q.getvalue())
 
453
  local_audio_path = tmp.name
454
+ st.write("Transcribing your audio question via Deepgram...")
455
  audio_transcript = transcribe_audio_deepgram(local_audio_path)
456
  if audio_transcript:
457
  question_text = audio_transcript