siddhartharyaai committed
Commit 117149e · verified · 1 Parent(s): 2752da2

Update app.py

Files changed (1)
  1. app.py +96 -41
app.py CHANGED
@@ -17,32 +17,37 @@ from utils import (
  transcribe_youtube_video,
  research_topic,
  mix_with_bg_music,
- DialogueItem # so we can construct items
  )
  from prompts import SYSTEM_PROMPT

  def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
  """
  Looks for lines like:
  **Angela**: Hello
  **Dimitris**: Great topic...
  We treat 'Angela' as the raw display_speaker, 'Hello' as text.
- Then we map 'Angela' -> speaker='Jane' if it matches host_name (case-insensitive),
- 'Dimitris' -> speaker='John' if it matches guest_name, else default to 'Jane'.
- Returns a list of (DialogueItem).
  """
  pattern = r"\*\*(.+?)\*\*:\s*(.+)"
  matches = re.findall(pattern, edited_text)

  items = []
  if not matches:
- # No lines found, treat entire text as if it's host
  raw_name = host_name or "Jane"
  text_line = edited_text.strip()
  speaker = "Jane"
  if raw_name.lower() == guest_name.lower():
  speaker = "John"
- # build a single item
  item = DialogueItem(
  speaker=speaker,
  display_speaker=raw_name,
@@ -51,17 +56,15 @@ def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: s
  items.append(item)
  return items

- # If we have multiple lines
  for (raw_name, text_line) in matches:
- # Map to TTS speaker
  if raw_name.lower() == host_name.lower():
- # host -> female
  speaker = "Jane"
  elif raw_name.lower() == guest_name.lower():
- # guest -> male
  speaker = "John"
  else:
- # unknown -> default to female host
  speaker = "Jane"
  item = DialogueItem(
  speaker=speaker,
@@ -69,36 +72,42 @@ def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: s
  text=text_line
  )
  items.append(item)
  return items

  def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
  """
  Re-generates multi-speaker audio from user-edited DialogueItems,
- then mixes with background music (bg_music.mp3) or custom music.
- Returns final audio bytes and updated transcript (using display_speaker).
  """
  audio_segments = []
  transcript = ""
- crossfade_duration = 50 # in ms

  for item in dialogue_items:
  audio_file = generate_audio_mp3(item.text, item.speaker)
  seg = AudioSegment.from_file(audio_file, format="mp3")
  audio_segments.append(seg)
- # Use item.display_speaker for the text transcript
  transcript += f"**{item.display_speaker}**: {item.text}\n\n"
  os.remove(audio_file)

  if not audio_segments:
  return None, "No audio segments were generated."

- # Combine spoken segments sequentially
  combined_spoken = audio_segments[0]
  for seg in audio_segments[1:]:
  combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)

  final_mix = mix_with_bg_music(combined_spoken, custom_bg_music_path)

  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
  final_mix.export(temp_audio.name, format="mp3")
  final_mp3_path = temp_audio.name
@@ -109,6 +118,7 @@ def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):

  return audio_bytes, transcript

  def generate_podcast(
  file,
  url,
@@ -122,21 +132,24 @@ def generate_podcast(
  guest_desc,
  user_specs,
  sponsor_content,
  custom_bg_music_path
  ):
  """
  Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
- Uses female voice (Jane) for host, male voice (John) for guest.
- Display_speaker is user-chosen name, speaker is "Jane" or "John".

  Returns (audio_bytes, transcript_str).
  """
  sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
  if sum(sources) > 1:
- return None, "Provide only one input (PDF, URL, YouTube, or Research topic)."
  if not any(sources):
  return None, "Please provide at least one source."

  text = ""
  if file:
  try:
@@ -168,32 +181,35 @@ def generate_podcast(
  except Exception as e:
  return None, f"Error researching topic: {str(e)}"

- # Truncate if needed
  text = truncate_text(text)

- # Build extra instructions
  extra_instructions = []

  if host_name or guest_name:
- h = f"Host: {host_name or 'Jane'} - {host_desc or 'a curious host'}."
- g = f"Guest: {guest_name or 'John'} - {guest_desc or 'an expert'}."
- extra_instructions.append(f"{h}\n{g}")

  if user_specs.strip():
  extra_instructions.append(f"Additional User Instructions: {user_specs}")

  if sponsor_content.strip():
  extra_instructions.append(
- "Please include a short sponsored advertisement. The sponsor text is as follows:\n"
- + sponsor_content
  )

  combined_instructions = "\n\n".join(extra_instructions).strip()
  full_prompt = SYSTEM_PROMPT
  if combined_instructions:
  full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"

- # Use "generate_script" with host/guest name so it can do the mapping
  try:
  script = generate_script(
  full_prompt,
@@ -201,7 +217,8 @@ def generate_podcast(
  tone,
  f"{length_minutes} Mins",
  host_name=host_name or "Jane",
- guest_name=guest_name or "John"
  )
  except Exception as e:
  return None, f"Error generating script: {str(e)}"
@@ -211,24 +228,28 @@ def generate_podcast(
  crossfade_duration = 50 # ms

  try:
  for item in script.dialogue:
- # item.speaker is guaranteed "Jane" or "John"
- # item.display_speaker is the user-facing name
  audio_file = generate_audio_mp3(item.text, item.speaker)
  seg = AudioSegment.from_file(audio_file, format="mp3")
  audio_segments.append(seg)
  transcript += f"**{item.display_speaker}**: {item.text}\n\n"
  os.remove(audio_file)

  if not audio_segments:
  return None, "No audio segments generated."

  combined_spoken = audio_segments[0]
  for seg in audio_segments[1:]:
  combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)

  final_mix = mix_with_bg_music(combined_spoken, custom_bg_music_path)

  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
  final_mix.export(temp_audio.name, format="mp3")
  final_mp3_path = temp_audio.name
@@ -242,10 +263,11 @@ def generate_podcast(
  except Exception as e:
  return None, f"Error generating audio: {str(e)}"

  def highlight_differences(original: str, edited: str) -> str:
  """
  Highlights the differences between the original and edited transcripts.
- Added or modified words are wrapped in <span> tags with red color.
  """
  matcher = difflib.SequenceMatcher(None, original.split(), edited.split())
  highlighted = []
@@ -256,14 +278,19 @@ def highlight_differences(original: str, edited: str) -> str:
  added_words = edited.split()[j1:j2]
  highlighted.extend([f'<span style="color:red">{word}</span>' for word in added_words])
  elif opcode == 'delete':
  pass
  return ' '.join(highlighted)

  def main():
  st.set_page_config(page_title="MyPod - AI-based Podcast Generator", layout="centered")

  st.markdown("## MyPod - AI powered Podcast Generator")

  st.markdown(
  "Welcome to **MyPod**, your go-to AI-powered podcast generator! 🎉\n\n"
  "MyPod transforms your documents, webpages, YouTube videos, or research topics into a more human-sounding, conversational podcast.\n"
@@ -272,6 +299,7 @@ def main():
  "1. **Provide one source:** PDF Files, Website URL, YouTube link or a Topic to Research.\n"
  "2. **Choose the tone and the target duration.**\n"
  "3. **Click 'Generate Podcast'** to produce your podcast. After the audio is generated, you can edit the transcript and re-generate the audio with your edits if needed.\n\n"
  "**Token Limit:** Up to ~2,048 tokens are supported. Long inputs may be truncated.\n"
  "**Note:** YouTube videos will only work if they have captions built in.\n\n"
  "⏳**Please be patient while your podcast is being generated.** This process involves content analysis, script creation, "
@@ -279,6 +307,7 @@ def main():
  "🔥 **Ready to create your personalized podcast?** Give it a try now and let the magic happen! 🔥"
  )

  col1, col2 = st.columns(2)
  with col1:
  file = st.file_uploader("Upload File (.pdf only)", type=["pdf"])
@@ -289,16 +318,30 @@ def main():
  tone = st.radio("Tone", ["Humorous", "Formal", "Casual", "Youthful"], index=2)
  length_minutes = st.slider("Podcast Length (in minutes)", 1, 60, 3)

  st.markdown("### Customize Your Podcast (New Features)")
  with st.expander("Set Host & Guest Names/Descriptions (Optional)"):
  host_name = st.text_input("Host Name (leave blank for 'Jane')")
  host_desc = st.text_input("Host Description (Optional)")
  guest_name = st.text_input("Guest Name (leave blank for 'John')")
  guest_desc = st.text_input("Guest Description (Optional)")

  user_specs = st.text_area("Any special instructions or prompts for the script? (Optional)", "")
  sponsor_content = st.text_area("Sponsored Content / Ad (Optional)", "")

  custom_bg_music_file = st.file_uploader("Upload Custom Background Music (Optional)", type=["mp3", "wav"])
  custom_bg_music_path = None
  if custom_bg_music_file:
@@ -306,6 +349,7 @@ def main():
  tmp.write(custom_bg_music_file.read())
  custom_bg_music_path = tmp.name

  if "audio_bytes" not in st.session_state:
  st.session_state["audio_bytes"] = None
  if "transcript" not in st.session_state:
@@ -313,32 +357,34 @@ def main():
  if "transcript_original" not in st.session_state:
  st.session_state["transcript_original"] = None

  generate_button = st.button("Generate Podcast")

  if generate_button:
  progress_bar = st.progress(0)
  progress_text = st.empty()

- messages = [
  "🔍 Analyzing your input...",
  "📝 Crafting the perfect script...",
  "🎙️ Generating high-quality audio...",
  "🎶 Adding the finishing touches..."
  ]

- progress_text.write(messages[0])
  progress_bar.progress(0)
  time.sleep(1.0)

- progress_text.write(messages[1])
  progress_bar.progress(25)
  time.sleep(1.0)

- progress_text.write(messages[2])
  progress_bar.progress(50)
  time.sleep(1.0)

- progress_text.write(messages[3])
  progress_bar.progress(75)
  time.sleep(1.0)

@@ -355,12 +401,14 @@ def main():
  guest_desc,
  user_specs,
  sponsor_content,
  custom_bg_music_path
  )

  progress_bar.progress(100)
  progress_text.write("✅ Done!")

  if audio_bytes is None:
  st.error(transcript)
  st.session_state["audio_bytes"] = None
@@ -372,6 +420,7 @@ def main():
  st.session_state["transcript"] = transcript
  st.session_state["transcript_original"] = transcript

  if st.session_state["audio_bytes"]:
  st.audio(st.session_state["audio_bytes"], format='audio/mp3')
  st.download_button(
@@ -388,13 +437,15 @@ def main():
  height=300
  )

  if st.session_state["transcript_original"]:
- highlighted = highlight_differences(
  st.session_state["transcript_original"],
  edited_text
  )
  st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
- st.markdown(highlighted, unsafe_allow_html=True)

  if st.button("Regenerate Audio From Edited Text"):
  regen_bar = st.progress(0)
@@ -408,9 +459,12 @@ def main():
  regen_bar.progress(50)
  time.sleep(1.0)

- # Parse lines, map to DialogueItem with correct TTS speaker
- # host => female (Jane), guest => male (John)
- dialogue_items = parse_user_edited_transcript(edited_text, host_name or "Jane", guest_name or "John")
  new_audio_bytes, new_transcript = regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path)

  regen_bar.progress(75)
@@ -438,5 +492,6 @@ def main():
  st.markdown("### Updated Transcript")
  st.markdown(new_transcript)

  if __name__ == "__main__":
  main()
 
  transcribe_youtube_video,
  research_topic,
  mix_with_bg_music,
+ DialogueItem
  )
  from prompts import SYSTEM_PROMPT

+
  def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
  """
  Looks for lines like:
  **Angela**: Hello
  **Dimitris**: Great topic...
  We treat 'Angela' as the raw display_speaker, 'Hello' as text.
+ Then we map 'Angela' -> speaker='Jane' (if it matches host_name),
+ 'Dimitris' -> speaker='John' (if it matches guest_name), etc.
+
+ Returns a list of DialogueItem.
  """
+ # Regex pattern to match lines in the format:
+ # **SpeakerName**: Some text here
  pattern = r"\*\*(.+?)\*\*:\s*(.+)"
  matches = re.findall(pattern, edited_text)

  items = []
+
+ # If no matches found, treat entire text as if it's from the host
  if not matches:
  raw_name = host_name or "Jane"
  text_line = edited_text.strip()
  speaker = "Jane"
+ # If user typed the entire text under the guest's name, switch
  if raw_name.lower() == guest_name.lower():
  speaker = "John"
  item = DialogueItem(
  speaker=speaker,
  display_speaker=raw_name,

  items.append(item)
  return items

+ # Otherwise, we found multiple lines
  for (raw_name, text_line) in matches:
+ # Decide TTS speaker based on the name
  if raw_name.lower() == host_name.lower():
  speaker = "Jane"
  elif raw_name.lower() == guest_name.lower():
  speaker = "John"
  else:
+ # Default to "Jane" if we can't match
  speaker = "Jane"
  item = DialogueItem(
  speaker=speaker,

  text=text_line
  )
  items.append(item)
+
  return items

+
  def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
  """
  Re-generates multi-speaker audio from user-edited DialogueItems,
+ then mixes with background music or custom music.
+
+ Returns (audio_bytes, transcript_str).
  """
  audio_segments = []
  transcript = ""
+ crossfade_duration = 50 # ms

  for item in dialogue_items:
+ # Generate TTS for each line
  audio_file = generate_audio_mp3(item.text, item.speaker)
  seg = AudioSegment.from_file(audio_file, format="mp3")
  audio_segments.append(seg)
+ # Build the updated transcript with the user-facing display_speaker
  transcript += f"**{item.display_speaker}**: {item.text}\n\n"
  os.remove(audio_file)

  if not audio_segments:
  return None, "No audio segments were generated."

+ # Sequentially combine all the speaker segments
  combined_spoken = audio_segments[0]
  for seg in audio_segments[1:]:
  combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)

+ # Mix final spoken track with background music
  final_mix = mix_with_bg_music(combined_spoken, custom_bg_music_path)

+ # Export to MP3 bytes
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
  final_mix.export(temp_audio.name, format="mp3")
  final_mp3_path = temp_audio.name
 

  return audio_bytes, transcript

+
  def generate_podcast(
  file,
  url,

  guest_desc,
  user_specs,
  sponsor_content,
+ sponsor_style, # NEW: "Separate Break" or "Blended"
  custom_bg_music_path
  ):
  """
  Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
+ Ensures female voice for host (Jane), male voice for guest (John).
+ Sponsor content is either separate or blended based on sponsor_style.

  Returns (audio_bytes, transcript_str).
  """
+ # Validate only one input source
  sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
  if sum(sources) > 1:
+ return None, "Provide only one input (PDF, URL, YouTube, or Topic)."
  if not any(sources):
  return None, "Please provide at least one source."

+ # Load or extract text
  text = ""
  if file:
  try:

  except Exception as e:
  return None, f"Error researching topic: {str(e)}"

+ # Truncate text if too long
  text = truncate_text(text)

+ # Build extra prompt instructions
  extra_instructions = []

+ # Host/Guest descriptions
  if host_name or guest_name:
+ host_line = f"Host: {host_name or 'Jane'} - {host_desc or 'a curious host'}."
+ guest_line = f"Guest: {guest_name or 'John'} - {guest_desc or 'an expert'}."
+ extra_instructions.append(f"{host_line}\n{guest_line}")

+ # User custom specs
  if user_specs.strip():
  extra_instructions.append(f"Additional User Instructions: {user_specs}")

+ # Sponsor content
  if sponsor_content.strip():
  extra_instructions.append(
+ f"Sponsor Content Provided (under ~30 seconds):\n{sponsor_content}"
  )

+ # Combine all instructions
  combined_instructions = "\n\n".join(extra_instructions).strip()
  full_prompt = SYSTEM_PROMPT
  if combined_instructions:
  full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"

+ # Generate the script
  try:
  script = generate_script(
  full_prompt,
 
  tone,
  f"{length_minutes} Mins",
  host_name=host_name or "Jane",
+ guest_name=guest_name or "John",
+ sponsor_style=sponsor_style # pass style to the LLM
  )
  except Exception as e:
  return None, f"Error generating script: {str(e)}"

  crossfade_duration = 50 # ms

  try:
+ # For each dialogue item, do TTS
  for item in script.dialogue:
  audio_file = generate_audio_mp3(item.text, item.speaker)
  seg = AudioSegment.from_file(audio_file, format="mp3")
  audio_segments.append(seg)
+
+ # Build transcript with display_speaker
  transcript += f"**{item.display_speaker}**: {item.text}\n\n"
  os.remove(audio_file)

  if not audio_segments:
  return None, "No audio segments generated."

+ # Combine speaker segments
  combined_spoken = audio_segments[0]
  for seg in audio_segments[1:]:
  combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)

+ # Mix with background music
  final_mix = mix_with_bg_music(combined_spoken, custom_bg_music_path)

+ # Export final to MP3 bytes
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
  final_mix.export(temp_audio.name, format="mp3")
  final_mp3_path = temp_audio.name
 
  except Exception as e:
  return None, f"Error generating audio: {str(e)}"

+
  def highlight_differences(original: str, edited: str) -> str:
  """
  Highlights the differences between the original and edited transcripts.
+ Added or replaced words are wrapped in <span style="color:red">...</span>.
  """
  matcher = difflib.SequenceMatcher(None, original.split(), edited.split())
  highlighted = []

  added_words = edited.split()[j1:j2]
  highlighted.extend([f'<span style="color:red">{word}</span>' for word in added_words])
  elif opcode == 'delete':
+ # We ignore deletions
  pass
  return ' '.join(highlighted)

+
  def main():
+ # Set page config
  st.set_page_config(page_title="MyPod - AI-based Podcast Generator", layout="centered")

+ # Main header
  st.markdown("## MyPod - AI powered Podcast Generator")

+ # Original Intro Explanation
  st.markdown(
  "Welcome to **MyPod**, your go-to AI-powered podcast generator! 🎉\n\n"
  "MyPod transforms your documents, webpages, YouTube videos, or research topics into a more human-sounding, conversational podcast.\n"

  "1. **Provide one source:** PDF Files, Website URL, YouTube link or a Topic to Research.\n"
  "2. **Choose the tone and the target duration.**\n"
  "3. **Click 'Generate Podcast'** to produce your podcast. After the audio is generated, you can edit the transcript and re-generate the audio with your edits if needed.\n\n"
+ "**Research a Topic:** If it's too niche or specific, you might not get the desired outcome.\n\n"
  "**Token Limit:** Up to ~2,048 tokens are supported. Long inputs may be truncated.\n"
  "**Note:** YouTube videos will only work if they have captions built in.\n\n"
  "⏳**Please be patient while your podcast is being generated.** This process involves content analysis, script creation, "

  "🔥 **Ready to create your personalized podcast?** Give it a try now and let the magic happen! 🔥"
  )

+ # Two columns for inputs
  col1, col2 = st.columns(2)
  with col1:
  file = st.file_uploader("Upload File (.pdf only)", type=["pdf"])

  tone = st.radio("Tone", ["Humorous", "Formal", "Casual", "Youthful"], index=2)
  length_minutes = st.slider("Podcast Length (in minutes)", 1, 60, 3)

+ # Additional user customizations
  st.markdown("### Customize Your Podcast (New Features)")
+
+ # Host & Guest
  with st.expander("Set Host & Guest Names/Descriptions (Optional)"):
  host_name = st.text_input("Host Name (leave blank for 'Jane')")
  host_desc = st.text_input("Host Description (Optional)")
  guest_name = st.text_input("Guest Name (leave blank for 'John')")
  guest_desc = st.text_input("Guest Description (Optional)")

+ # Additional specs
  user_specs = st.text_area("Any special instructions or prompts for the script? (Optional)", "")
+
+ # Sponsor content
  sponsor_content = st.text_area("Sponsored Content / Ad (Optional)", "")

+ # Sponsor integration style
+ sponsor_style = st.selectbox(
+ "Sponsor Integration Style",
+ ["Separate Break", "Blended"],
+ help="Choose whether to insert sponsor content as a separate ad break or blend it into the conversation."
+ )
+
+ # Custom background music
  custom_bg_music_file = st.file_uploader("Upload Custom Background Music (Optional)", type=["mp3", "wav"])
  custom_bg_music_path = None
  if custom_bg_music_file:
 
  tmp.write(custom_bg_music_file.read())
  custom_bg_music_path = tmp.name

+ # Initialize session state if needed
  if "audio_bytes" not in st.session_state:
  st.session_state["audio_bytes"] = None
  if "transcript" not in st.session_state:

  if "transcript_original" not in st.session_state:
  st.session_state["transcript_original"] = None

+ # Generate button
  generate_button = st.button("Generate Podcast")

  if generate_button:
  progress_bar = st.progress(0)
  progress_text = st.empty()

+ progress_messages = [
  "🔍 Analyzing your input...",
  "📝 Crafting the perfect script...",
  "🎙️ Generating high-quality audio...",
  "🎶 Adding the finishing touches..."
  ]

+ # Simulate progress stages
+ progress_text.write(progress_messages[0])
  progress_bar.progress(0)
  time.sleep(1.0)

+ progress_text.write(progress_messages[1])
  progress_bar.progress(25)
  time.sleep(1.0)

+ progress_text.write(progress_messages[2])
  progress_bar.progress(50)
  time.sleep(1.0)

+ progress_text.write(progress_messages[3])
  progress_bar.progress(75)
  time.sleep(1.0)

 
  guest_desc,
  user_specs,
  sponsor_content,
+ sponsor_style,
  custom_bg_music_path
  )

  progress_bar.progress(100)
  progress_text.write("✅ Done!")

+ # Check results
  if audio_bytes is None:
  st.error(transcript)
  st.session_state["audio_bytes"] = None

  st.session_state["transcript"] = transcript
  st.session_state["transcript_original"] = transcript

+ # If we have audio bytes, display the player and transcript
  if st.session_state["audio_bytes"]:
  st.audio(st.session_state["audio_bytes"], format='audio/mp3')
  st.download_button(

  height=300
  )

+ # Show differences from the original
  if st.session_state["transcript_original"]:
+ highlighted_transcript = highlight_differences(
  st.session_state["transcript_original"],
  edited_text
  )
+
  st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
+ st.markdown(highlighted_transcript, unsafe_allow_html=True)

  if st.button("Regenerate Audio From Edited Text"):
  regen_bar = st.progress(0)

  regen_bar.progress(50)
  time.sleep(1.0)

+ # Parse the user-edited transcript
+ dialogue_items = parse_user_edited_transcript(
+ edited_text,
+ host_name or "Jane",
+ guest_name or "John"
+ )
  new_audio_bytes, new_transcript = regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path)

  regen_bar.progress(75)

  st.markdown("### Updated Transcript")
  st.markdown(new_transcript)

+
  if __name__ == "__main__":
  main()
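
For reference, a minimal standalone sketch of the transcript-parsing and speaker-mapping behavior the updated app.py relies on. The regex is copied from parse_user_edited_transcript above; the host/guest names and the edited transcript are hypothetical sample values, not part of the commit.

import re

# Pattern from parse_user_edited_transcript: captures "**Speaker**: text" pairs.
pattern = r"\*\*(.+?)\*\*:\s*(.+)"

# Hypothetical user-chosen names and an edited transcript (illustrative only).
host_name, guest_name = "Angela", "Dimitris"
edited_text = "**Angela**: Hello\n**Dimitris**: Great topic...\n"

for raw_name, text_line in re.findall(pattern, edited_text):
    # Host maps to the "Jane" TTS voice, guest to "John"; unknown names fall back to "Jane".
    if raw_name.lower() == host_name.lower():
        speaker = "Jane"
    elif raw_name.lower() == guest_name.lower():
        speaker = "John"
    else:
        speaker = "Jane"
    print(f"{raw_name} -> {speaker}: {text_line}")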