siddhartharyaai committed on
Commit
084e565
·
verified ·
1 Parent(s): f9e4b6b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -56
app.py CHANGED
@@ -6,8 +6,7 @@ import re
6
  import os
7
  import tempfile
8
  import pypdf
9
- from pydub import AudioSegment
10
- from pydub import effects # for normalizing volume if needed
11
 
12
  from utils import (
13
  generate_script,
@@ -19,6 +18,7 @@ from utils import (
19
  )
20
  from prompts import SYSTEM_PROMPT
21
 
 
22
  def parse_user_edited_transcript(edited_text: str):
23
  """
24
  Looks for lines like:
@@ -32,13 +32,13 @@ def parse_user_edited_transcript(edited_text: str):
32
  return [("Jane", edited_text)]
33
  return matches
34
 
 
35
  def regenerate_audio_from_dialogue(dialogue_items):
36
  """
37
  Re-generates multi-speaker audio from user-edited text,
38
- then mixes with background music from the root folder (bg_music.mp3).
39
  Returns final audio bytes and updated transcript.
40
  """
41
- # 1) Create spoken segments
42
  audio_segments = []
43
  transcript = ""
44
  crossfade_duration = 50 # in ms
@@ -53,15 +53,15 @@ def regenerate_audio_from_dialogue(dialogue_items):
53
  if not audio_segments:
54
  return None, "No audio segments were generated."
55
 
56
- # 2) Combine spoken segments
57
  combined_spoken = audio_segments[0]
58
  for seg in audio_segments[1:]:
59
  combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
60
 
61
- # 3) Mix with background music
62
  final_mix = mix_with_bg_music(combined_spoken)
63
 
64
- # 4) Export to bytes
65
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
66
  final_mix.export(temp_audio.name, format="mp3")
67
  final_mp3_path = temp_audio.name
@@ -72,6 +72,7 @@ def regenerate_audio_from_dialogue(dialogue_items):
72
 
73
  return audio_bytes, transcript
74
 
 
75
  def generate_podcast(file, url, video_url, research_topic_input, tone, length):
76
  """
77
  Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
@@ -83,7 +84,6 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
83
  if not any(sources):
84
  return None, "Please provide at least one source."
85
 
86
- # 1) Fetch text
87
  text = ""
88
  if file:
89
  try:
@@ -115,14 +115,13 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
115
  except Exception as e:
116
  return None, f"Error researching topic: {str(e)}"
117
 
118
- # 2) Generate multi-speaker script
119
  try:
120
  text = truncate_text(text)
121
  script = generate_script(SYSTEM_PROMPT, text, tone, length)
122
  except Exception as e:
123
  return None, f"Error generating script: {str(e)}"
124
 
125
- # 3) Convert dialogue to spoken segments
126
  audio_segments = []
127
  transcript = ""
128
  crossfade_duration = 50 # ms
@@ -138,12 +137,11 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
138
  if not audio_segments:
139
  return None, "No audio segments generated."
140
 
141
- # Combine
142
  combined_spoken = audio_segments[0]
143
  for seg in audio_segments[1:]:
144
  combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
145
 
146
- # Mix with background music
147
  final_mix = mix_with_bg_music(combined_spoken)
148
 
149
  # Export to bytes
@@ -160,6 +158,7 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
160
  except Exception as e:
161
  return None, f"Error generating audio: {str(e)}"
162
 
 
163
  def mix_with_bg_music(spoken: AudioSegment) -> AudioSegment:
164
  """
165
  Mixes 'spoken' with bg_music.mp3 in the root folder:
@@ -167,8 +166,7 @@ def mix_with_bg_music(spoken: AudioSegment) -> AudioSegment:
167
  2) Loop the music if it's shorter than the final audio length.
168
  3) Lower the music volume so the speech is clear.
169
  """
170
- # Path to background music in root folder:
171
- bg_music_path = "bg_music.mp3" # root-level file
172
 
173
  try:
174
  bg_music = AudioSegment.from_file(bg_music_path, format="mp3")
@@ -176,82 +174,56 @@ def mix_with_bg_music(spoken: AudioSegment) -> AudioSegment:
176
  print("[ERROR] Failed to load background music:", e)
177
  return spoken
178
 
179
- # Lower the music volume (e.g. -14 dB)
180
- bg_music = bg_music - 14.0
181
 
182
- # total_length_ms = spoken length + 2000ms intro
183
  total_length_ms = len(spoken) + 2000
184
-
185
- # Loop the music if it's shorter than total_length_ms
186
  looped_music = AudioSegment.empty()
187
  while len(looped_music) < total_length_ms:
188
  looped_music += bg_music
189
 
190
- # Crop to exact total_length_ms
191
  looped_music = looped_music[:total_length_ms]
192
 
193
- # Create 2s intro for music before speech
194
  final_mix = looped_music.overlay(spoken, position=2000)
195
 
196
  return final_mix
197
 
 
198
  def main():
199
- # Move set_page_config to the top if needed
200
  st.set_page_config(page_title="MyPod - AI-based Podcast Generator", layout="centered")
201
 
202
- st.markdown(
203
- """
204
- <style>
205
- :root {
206
- color-scheme: light dark;
207
- }
208
- body {
209
- background-color: #f0f2f6;
210
- color: #222;
211
- }
212
- .css-18e3th9 {
213
- background-color: #e8eaf2;
214
- }
215
- .stButton>button {
216
- background-color: #0066cc;
217
- color: white;
218
- border-radius: 8px;
219
- }
220
- .stProgress>div>div>div>div {
221
- background-color: #0066cc;
222
- }
223
- </style>
224
- """,
225
- unsafe_allow_html=True
226
- )
227
-
228
- st.title("🎙 MyPod - AI-based Podcast Generator")
229
 
230
  st.markdown(
231
  "Welcome to **MyPod**, your go-to AI-powered podcast generator! 🎉\n\n"
232
  "MyPod transforms your documents, webpages, YouTube videos, or research topics into a more human-sounding, conversational podcast.\n"
233
  "Select a tone and a duration range. The script will be on-topic, concise, and respect your chosen length.\n\n"
234
  "### How to use:\n"
235
- "1. **Provide one source:** PDF, URL, YouTube link, or a Topic to Research.\n"
236
  "2. **Choose the tone and the target duration.**\n"
237
- "3. **Click 'Generate Podcast'** to produce your podcast.\n\n"
238
- "**After** the audio is generated, you can **edit** the transcript \n"
239
- "and **re-generate** the audio with your edits if needed.\n\n"
240
- "⏳**Please be patient while your podcast is being generated.** It involves content analysis, script creation, "
 
 
 
241
  "and high-quality audio synthesis, which may take a few minutes.\n\n"
242
- "🔥 **Ready to create your personalized podcast?** Give it a try now!"
243
  )
244
 
245
  col1, col2 = st.columns(2)
246
  with col1:
247
  file = st.file_uploader("Upload PDF (.pdf only)", type=["pdf"])
248
  url = st.text_input("Or Enter URL")
249
- video_url = st.text_input("Or Enter YouTube Link")
250
  with col2:
251
  research_topic_input = st.text_input("Or Research a Topic")
252
  tone = st.radio("Tone", ["Humorous", "Formal", "Casual", "Youthful"], index=2)
253
  length = st.radio("Length", ["1-3 Mins", "3-5 Mins", "5-10 Mins", "10-20 Mins"], index=0)
254
 
 
255
  if "audio_bytes" not in st.session_state:
256
  st.session_state["audio_bytes"] = None
257
  if "transcript" not in st.session_state:
 
6
  import os
7
  import tempfile
8
  import pypdf
9
+ from pydub import AudioSegment, effects
 
10
 
11
  from utils import (
12
  generate_script,
 
18
  )
19
  from prompts import SYSTEM_PROMPT
20
 
21
+
22
  def parse_user_edited_transcript(edited_text: str):
23
  """
24
  Looks for lines like:
 
32
  return [("Jane", edited_text)]
33
  return matches
34
 
35
+
36
  def regenerate_audio_from_dialogue(dialogue_items):
37
  """
38
  Re-generates multi-speaker audio from user-edited text,
39
+ then mixes with background music in the root folder (bg_music.mp3).
40
  Returns final audio bytes and updated transcript.
41
  """
 
42
  audio_segments = []
43
  transcript = ""
44
  crossfade_duration = 50 # in ms
 
53
  if not audio_segments:
54
  return None, "No audio segments were generated."
55
 
56
+ # Combine spoken segments
57
  combined_spoken = audio_segments[0]
58
  for seg in audio_segments[1:]:
59
  combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
60
 
61
+ # Mix with background music
62
  final_mix = mix_with_bg_music(combined_spoken)
63
 
64
+ # Export to bytes
65
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
66
  final_mix.export(temp_audio.name, format="mp3")
67
  final_mp3_path = temp_audio.name
 
72
 
73
  return audio_bytes, transcript
74
 
75
+
76
  def generate_podcast(file, url, video_url, research_topic_input, tone, length):
77
  """
78
  Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
 
84
  if not any(sources):
85
  return None, "Please provide at least one source."
86
 
 
87
  text = ""
88
  if file:
89
  try:
 
115
  except Exception as e:
116
  return None, f"Error researching topic: {str(e)}"
117
 
118
+ # Generate script
119
  try:
120
  text = truncate_text(text)
121
  script = generate_script(SYSTEM_PROMPT, text, tone, length)
122
  except Exception as e:
123
  return None, f"Error generating script: {str(e)}"
124
 
 
125
  audio_segments = []
126
  transcript = ""
127
  crossfade_duration = 50 # ms
 
137
  if not audio_segments:
138
  return None, "No audio segments generated."
139
 
 
140
  combined_spoken = audio_segments[0]
141
  for seg in audio_segments[1:]:
142
  combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
143
 
144
+ # Mix with bg music
145
  final_mix = mix_with_bg_music(combined_spoken)
146
 
147
  # Export to bytes
 
158
  except Exception as e:
159
  return None, f"Error generating audio: {str(e)}"
160
 
161
+
162
  def mix_with_bg_music(spoken: AudioSegment) -> AudioSegment:
163
  """
164
  Mixes 'spoken' with bg_music.mp3 in the root folder:
 
166
  2) Loop the music if it's shorter than the final audio length.
167
  3) Lower the music volume so the speech is clear.
168
  """
169
+ bg_music_path = "bg_music.mp3" # in root folder
 
170
 
171
  try:
172
  bg_music = AudioSegment.from_file(bg_music_path, format="mp3")
 
174
  print("[ERROR] Failed to load background music:", e)
175
  return spoken
176
 
177
+ bg_music = bg_music - 14.0 # Lower volume (e.g. -14 dB)
 
178
 
 
179
  total_length_ms = len(spoken) + 2000
 
 
180
  looped_music = AudioSegment.empty()
181
  while len(looped_music) < total_length_ms:
182
  looped_music += bg_music
183
 
 
184
  looped_music = looped_music[:total_length_ms]
185
 
186
+ # Overlay spoken at 2000ms so we get 2s of music first
187
  final_mix = looped_music.overlay(spoken, position=2000)
188
 
189
  return final_mix
190
 
191
+
192
  def main():
 
193
  st.set_page_config(page_title="MyPod - AI-based Podcast Generator", layout="centered")
194
 
195
+ # Use smaller font for the main header
196
+ st.markdown("## MyPod - AI powered Podcast Generator")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
 
198
  st.markdown(
199
  "Welcome to **MyPod**, your go-to AI-powered podcast generator! 🎉\n\n"
200
  "MyPod transforms your documents, webpages, YouTube videos, or research topics into a more human-sounding, conversational podcast.\n"
201
  "Select a tone and a duration range. The script will be on-topic, concise, and respect your chosen length.\n\n"
202
  "### How to use:\n"
203
+ "1. **Provide one source:** PDF, URL, YouTube link (Requires User Auth - Work in Progress), or a Topic to Research.\n"
204
  "2. **Choose the tone and the target duration.**\n"
205
+ "3. **Click 'Generate Podcast'** to produce your podcast. After the audio is generated, you can edit the transcript and re-generate the audio with your edits if needed.\n\n"
206
+ "**Research a Topic:** Please be as detailed as possible in your topic statement. If it's too niche or specific, "
207
+ "you might not get the desired outcome. We'll fetch information from Wikipedia and RSS feeds (BBC, CNN, Associated Press, "
208
+ "NDTV, Times of India, The Hindu, Economic Times, Google News) or the LLM knowledge base to get recent info about the topic.\n\n"
209
+ "**Token Limit:** Up to ~2,048 tokens are supported. Long inputs may be truncated.\n"
210
+ "**Note:** YouTube transcription uses Whisper on CPU and may take longer for very long videos.\n\n"
211
+ "⏳**Please be patient while your podcast is being generated.** This process involves content analysis, script creation, "
212
  "and high-quality audio synthesis, which may take a few minutes.\n\n"
213
+ "🔥 **Ready to create your personalized podcast?** Give it a try now and let the magic happen! 🔥"
214
  )
215
 
216
  col1, col2 = st.columns(2)
217
  with col1:
218
  file = st.file_uploader("Upload PDF (.pdf only)", type=["pdf"])
219
  url = st.text_input("Or Enter URL")
220
+ video_url = st.text_input("Or Enter YouTube Link (Requires User Auth - Work in Progress)")
221
  with col2:
222
  research_topic_input = st.text_input("Or Research a Topic")
223
  tone = st.radio("Tone", ["Humorous", "Formal", "Casual", "Youthful"], index=2)
224
  length = st.radio("Length", ["1-3 Mins", "3-5 Mins", "5-10 Mins", "10-20 Mins"], index=0)
225
 
226
+ # Store results in session_state
227
  if "audio_bytes" not in st.session_state:
228
  st.session_state["audio_bytes"] = None
229
  if "transcript" not in st.session_state: