Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ import os
|
|
7 |
import tempfile
|
8 |
import pypdf
|
9 |
from pydub import AudioSegment
|
|
|
10 |
|
11 |
from utils import (
|
12 |
generate_script,
|
@@ -28,15 +29,16 @@ def parse_user_edited_transcript(edited_text: str):
|
|
28 |
pattern = r"\*\*(Jane|John)\*\*:\s*(.+)"
|
29 |
matches = re.findall(pattern, edited_text)
|
30 |
if not matches:
|
31 |
-
# If user changed the format drastically, treat entire text as Jane
|
32 |
return [("Jane", edited_text)]
|
33 |
return matches
|
34 |
|
35 |
def regenerate_audio_from_dialogue(dialogue_items):
|
36 |
"""
|
37 |
-
Re-generates multi-speaker audio from user-edited text
|
|
|
38 |
Returns final audio bytes and updated transcript.
|
39 |
"""
|
|
|
40 |
audio_segments = []
|
41 |
transcript = ""
|
42 |
crossfade_duration = 50 # in ms
|
@@ -51,16 +53,19 @@ def regenerate_audio_from_dialogue(dialogue_items):
|
|
51 |
if not audio_segments:
|
52 |
return None, "No audio segments were generated."
|
53 |
|
54 |
-
# Combine
|
55 |
-
|
56 |
for seg in audio_segments[1:]:
|
57 |
-
|
58 |
|
|
|
|
|
|
|
|
|
59 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
|
60 |
-
|
61 |
final_mp3_path = temp_audio.name
|
62 |
|
63 |
-
# Read bytes and return them (so we have a real .mp3 to download)
|
64 |
with open(final_mp3_path, "rb") as f:
|
65 |
audio_bytes = f.read()
|
66 |
os.remove(final_mp3_path)
|
@@ -69,12 +74,8 @@ def regenerate_audio_from_dialogue(dialogue_items):
|
|
69 |
|
70 |
def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
71 |
"""
|
72 |
-
Creates a multi-speaker podcast from
|
73 |
-
|
74 |
-
- URL
|
75 |
-
- YouTube video
|
76 |
-
- or a research topic input.
|
77 |
-
Returns (audio_bytes, transcript_str).
|
78 |
"""
|
79 |
sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
|
80 |
if sum(sources) > 1:
|
@@ -82,9 +83,9 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
|
82 |
if not any(sources):
|
83 |
return None, "Please provide at least one source."
|
84 |
|
|
|
85 |
text = ""
|
86 |
if file:
|
87 |
-
# Handle PDF
|
88 |
try:
|
89 |
if not file.name.lower().endswith('.pdf'):
|
90 |
return None, "Please upload a PDF file."
|
@@ -93,7 +94,6 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
|
93 |
except Exception as e:
|
94 |
return None, f"Error reading PDF: {str(e)}"
|
95 |
elif url:
|
96 |
-
# Handle URL
|
97 |
try:
|
98 |
text = extract_text_from_url(url)
|
99 |
if not text:
|
@@ -101,7 +101,6 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
|
101 |
except Exception as e:
|
102 |
return None, f"Error extracting text from URL: {str(e)}"
|
103 |
elif video_url:
|
104 |
-
# Handle YouTube
|
105 |
try:
|
106 |
text = transcribe_youtube_video(video_url)
|
107 |
if not text:
|
@@ -109,7 +108,6 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
|
109 |
except Exception as e:
|
110 |
return None, f"Error transcribing YouTube video: {str(e)}"
|
111 |
elif research_topic_input:
|
112 |
-
# Handle research topic
|
113 |
try:
|
114 |
text = research_topic(research_topic_input)
|
115 |
if not text:
|
@@ -117,13 +115,14 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
|
117 |
except Exception as e:
|
118 |
return None, f"Error researching topic: {str(e)}"
|
119 |
|
120 |
-
# Generate
|
121 |
try:
|
122 |
text = truncate_text(text)
|
123 |
script = generate_script(SYSTEM_PROMPT, text, tone, length)
|
124 |
except Exception as e:
|
125 |
return None, f"Error generating script: {str(e)}"
|
126 |
|
|
|
127 |
audio_segments = []
|
128 |
transcript = ""
|
129 |
crossfade_duration = 50 # ms
|
@@ -139,31 +138,67 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
|
139 |
if not audio_segments:
|
140 |
return None, "No audio segments generated."
|
141 |
|
142 |
-
|
|
|
143 |
for seg in audio_segments[1:]:
|
144 |
-
|
|
|
|
|
|
|
145 |
|
|
|
146 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
|
147 |
-
|
148 |
final_mp3_path = temp_audio.name
|
149 |
|
150 |
-
# Convert final mp3 to bytes
|
151 |
with open(final_mp3_path, "rb") as f:
|
152 |
audio_bytes = f.read()
|
153 |
os.remove(final_mp3_path)
|
154 |
|
155 |
return audio_bytes, transcript
|
|
|
156 |
except Exception as e:
|
157 |
return None, f"Error generating audio: {str(e)}"
|
158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
def main():
|
160 |
-
#
|
161 |
-
st.set_page_config(
|
162 |
-
page_title="MyPod - AI-based Podcast Generator",
|
163 |
-
layout="centered"
|
164 |
-
)
|
165 |
|
166 |
-
# Enable "light or dark" theme via custom CSS
|
167 |
st.markdown(
|
168 |
"""
|
169 |
<style>
|
@@ -197,19 +232,14 @@ def main():
|
|
197 |
"MyPod transforms your documents, webpages, YouTube videos, or research topics into a more human-sounding, conversational podcast.\n"
|
198 |
"Select a tone and a duration range. The script will be on-topic, concise, and respect your chosen length.\n\n"
|
199 |
"### How to use:\n"
|
200 |
-
"1. **Provide one source:** PDF, URL, YouTube link
|
201 |
"2. **Choose the tone and the target duration.**\n"
|
202 |
"3. **Click 'Generate Podcast'** to produce your podcast.\n\n"
|
203 |
"**After** the audio is generated, you can **edit** the transcript \n"
|
204 |
"and **re-generate** the audio with your edits if needed.\n\n"
|
205 |
-
"
|
206 |
-
"you might not get the desired outcome. We'll fetch information from Wikipedia and RSS feeds (BBC, CNN, Associated Press, "
|
207 |
-
"NDTV, Times of India, The Hindu, Economic Times, Google News) or the LLM knowledge base to get recent info about the topic.\n\n"
|
208 |
-
"**Token Limit:** Up to ~2,048 tokens are supported. Long inputs may be truncated.\n"
|
209 |
-
"**Note:** YouTube transcription uses Whisper on CPU and may take longer for very long videos.\n\n"
|
210 |
-
"⏳**Please be patient while your podcast is being generated.** This process involves content analysis, script creation, "
|
211 |
"and high-quality audio synthesis, which may take a few minutes.\n\n"
|
212 |
-
"🔥 **Ready to create your personalized podcast?** Give it a try now
|
213 |
)
|
214 |
|
215 |
col1, col2 = st.columns(2)
|
@@ -222,7 +252,6 @@ def main():
|
|
222 |
tone = st.radio("Tone", ["Humorous", "Formal", "Casual", "Youthful"], index=2)
|
223 |
length = st.radio("Length", ["1-3 Mins", "3-5 Mins", "5-10 Mins", "10-20 Mins"], index=0)
|
224 |
|
225 |
-
# Use session_state to avoid losing results if user clicks away
|
226 |
if "audio_bytes" not in st.session_state:
|
227 |
st.session_state["audio_bytes"] = None
|
228 |
if "transcript" not in st.session_state:
|
@@ -231,11 +260,9 @@ def main():
|
|
231 |
generate_button = st.button("Generate Podcast")
|
232 |
|
233 |
if generate_button:
|
234 |
-
# Show a pseudo progress bar for user engagement
|
235 |
progress_bar = st.progress(0)
|
236 |
progress_text = st.empty()
|
237 |
|
238 |
-
# Steps to pretend some progress:
|
239 |
progress_text.write("Alright, let's get started...")
|
240 |
progress_bar.progress(10)
|
241 |
time.sleep(1.0)
|
@@ -258,7 +285,6 @@ def main():
|
|
258 |
|
259 |
if audio_bytes is None:
|
260 |
st.error(transcript)
|
261 |
-
# Clear session state
|
262 |
st.session_state["audio_bytes"] = None
|
263 |
st.session_state["transcript"] = None
|
264 |
else:
|
@@ -266,11 +292,8 @@ def main():
|
|
266 |
st.session_state["audio_bytes"] = audio_bytes
|
267 |
st.session_state["transcript"] = transcript
|
268 |
|
269 |
-
# Check if we have a stored result
|
270 |
if st.session_state["audio_bytes"]:
|
271 |
-
# Show the audio
|
272 |
st.audio(st.session_state["audio_bytes"], format='audio/mp3')
|
273 |
-
# Provide a download button with .mp3 extension
|
274 |
st.download_button(
|
275 |
label="Download Podcast (MP3)",
|
276 |
data=st.session_state["audio_bytes"],
|
@@ -278,7 +301,6 @@ def main():
|
|
278 |
mime="audio/mpeg"
|
279 |
)
|
280 |
|
281 |
-
# Show the transcript in a text area for editing
|
282 |
st.markdown("### Generated Transcript (Editable)")
|
283 |
edited_text = st.text_area(
|
284 |
"Feel free to tweak lines, fix errors, or reword anything.",
|
@@ -286,7 +308,6 @@ def main():
|
|
286 |
height=300
|
287 |
)
|
288 |
|
289 |
-
# Regenerate button
|
290 |
if st.button("Regenerate Audio From Edited Text"):
|
291 |
regen_bar = st.progress(0)
|
292 |
regen_text = st.empty()
|
@@ -299,7 +320,6 @@ def main():
|
|
299 |
regen_bar.progress(60)
|
300 |
time.sleep(1.0)
|
301 |
|
302 |
-
# Parse & regenerate
|
303 |
dialogue_items = parse_user_edited_transcript(edited_text)
|
304 |
new_audio_bytes, new_transcript = regenerate_audio_from_dialogue(dialogue_items)
|
305 |
|
@@ -314,7 +334,6 @@ def main():
|
|
314 |
regen_text.write("All set!")
|
315 |
st.success("Regenerated audio below:")
|
316 |
|
317 |
-
# Store updated
|
318 |
st.session_state["audio_bytes"] = new_audio_bytes
|
319 |
st.session_state["transcript"] = new_transcript
|
320 |
|
|
|
7 |
import tempfile
|
8 |
import pypdf
|
9 |
from pydub import AudioSegment
|
10 |
+
from pydub import effects # for normalizing volume if needed
|
11 |
|
12 |
from utils import (
|
13 |
generate_script,
|
|
|
29 |
pattern = r"\*\*(Jane|John)\*\*:\s*(.+)"
|
30 |
matches = re.findall(pattern, edited_text)
|
31 |
if not matches:
|
|
|
32 |
return [("Jane", edited_text)]
|
33 |
return matches
|
34 |
|
35 |
def regenerate_audio_from_dialogue(dialogue_items):
|
36 |
"""
|
37 |
+
Re-generates multi-speaker audio from user-edited text,
|
38 |
+
then mixes with background music from the root folder (bg_music.mp3).
|
39 |
Returns final audio bytes and updated transcript.
|
40 |
"""
|
41 |
+
# 1) Create spoken segments
|
42 |
audio_segments = []
|
43 |
transcript = ""
|
44 |
crossfade_duration = 50 # in ms
|
|
|
53 |
if not audio_segments:
|
54 |
return None, "No audio segments were generated."
|
55 |
|
56 |
+
# 2) Combine spoken segments
|
57 |
+
combined_spoken = audio_segments[0]
|
58 |
for seg in audio_segments[1:]:
|
59 |
+
combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
|
60 |
|
61 |
+
# 3) Mix with background music
|
62 |
+
final_mix = mix_with_bg_music(combined_spoken)
|
63 |
+
|
64 |
+
# 4) Export to bytes
|
65 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
|
66 |
+
final_mix.export(temp_audio.name, format="mp3")
|
67 |
final_mp3_path = temp_audio.name
|
68 |
|
|
|
69 |
with open(final_mp3_path, "rb") as f:
|
70 |
audio_bytes = f.read()
|
71 |
os.remove(final_mp3_path)
|
|
|
74 |
|
75 |
def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
76 |
"""
|
77 |
+
Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
|
78 |
+
Returns (audio_bytes, transcript_str), mixing with background music in root folder (bg_music.mp3).
|
|
|
|
|
|
|
|
|
79 |
"""
|
80 |
sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
|
81 |
if sum(sources) > 1:
|
|
|
83 |
if not any(sources):
|
84 |
return None, "Please provide at least one source."
|
85 |
|
86 |
+
# 1) Fetch text
|
87 |
text = ""
|
88 |
if file:
|
|
|
89 |
try:
|
90 |
if not file.name.lower().endswith('.pdf'):
|
91 |
return None, "Please upload a PDF file."
|
|
|
94 |
except Exception as e:
|
95 |
return None, f"Error reading PDF: {str(e)}"
|
96 |
elif url:
|
|
|
97 |
try:
|
98 |
text = extract_text_from_url(url)
|
99 |
if not text:
|
|
|
101 |
except Exception as e:
|
102 |
return None, f"Error extracting text from URL: {str(e)}"
|
103 |
elif video_url:
|
|
|
104 |
try:
|
105 |
text = transcribe_youtube_video(video_url)
|
106 |
if not text:
|
|
|
108 |
except Exception as e:
|
109 |
return None, f"Error transcribing YouTube video: {str(e)}"
|
110 |
elif research_topic_input:
|
|
|
111 |
try:
|
112 |
text = research_topic(research_topic_input)
|
113 |
if not text:
|
|
|
115 |
except Exception as e:
|
116 |
return None, f"Error researching topic: {str(e)}"
|
117 |
|
118 |
+
# 2) Generate multi-speaker script
|
119 |
try:
|
120 |
text = truncate_text(text)
|
121 |
script = generate_script(SYSTEM_PROMPT, text, tone, length)
|
122 |
except Exception as e:
|
123 |
return None, f"Error generating script: {str(e)}"
|
124 |
|
125 |
+
# 3) Convert dialogue to spoken segments
|
126 |
audio_segments = []
|
127 |
transcript = ""
|
128 |
crossfade_duration = 50 # ms
|
|
|
138 |
if not audio_segments:
|
139 |
return None, "No audio segments generated."
|
140 |
|
141 |
+
# Combine
|
142 |
+
combined_spoken = audio_segments[0]
|
143 |
for seg in audio_segments[1:]:
|
144 |
+
combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
|
145 |
+
|
146 |
+
# Mix with background music
|
147 |
+
final_mix = mix_with_bg_music(combined_spoken)
|
148 |
|
149 |
+
# Export to bytes
|
150 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
|
151 |
+
final_mix.export(temp_audio.name, format="mp3")
|
152 |
final_mp3_path = temp_audio.name
|
153 |
|
|
|
154 |
with open(final_mp3_path, "rb") as f:
|
155 |
audio_bytes = f.read()
|
156 |
os.remove(final_mp3_path)
|
157 |
|
158 |
return audio_bytes, transcript
|
159 |
+
|
160 |
except Exception as e:
|
161 |
return None, f"Error generating audio: {str(e)}"
|
162 |
|
163 |
+
def mix_with_bg_music(spoken: AudioSegment) -> AudioSegment:
    """
    Mix 'spoken' with the background track bg_music.mp3.

    The music path is relative, so it is resolved against the current
    working directory (the app's root folder when launched from there).

    The mix is built as follows:
      1) The music plays alone for a 2-second intro before speech begins.
      2) The music is looped until it covers the intro plus the speech,
         then cropped to exactly that length.
      3) The music is lowered by 14 dB so the speech is clear.

    Args:
        spoken: The combined speech track to place over the music.

    Returns:
        The speech overlaid on the music bed. If the music file cannot be
        loaded, or it decodes to zero-length audio, 'spoken' is returned
        unchanged (best-effort: the podcast is still produced without music).
    """
    bg_music_path = "bg_music.mp3"  # root-level file
    intro_ms = 2000  # music-only lead-in; also the offset where speech starts

    try:
        bg_music = AudioSegment.from_file(bg_music_path, format="mp3")
    except Exception as e:
        # Deliberate best-effort: missing/corrupt music must not fail the podcast.
        print("[ERROR] Failed to load background music:", e)
        return spoken

    # A zero-length track would never advance the loop below (infinite loop),
    # so fall back to speech-only in that case as well.
    if len(bg_music) == 0:
        print("[ERROR] Background music is empty; skipping background mix.")
        return spoken

    # Lower the music volume (-14 dB) so it sits underneath the speech
    bg_music = bg_music - 14.0

    # Final length = intro + speech; the same intro value is reused as the
    # overlay offset so the two can never drift apart.
    total_length_ms = len(spoken) + intro_ms

    # Loop the music until it is at least as long as the final audio
    looped_music = AudioSegment.empty()
    while len(looped_music) < total_length_ms:
        looped_music += bg_music

    # Crop to exact total_length_ms
    looped_music = looped_music[:total_length_ms]

    # Speech starts after the music-only intro
    final_mix = looped_music.overlay(spoken, position=intro_ms)

    return final_mix
|
197 |
+
|
198 |
def main():
|
199 |
+
# Move set_page_config to the top if needed
|
200 |
+
st.set_page_config(page_title="MyPod - AI-based Podcast Generator", layout="centered")
|
|
|
|
|
|
|
201 |
|
|
|
202 |
st.markdown(
|
203 |
"""
|
204 |
<style>
|
|
|
232 |
"MyPod transforms your documents, webpages, YouTube videos, or research topics into a more human-sounding, conversational podcast.\n"
|
233 |
"Select a tone and a duration range. The script will be on-topic, concise, and respect your chosen length.\n\n"
|
234 |
"### How to use:\n"
|
235 |
+
"1. **Provide one source:** PDF, URL, YouTube link, or a Topic to Research.\n"
|
236 |
"2. **Choose the tone and the target duration.**\n"
|
237 |
"3. **Click 'Generate Podcast'** to produce your podcast.\n\n"
|
238 |
"**After** the audio is generated, you can **edit** the transcript \n"
|
239 |
"and **re-generate** the audio with your edits if needed.\n\n"
|
240 |
+
"⏳**Please be patient while your podcast is being generated.** It involves content analysis, script creation, "
|
|
|
|
|
|
|
|
|
|
|
241 |
"and high-quality audio synthesis, which may take a few minutes.\n\n"
|
242 |
+
"🔥 **Ready to create your personalized podcast?** Give it a try now!"
|
243 |
)
|
244 |
|
245 |
col1, col2 = st.columns(2)
|
|
|
252 |
tone = st.radio("Tone", ["Humorous", "Formal", "Casual", "Youthful"], index=2)
|
253 |
length = st.radio("Length", ["1-3 Mins", "3-5 Mins", "5-10 Mins", "10-20 Mins"], index=0)
|
254 |
|
|
|
255 |
if "audio_bytes" not in st.session_state:
|
256 |
st.session_state["audio_bytes"] = None
|
257 |
if "transcript" not in st.session_state:
|
|
|
260 |
generate_button = st.button("Generate Podcast")
|
261 |
|
262 |
if generate_button:
|
|
|
263 |
progress_bar = st.progress(0)
|
264 |
progress_text = st.empty()
|
265 |
|
|
|
266 |
progress_text.write("Alright, let's get started...")
|
267 |
progress_bar.progress(10)
|
268 |
time.sleep(1.0)
|
|
|
285 |
|
286 |
if audio_bytes is None:
|
287 |
st.error(transcript)
|
|
|
288 |
st.session_state["audio_bytes"] = None
|
289 |
st.session_state["transcript"] = None
|
290 |
else:
|
|
|
292 |
st.session_state["audio_bytes"] = audio_bytes
|
293 |
st.session_state["transcript"] = transcript
|
294 |
|
|
|
295 |
if st.session_state["audio_bytes"]:
|
|
|
296 |
st.audio(st.session_state["audio_bytes"], format='audio/mp3')
|
|
|
297 |
st.download_button(
|
298 |
label="Download Podcast (MP3)",
|
299 |
data=st.session_state["audio_bytes"],
|
|
|
301 |
mime="audio/mpeg"
|
302 |
)
|
303 |
|
|
|
304 |
st.markdown("### Generated Transcript (Editable)")
|
305 |
edited_text = st.text_area(
|
306 |
"Feel free to tweak lines, fix errors, or reword anything.",
|
|
|
308 |
height=300
|
309 |
)
|
310 |
|
|
|
311 |
if st.button("Regenerate Audio From Edited Text"):
|
312 |
regen_bar = st.progress(0)
|
313 |
regen_text = st.empty()
|
|
|
320 |
regen_bar.progress(60)
|
321 |
time.sleep(1.0)
|
322 |
|
|
|
323 |
dialogue_items = parse_user_edited_transcript(edited_text)
|
324 |
new_audio_bytes, new_transcript = regenerate_audio_from_dialogue(dialogue_items)
|
325 |
|
|
|
334 |
regen_text.write("All set!")
|
335 |
st.success("Regenerated audio below:")
|
336 |
|
|
|
337 |
st.session_state["audio_bytes"] = new_audio_bytes
|
338 |
st.session_state["transcript"] = new_transcript
|
339 |
|