Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -6,8 +6,7 @@ import re
|
|
6 |
import os
|
7 |
import tempfile
|
8 |
import pypdf
|
9 |
-
from pydub import AudioSegment
|
10 |
-
from pydub import effects # for normalizing volume if needed
|
11 |
|
12 |
from utils import (
|
13 |
generate_script,
|
@@ -19,6 +18,7 @@ from utils import (
|
|
19 |
)
|
20 |
from prompts import SYSTEM_PROMPT
|
21 |
|
|
|
22 |
def parse_user_edited_transcript(edited_text: str):
|
23 |
"""
|
24 |
Looks for lines like:
|
@@ -32,13 +32,13 @@ def parse_user_edited_transcript(edited_text: str):
|
|
32 |
return [("Jane", edited_text)]
|
33 |
return matches
|
34 |
|
|
|
35 |
def regenerate_audio_from_dialogue(dialogue_items):
|
36 |
"""
|
37 |
Re-generates multi-speaker audio from user-edited text,
|
38 |
-
then mixes with background music
|
39 |
Returns final audio bytes and updated transcript.
|
40 |
"""
|
41 |
-
# 1) Create spoken segments
|
42 |
audio_segments = []
|
43 |
transcript = ""
|
44 |
crossfade_duration = 50 # in ms
|
@@ -53,15 +53,15 @@ def regenerate_audio_from_dialogue(dialogue_items):
|
|
53 |
if not audio_segments:
|
54 |
return None, "No audio segments were generated."
|
55 |
|
56 |
-
#
|
57 |
combined_spoken = audio_segments[0]
|
58 |
for seg in audio_segments[1:]:
|
59 |
combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
|
60 |
|
61 |
-
#
|
62 |
final_mix = mix_with_bg_music(combined_spoken)
|
63 |
|
64 |
-
#
|
65 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
|
66 |
final_mix.export(temp_audio.name, format="mp3")
|
67 |
final_mp3_path = temp_audio.name
|
@@ -72,6 +72,7 @@ def regenerate_audio_from_dialogue(dialogue_items):
|
|
72 |
|
73 |
return audio_bytes, transcript
|
74 |
|
|
|
75 |
def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
76 |
"""
|
77 |
Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
|
@@ -83,7 +84,6 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
|
83 |
if not any(sources):
|
84 |
return None, "Please provide at least one source."
|
85 |
|
86 |
-
# 1) Fetch text
|
87 |
text = ""
|
88 |
if file:
|
89 |
try:
|
@@ -115,14 +115,13 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
|
115 |
except Exception as e:
|
116 |
return None, f"Error researching topic: {str(e)}"
|
117 |
|
118 |
-
#
|
119 |
try:
|
120 |
text = truncate_text(text)
|
121 |
script = generate_script(SYSTEM_PROMPT, text, tone, length)
|
122 |
except Exception as e:
|
123 |
return None, f"Error generating script: {str(e)}"
|
124 |
|
125 |
-
# 3) Convert dialogue to spoken segments
|
126 |
audio_segments = []
|
127 |
transcript = ""
|
128 |
crossfade_duration = 50 # ms
|
@@ -138,12 +137,11 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
|
138 |
if not audio_segments:
|
139 |
return None, "No audio segments generated."
|
140 |
|
141 |
-
# Combine
|
142 |
combined_spoken = audio_segments[0]
|
143 |
for seg in audio_segments[1:]:
|
144 |
combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
|
145 |
|
146 |
-
# Mix with
|
147 |
final_mix = mix_with_bg_music(combined_spoken)
|
148 |
|
149 |
# Export to bytes
|
@@ -160,6 +158,7 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
|
160 |
except Exception as e:
|
161 |
return None, f"Error generating audio: {str(e)}"
|
162 |
|
|
|
163 |
def mix_with_bg_music(spoken: AudioSegment) -> AudioSegment:
|
164 |
"""
|
165 |
Mixes 'spoken' with bg_music.mp3 in the root folder:
|
@@ -167,8 +166,7 @@ def mix_with_bg_music(spoken: AudioSegment) -> AudioSegment:
|
|
167 |
2) Loop the music if it's shorter than the final audio length.
|
168 |
3) Lower the music volume so the speech is clear.
|
169 |
"""
|
170 |
-
|
171 |
-
bg_music_path = "bg_music.mp3" # root-level file
|
172 |
|
173 |
try:
|
174 |
bg_music = AudioSegment.from_file(bg_music_path, format="mp3")
|
@@ -176,82 +174,56 @@ def mix_with_bg_music(spoken: AudioSegment) -> AudioSegment:
|
|
176 |
print("[ERROR] Failed to load background music:", e)
|
177 |
return spoken
|
178 |
|
179 |
-
# Lower
|
180 |
-
bg_music = bg_music - 14.0
|
181 |
|
182 |
-
# total_length_ms = spoken length + 2000ms intro
|
183 |
total_length_ms = len(spoken) + 2000
|
184 |
-
|
185 |
-
# Loop the music if it's shorter than total_length_ms
|
186 |
looped_music = AudioSegment.empty()
|
187 |
while len(looped_music) < total_length_ms:
|
188 |
looped_music += bg_music
|
189 |
|
190 |
-
# Crop to exact total_length_ms
|
191 |
looped_music = looped_music[:total_length_ms]
|
192 |
|
193 |
-
#
|
194 |
final_mix = looped_music.overlay(spoken, position=2000)
|
195 |
|
196 |
return final_mix
|
197 |
|
|
|
198 |
def main():
|
199 |
-
# Move set_page_config to the top if needed
|
200 |
st.set_page_config(page_title="MyPod - AI-based Podcast Generator", layout="centered")
|
201 |
|
202 |
-
|
203 |
-
|
204 |
-
<style>
|
205 |
-
:root {
|
206 |
-
color-scheme: light dark;
|
207 |
-
}
|
208 |
-
body {
|
209 |
-
background-color: #f0f2f6;
|
210 |
-
color: #222;
|
211 |
-
}
|
212 |
-
.css-18e3th9 {
|
213 |
-
background-color: #e8eaf2;
|
214 |
-
}
|
215 |
-
.stButton>button {
|
216 |
-
background-color: #0066cc;
|
217 |
-
color: white;
|
218 |
-
border-radius: 8px;
|
219 |
-
}
|
220 |
-
.stProgress>div>div>div>div {
|
221 |
-
background-color: #0066cc;
|
222 |
-
}
|
223 |
-
</style>
|
224 |
-
""",
|
225 |
-
unsafe_allow_html=True
|
226 |
-
)
|
227 |
-
|
228 |
-
st.title("🎙 MyPod - AI-based Podcast Generator")
|
229 |
|
230 |
st.markdown(
|
231 |
"Welcome to **MyPod**, your go-to AI-powered podcast generator! 🎉\n\n"
|
232 |
"MyPod transforms your documents, webpages, YouTube videos, or research topics into a more human-sounding, conversational podcast.\n"
|
233 |
"Select a tone and a duration range. The script will be on-topic, concise, and respect your chosen length.\n\n"
|
234 |
"### How to use:\n"
|
235 |
-
"1. **Provide one source:** PDF, URL, YouTube link, or a Topic to Research.\n"
|
236 |
"2. **Choose the tone and the target duration.**\n"
|
237 |
-
"3. **Click 'Generate Podcast'** to produce your podcast.\n\n"
|
238 |
-
"**
|
239 |
-
"
|
240 |
-
"
|
|
|
|
|
|
|
241 |
"and high-quality audio synthesis, which may take a few minutes.\n\n"
|
242 |
-
"🔥 **Ready to create your personalized podcast?** Give it a try now!"
|
243 |
)
|
244 |
|
245 |
col1, col2 = st.columns(2)
|
246 |
with col1:
|
247 |
file = st.file_uploader("Upload PDF (.pdf only)", type=["pdf"])
|
248 |
url = st.text_input("Or Enter URL")
|
249 |
-
video_url = st.text_input("Or Enter YouTube Link")
|
250 |
with col2:
|
251 |
research_topic_input = st.text_input("Or Research a Topic")
|
252 |
tone = st.radio("Tone", ["Humorous", "Formal", "Casual", "Youthful"], index=2)
|
253 |
length = st.radio("Length", ["1-3 Mins", "3-5 Mins", "5-10 Mins", "10-20 Mins"], index=0)
|
254 |
|
|
|
255 |
if "audio_bytes" not in st.session_state:
|
256 |
st.session_state["audio_bytes"] = None
|
257 |
if "transcript" not in st.session_state:
|
|
|
6 |
import os
|
7 |
import tempfile
|
8 |
import pypdf
|
9 |
+
from pydub import AudioSegment, effects
|
|
|
10 |
|
11 |
from utils import (
|
12 |
generate_script,
|
|
|
18 |
)
|
19 |
from prompts import SYSTEM_PROMPT
|
20 |
|
21 |
+
|
22 |
def parse_user_edited_transcript(edited_text: str):
|
23 |
"""
|
24 |
Looks for lines like:
|
|
|
32 |
return [("Jane", edited_text)]
|
33 |
return matches
|
34 |
|
35 |
+
|
36 |
def regenerate_audio_from_dialogue(dialogue_items):
|
37 |
"""
|
38 |
Re-generates multi-speaker audio from user-edited text,
|
39 |
+
then mixes with background music in the root folder (bg_music.mp3).
|
40 |
Returns final audio bytes and updated transcript.
|
41 |
"""
|
|
|
42 |
audio_segments = []
|
43 |
transcript = ""
|
44 |
crossfade_duration = 50 # in ms
|
|
|
53 |
if not audio_segments:
|
54 |
return None, "No audio segments were generated."
|
55 |
|
56 |
+
# Combine spoken segments
|
57 |
combined_spoken = audio_segments[0]
|
58 |
for seg in audio_segments[1:]:
|
59 |
combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
|
60 |
|
61 |
+
# Mix with background music
|
62 |
final_mix = mix_with_bg_music(combined_spoken)
|
63 |
|
64 |
+
# Export to bytes
|
65 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
|
66 |
final_mix.export(temp_audio.name, format="mp3")
|
67 |
final_mp3_path = temp_audio.name
|
|
|
72 |
|
73 |
return audio_bytes, transcript
|
74 |
|
75 |
+
|
76 |
def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
77 |
"""
|
78 |
Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
|
|
|
84 |
if not any(sources):
|
85 |
return None, "Please provide at least one source."
|
86 |
|
|
|
87 |
text = ""
|
88 |
if file:
|
89 |
try:
|
|
|
115 |
except Exception as e:
|
116 |
return None, f"Error researching topic: {str(e)}"
|
117 |
|
118 |
+
# Generate script
|
119 |
try:
|
120 |
text = truncate_text(text)
|
121 |
script = generate_script(SYSTEM_PROMPT, text, tone, length)
|
122 |
except Exception as e:
|
123 |
return None, f"Error generating script: {str(e)}"
|
124 |
|
|
|
125 |
audio_segments = []
|
126 |
transcript = ""
|
127 |
crossfade_duration = 50 # ms
|
|
|
137 |
if not audio_segments:
|
138 |
return None, "No audio segments generated."
|
139 |
|
|
|
140 |
combined_spoken = audio_segments[0]
|
141 |
for seg in audio_segments[1:]:
|
142 |
combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
|
143 |
|
144 |
+
# Mix with bg music
|
145 |
final_mix = mix_with_bg_music(combined_spoken)
|
146 |
|
147 |
# Export to bytes
|
|
|
158 |
except Exception as e:
|
159 |
return None, f"Error generating audio: {str(e)}"
|
160 |
|
161 |
+
|
162 |
def mix_with_bg_music(spoken: AudioSegment) -> AudioSegment:
|
163 |
"""
|
164 |
Mixes 'spoken' with bg_music.mp3 in the root folder:
|
|
|
166 |
2) Loop the music if it's shorter than the final audio length.
|
167 |
3) Lower the music volume so the speech is clear.
|
168 |
"""
|
169 |
+
bg_music_path = "bg_music.mp3" # in root folder
|
|
|
170 |
|
171 |
try:
|
172 |
bg_music = AudioSegment.from_file(bg_music_path, format="mp3")
|
|
|
174 |
print("[ERROR] Failed to load background music:", e)
|
175 |
return spoken
|
176 |
|
177 |
+
bg_music = bg_music - 14.0 # Lower volume (e.g. -14 dB)
|
|
|
178 |
|
|
|
179 |
total_length_ms = len(spoken) + 2000
|
|
|
|
|
180 |
looped_music = AudioSegment.empty()
|
181 |
while len(looped_music) < total_length_ms:
|
182 |
looped_music += bg_music
|
183 |
|
|
|
184 |
looped_music = looped_music[:total_length_ms]
|
185 |
|
186 |
+
# Overlay spoken at 2000ms so we get 2s of music first
|
187 |
final_mix = looped_music.overlay(spoken, position=2000)
|
188 |
|
189 |
return final_mix
|
190 |
|
191 |
+
|
192 |
def main():
|
|
|
193 |
st.set_page_config(page_title="MyPod - AI-based Podcast Generator", layout="centered")
|
194 |
|
195 |
+
# Use smaller font for the main header
|
196 |
+
st.markdown("## MyPod - AI powered Podcast Generator")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
|
198 |
st.markdown(
|
199 |
"Welcome to **MyPod**, your go-to AI-powered podcast generator! 🎉\n\n"
|
200 |
"MyPod transforms your documents, webpages, YouTube videos, or research topics into a more human-sounding, conversational podcast.\n"
|
201 |
"Select a tone and a duration range. The script will be on-topic, concise, and respect your chosen length.\n\n"
|
202 |
"### How to use:\n"
|
203 |
+
"1. **Provide one source:** PDF, URL, YouTube link (Requires User Auth - Work in Progress), or a Topic to Research.\n"
|
204 |
"2. **Choose the tone and the target duration.**\n"
|
205 |
+
"3. **Click 'Generate Podcast'** to produce your podcast. After the audio is generated, you can edit the transcript and re-generate the audio with your edits if needed.\n\n"
|
206 |
+
"**Research a Topic:** Please be as detailed as possible in your topic statement. If it's too niche or specific, "
|
207 |
+
"you might not get the desired outcome. We'll fetch information from Wikipedia and RSS feeds (BBC, CNN, Associated Press, "
|
208 |
+
"NDTV, Times of India, The Hindu, Economic Times, Google News) or the LLM knowledge base to get recent info about the topic.\n\n"
|
209 |
+
"**Token Limit:** Up to ~2,048 tokens are supported. Long inputs may be truncated.\n"
|
210 |
+
"**Note:** YouTube transcription uses Whisper on CPU and may take longer for very long videos.\n\n"
|
211 |
+
"⏳**Please be patient while your podcast is being generated.** This process involves content analysis, script creation, "
|
212 |
"and high-quality audio synthesis, which may take a few minutes.\n\n"
|
213 |
+
"🔥 **Ready to create your personalized podcast?** Give it a try now and let the magic happen! 🔥"
|
214 |
)
|
215 |
|
216 |
col1, col2 = st.columns(2)
|
217 |
with col1:
|
218 |
file = st.file_uploader("Upload PDF (.pdf only)", type=["pdf"])
|
219 |
url = st.text_input("Or Enter URL")
|
220 |
+
video_url = st.text_input("Or Enter YouTube Link (Requires User Auth - Work in Progress)")
|
221 |
with col2:
|
222 |
research_topic_input = st.text_input("Or Research a Topic")
|
223 |
tone = st.radio("Tone", ["Humorous", "Formal", "Casual", "Youthful"], index=2)
|
224 |
length = st.radio("Length", ["1-3 Mins", "3-5 Mins", "5-10 Mins", "10-20 Mins"], index=0)
|
225 |
|
226 |
+
# Store results in session_state
|
227 |
if "audio_bytes" not in st.session_state:
|
228 |
st.session_state["audio_bytes"] = None
|
229 |
if "transcript" not in st.session_state:
|