Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import time
|
3 |
import re
|
@@ -5,28 +7,41 @@ import os
|
|
5 |
import tempfile
|
6 |
import pypdf
|
7 |
from pydub import AudioSegment
|
|
|
8 |
from utils import (
|
9 |
-
generate_script,
|
10 |
-
generate_audio_mp3,
|
11 |
-
truncate_text,
|
12 |
-
extract_text_from_url,
|
13 |
-
transcribe_youtube_video,
|
14 |
research_topic
|
15 |
)
|
16 |
from prompts import SYSTEM_PROMPT
|
17 |
|
|
|
18 |
def parse_user_edited_transcript(edited_text: str):
    """
    Parse a user-edited transcript into (speaker, text) pairs.

    Recognizes lines of the form:
        **Jane**: Hello
        **John**: Sure, I'd love to talk about that.

    The spoken text must start on the same line as the speaker tag. A tag
    followed only by whitespace is skipped instead of absorbing the next
    line of the transcript.

    Returns a list of (speaker, text) tuples. If no line matches at all,
    the whole input is returned as a single ("Jane", edited_text) entry.
    """
    # `[^\S\r\n]*` is "whitespace except line breaks". The previous `\s*`
    # also matched newlines, so an empty "**Jane**:" line captured the
    # following "**John**: ..." line as Jane's text and dropped John's turn.
    # `\S` forces the captured text to begin with a real character.
    pattern = r"\*\*(Jane|John)\*\*:[^\S\r\n]*(\S.*)"
    matches = re.findall(pattern, edited_text)
    if not matches:
        # No recognizable speaker tags: treat the entire text as Jane's.
        return [("Jane", edited_text)]
    return matches
|
25 |
|
|
|
26 |
def regenerate_audio_from_dialogue(dialogue_items):
|
|
|
|
|
|
|
|
|
27 |
audio_segments = []
|
28 |
transcript = ""
|
29 |
-
crossfade_duration = 50 # ms
|
30 |
|
31 |
for speaker, line_text in dialogue_items:
|
32 |
audio_file = generate_audio_mp3(line_text, speaker)
|
@@ -38,6 +53,7 @@ def regenerate_audio_from_dialogue(dialogue_items):
|
|
38 |
if not audio_segments:
|
39 |
return None, "No audio segments were generated."
|
40 |
|
|
|
41 |
combined = audio_segments[0]
|
42 |
for seg in audio_segments[1:]:
|
43 |
combined = combined.append(seg, crossfade=crossfade_duration)
|
@@ -48,7 +64,17 @@ def regenerate_audio_from_dialogue(dialogue_items):
|
|
48 |
|
49 |
return final_mp3_path, transcript
|
50 |
|
|
|
51 |
def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
|
53 |
if sum(sources) > 1:
|
54 |
return None, "Provide only one input (PDF, URL, YouTube, or Research topic)."
|
@@ -57,6 +83,7 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
|
57 |
|
58 |
text = ""
|
59 |
if file:
|
|
|
60 |
try:
|
61 |
if not file.name.lower().endswith('.pdf'):
|
62 |
return None, "Please upload a PDF file."
|
@@ -65,6 +92,7 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
|
65 |
except Exception as e:
|
66 |
return None, f"Error reading PDF: {str(e)}"
|
67 |
elif url:
|
|
|
68 |
try:
|
69 |
text = extract_text_from_url(url)
|
70 |
if not text:
|
@@ -72,6 +100,7 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
|
72 |
except Exception as e:
|
73 |
return None, f"Error extracting text from URL: {str(e)}"
|
74 |
elif video_url:
|
|
|
75 |
try:
|
76 |
text = transcribe_youtube_video(video_url)
|
77 |
if not text:
|
@@ -79,6 +108,7 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
|
79 |
except Exception as e:
|
80 |
return None, f"Error transcribing YouTube video: {str(e)}"
|
81 |
elif research_topic_input:
|
|
|
82 |
try:
|
83 |
text = research_topic(research_topic_input)
|
84 |
if not text:
|
@@ -86,12 +116,14 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
|
86 |
except Exception as e:
|
87 |
return None, f"Error researching topic: {str(e)}"
|
88 |
|
|
|
89 |
try:
|
90 |
text = truncate_text(text)
|
91 |
script = generate_script(SYSTEM_PROMPT, text, tone, length)
|
92 |
except Exception as e:
|
93 |
return None, f"Error generating script: {str(e)}"
|
94 |
|
|
|
95 |
audio_segments = []
|
96 |
transcript = ""
|
97 |
crossfade_duration = 50 # ms
|
@@ -119,12 +151,26 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
|
119 |
except Exception as e:
|
120 |
return None, f"Error generating audio: {str(e)}"
|
121 |
|
|
|
122 |
def main():
|
123 |
-
st.set_page_config(
|
|
|
|
|
|
|
124 |
|
125 |
st.title("🎙 MyPod - AI-based Podcast Generator")
|
126 |
|
127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
col1, col2 = st.columns(2)
|
129 |
with col1:
|
130 |
file = st.file_uploader("Upload PDF (.pdf only)", type=["pdf"])
|
@@ -138,48 +184,43 @@ def main():
|
|
138 |
generate_button = st.button("Generate Podcast")
|
139 |
|
140 |
if generate_button:
|
141 |
-
# Show a
|
142 |
progress_bar = st.progress(0)
|
143 |
progress_text = st.empty()
|
144 |
|
145 |
-
#
|
146 |
-
# Step 1: Start
|
147 |
progress_text.write("Alright, let's get started...")
|
148 |
progress_bar.progress(10)
|
149 |
-
time.sleep(1.
|
150 |
|
151 |
-
# Step 2: Some cheeky text
|
152 |
progress_text.write("Working on the magic. Hang tight!")
|
153 |
progress_bar.progress(40)
|
154 |
-
time.sleep(1.
|
155 |
|
156 |
-
# Step 3: Almost there
|
157 |
progress_text.write("Almost done. Adding a dash of awesomeness...")
|
158 |
progress_bar.progress(70)
|
159 |
-
time.sleep(1.
|
160 |
|
161 |
-
#
|
162 |
podcast_file, transcript = generate_podcast(
|
163 |
file, url, video_url, research_topic_input, tone, length
|
164 |
)
|
165 |
-
|
|
|
|
|
|
|
166 |
|
167 |
if podcast_file is None:
|
168 |
-
# Reset progress to 0 if error
|
169 |
-
progress_bar.progress(100)
|
170 |
st.error(transcript)
|
171 |
-
return
|
172 |
else:
|
173 |
-
progress_bar.progress(100)
|
174 |
-
progress_text.write("Done!")
|
175 |
-
|
176 |
st.success("Podcast generated successfully!")
|
|
|
177 |
audio_file = open(podcast_file, 'rb')
|
178 |
audio_bytes = audio_file.read()
|
179 |
audio_file.close()
|
180 |
st.audio(audio_bytes, format='audio/mp3')
|
181 |
|
182 |
-
# Show transcript in
|
183 |
st.markdown("### Generated Transcript (Editable)")
|
184 |
edited_text = st.text_area(
|
185 |
"Feel free to tweak lines, fix errors, or reword anything.",
|
@@ -187,8 +228,8 @@ def main():
|
|
187 |
height=300
|
188 |
)
|
189 |
|
|
|
190 |
if st.button("Regenerate Audio From Edited Text"):
|
191 |
-
# Another pseudo progress bar for regeneration
|
192 |
regen_bar = st.progress(0)
|
193 |
regen_text = st.empty()
|
194 |
|
@@ -200,7 +241,7 @@ def main():
|
|
200 |
regen_bar.progress(60)
|
201 |
time.sleep(1.0)
|
202 |
|
203 |
-
# Parse
|
204 |
dialogue_items = parse_user_edited_transcript(edited_text)
|
205 |
new_audio_path, new_transcript = regenerate_audio_from_dialogue(dialogue_items)
|
206 |
|
@@ -227,5 +268,6 @@ def main():
|
|
227 |
# Clean up the original file
|
228 |
os.remove(podcast_file)
|
229 |
|
|
|
230 |
if __name__ == "__main__":
|
231 |
main()
|
|
|
1 |
+
# app.py
|
2 |
+
|
3 |
import streamlit as st
|
4 |
import time
|
5 |
import re
|
|
|
7 |
import tempfile
|
8 |
import pypdf
|
9 |
from pydub import AudioSegment
|
10 |
+
|
11 |
from utils import (
|
12 |
+
generate_script,
|
13 |
+
generate_audio_mp3,
|
14 |
+
truncate_text,
|
15 |
+
extract_text_from_url,
|
16 |
+
transcribe_youtube_video,
|
17 |
research_topic
|
18 |
)
|
19 |
from prompts import SYSTEM_PROMPT
|
20 |
|
21 |
+
|
22 |
def parse_user_edited_transcript(edited_text: str):
    """
    Parse a user-edited transcript into (speaker, text) pairs.

    Looks for lines like:
        **Jane**: Hello
        **John**: Sure, I'd love to talk about that.

    The spoken text must start on the same line as the speaker tag. A tag
    followed only by whitespace is skipped instead of absorbing the next
    line of the transcript.

    Returns a list of (speaker, text) tuples. If no line matches at all,
    the whole input is returned as a single ("Jane", edited_text) entry.
    """
    # `[^\S\r\n]*` is "whitespace except line breaks". The previous `\s*`
    # also matched newlines, so an empty "**Jane**:" line captured the
    # following "**John**: ..." line as Jane's text and dropped John's turn.
    # `\S` forces the captured text to begin with a real character.
    pattern = r"\*\*(Jane|John)\*\*:[^\S\r\n]*(\S.*)"
    matches = re.findall(pattern, edited_text)
    if not matches:
        # If user changed the format drastically, treat entire text as Jane
        return [("Jane", edited_text)]
    return matches
|
35 |
|
36 |
+
|
37 |
def regenerate_audio_from_dialogue(dialogue_items):
|
38 |
+
"""
|
39 |
+
Re-generates multi-speaker audio from user-edited text.
|
40 |
+
Returns final_mp3_path, updated_transcript.
|
41 |
+
"""
|
42 |
audio_segments = []
|
43 |
transcript = ""
|
44 |
+
crossfade_duration = 50 # in ms
|
45 |
|
46 |
for speaker, line_text in dialogue_items:
|
47 |
audio_file = generate_audio_mp3(line_text, speaker)
|
|
|
53 |
if not audio_segments:
|
54 |
return None, "No audio segments were generated."
|
55 |
|
56 |
+
# Combine with crossfade
|
57 |
combined = audio_segments[0]
|
58 |
for seg in audio_segments[1:]:
|
59 |
combined = combined.append(seg, crossfade=crossfade_duration)
|
|
|
64 |
|
65 |
return final_mp3_path, transcript
|
66 |
|
67 |
+
|
68 |
def generate_podcast(file, url, video_url, research_topic_input, tone, length):
|
69 |
+
"""
|
70 |
+
Creates a multi-speaker podcast from:
|
71 |
+
- PDF
|
72 |
+
- URL
|
73 |
+
- YouTube video
|
74 |
+
- or a research topic input.
|
75 |
+
Returns (podcast_file_path, transcript_str).
|
76 |
+
"""
|
77 |
+
# Ensure only one input source
|
78 |
sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
|
79 |
if sum(sources) > 1:
|
80 |
return None, "Provide only one input (PDF, URL, YouTube, or Research topic)."
|
|
|
83 |
|
84 |
text = ""
|
85 |
if file:
|
86 |
+
# Handle PDF
|
87 |
try:
|
88 |
if not file.name.lower().endswith('.pdf'):
|
89 |
return None, "Please upload a PDF file."
|
|
|
92 |
except Exception as e:
|
93 |
return None, f"Error reading PDF: {str(e)}"
|
94 |
elif url:
|
95 |
+
# Handle URL
|
96 |
try:
|
97 |
text = extract_text_from_url(url)
|
98 |
if not text:
|
|
|
100 |
except Exception as e:
|
101 |
return None, f"Error extracting text from URL: {str(e)}"
|
102 |
elif video_url:
|
103 |
+
# Handle YouTube
|
104 |
try:
|
105 |
text = transcribe_youtube_video(video_url)
|
106 |
if not text:
|
|
|
108 |
except Exception as e:
|
109 |
return None, f"Error transcribing YouTube video: {str(e)}"
|
110 |
elif research_topic_input:
|
111 |
+
# Handle research topic
|
112 |
try:
|
113 |
text = research_topic(research_topic_input)
|
114 |
if not text:
|
|
|
116 |
except Exception as e:
|
117 |
return None, f"Error researching topic: {str(e)}"
|
118 |
|
119 |
+
# Generate the multi-speaker script
|
120 |
try:
|
121 |
text = truncate_text(text)
|
122 |
script = generate_script(SYSTEM_PROMPT, text, tone, length)
|
123 |
except Exception as e:
|
124 |
return None, f"Error generating script: {str(e)}"
|
125 |
|
126 |
+
# Convert the script.dialogue to audio
|
127 |
audio_segments = []
|
128 |
transcript = ""
|
129 |
crossfade_duration = 50 # ms
|
|
|
151 |
except Exception as e:
|
152 |
return None, f"Error generating audio: {str(e)}"
|
153 |
|
154 |
+
|
155 |
def main():
|
156 |
+
st.set_page_config(
|
157 |
+
page_title="MyPod - AI-based Podcast Generator",
|
158 |
+
layout="centered"
|
159 |
+
)
|
160 |
|
161 |
st.title("🎙 MyPod - AI-based Podcast Generator")
|
162 |
|
163 |
+
st.markdown(
|
164 |
+
"""
|
165 |
+
Welcome to **MyPod**, your AI-powered podcast generator!
|
166 |
+
Provide a PDF, URL, YouTube link, or enter a research topic,
|
167 |
+
choose a tone and target length, and click "Generate Podcast."
|
168 |
+
|
169 |
+
**After** the audio is generated, you can **edit** the transcript
|
170 |
+
and **re-generate** the audio with your edits if needed.
|
171 |
+
"""
|
172 |
+
)
|
173 |
+
|
174 |
col1, col2 = st.columns(2)
|
175 |
with col1:
|
176 |
file = st.file_uploader("Upload PDF (.pdf only)", type=["pdf"])
|
|
|
184 |
generate_button = st.button("Generate Podcast")
|
185 |
|
186 |
if generate_button:
|
187 |
+
# Show a pseudo progress bar for user engagement
|
188 |
progress_bar = st.progress(0)
|
189 |
progress_text = st.empty()
|
190 |
|
191 |
+
# Steps to pretend some progress:
|
|
|
192 |
progress_text.write("Alright, let's get started...")
|
193 |
progress_bar.progress(10)
|
194 |
+
time.sleep(1.0)
|
195 |
|
|
|
196 |
progress_text.write("Working on the magic. Hang tight!")
|
197 |
progress_bar.progress(40)
|
198 |
+
time.sleep(1.0)
|
199 |
|
|
|
200 |
progress_text.write("Almost done. Adding a dash of awesomeness...")
|
201 |
progress_bar.progress(70)
|
202 |
+
time.sleep(1.0)
|
203 |
|
204 |
+
# Actual generation
|
205 |
podcast_file, transcript = generate_podcast(
|
206 |
file, url, video_url, research_topic_input, tone, length
|
207 |
)
|
208 |
+
|
209 |
+
time.sleep(1.0)
|
210 |
+
progress_bar.progress(100)
|
211 |
+
progress_text.write("Done!")
|
212 |
|
213 |
if podcast_file is None:
|
|
|
|
|
214 |
st.error(transcript)
|
|
|
215 |
else:
|
|
|
|
|
|
|
216 |
st.success("Podcast generated successfully!")
|
217 |
+
# Show the audio
|
218 |
audio_file = open(podcast_file, 'rb')
|
219 |
audio_bytes = audio_file.read()
|
220 |
audio_file.close()
|
221 |
st.audio(audio_bytes, format='audio/mp3')
|
222 |
|
223 |
+
# Show the transcript in a text area for editing
|
224 |
st.markdown("### Generated Transcript (Editable)")
|
225 |
edited_text = st.text_area(
|
226 |
"Feel free to tweak lines, fix errors, or reword anything.",
|
|
|
228 |
height=300
|
229 |
)
|
230 |
|
231 |
+
# Regenerate button
|
232 |
if st.button("Regenerate Audio From Edited Text"):
|
|
|
233 |
regen_bar = st.progress(0)
|
234 |
regen_text = st.empty()
|
235 |
|
|
|
241 |
regen_bar.progress(60)
|
242 |
time.sleep(1.0)
|
243 |
|
244 |
+
# Parse & regenerate
|
245 |
dialogue_items = parse_user_edited_transcript(edited_text)
|
246 |
new_audio_path, new_transcript = regenerate_audio_from_dialogue(dialogue_items)
|
247 |
|
|
|
268 |
# Clean up the original file
|
269 |
os.remove(podcast_file)
|
270 |
|
271 |
+
|
272 |
if __name__ == "__main__":
|
273 |
main()
|