# app.py
"""Streamlit front end that turns a PDF, URL, YouTube link, or research
topic into a two-speaker podcast (MP3 bytes plus an editable transcript)."""

import os
import re
import tempfile
import time

import pypdf
import streamlit as st
from pydub import AudioSegment

from utils import (
    generate_script,
    generate_audio_mp3,
    truncate_text,
    extract_text_from_url,
    transcribe_youtube_video,
    research_topic,
)
from prompts import SYSTEM_PROMPT

# Overlap, in milliseconds, blended between consecutive speaker clips.
CROSSFADE_MS = 50

# One speaker turn: a "**Jane**:" / "**John**:" marker followed by everything
# up to the next marker that starts a line (or the end of the text).
# "[ \t]*" (not "\s*") after the colon keeps an empty turn from swallowing the
# following speaker's line.
_SPEAKER_TURN_RE = re.compile(
    r"\*\*(Jane|John)\*\*:[ \t]*(.*?)(?=\n\s*\*\*(?:Jane|John)\*\*:|\Z)",
    re.DOTALL,
)

_INTRO_MARKDOWN = (
    "Welcome to **MyPod**, your go-to AI-powered podcast generator! 🎉\n\n"
    "MyPod transforms your documents, webpages, YouTube videos, or research topics into a more human-sounding, conversational podcast.\n"
    "Select a tone and a duration range. The script will be on-topic, concise, and respect your chosen length.\n\n"
    "### How to use:\n"
    "1. **Provide one source:** PDF, URL, YouTube link (Requires User Auth - Work in Progress), or a Topic to Research.\n"
    "2. **Choose the tone and the target duration.**\n"
    "3. **Click 'Generate Podcast'** to produce your podcast.\n\n"
    "**After** the audio is generated, you can **edit** the transcript \n"
    "and **re-generate** the audio with your edits if needed.\n\n"
    "**Research a Topic:** Please be as detailed as possible in your topic statement. If it's too niche or specific, "
    "you might not get the desired outcome. We'll fetch information from Wikipedia and RSS feeds (BBC, CNN, Associated Press, "
    "NDTV, Times of India, The Hindu, Economic Times, Google News) or the LLM knowledge base to get recent info about the topic.\n\n"
    "**Token Limit:** Up to ~2,048 tokens are supported. Long inputs may be truncated.\n"
    "**Note:** YouTube transcription uses Whisper on CPU and may take longer for very long videos.\n\n"
    "⏳**Please be patient while your podcast is being generated.** This process involves content analysis, script creation, "
    "and high-quality audio synthesis, which may take a few minutes.\n\n"
    "🔥 **Ready to create your personalized podcast?** Give it a try now and let the magic happen! 🔥"
)


def parse_user_edited_transcript(edited_text: str):
    """Split an edited transcript into ``(speaker, text)`` turns.

    Recognises turns written as::

        **Jane**: Hello
        **John**: Sure, I'd love to talk about that.

    A turn runs until the next speaker marker that starts a line, so text the
    user wrapped onto following lines stays with its speaker. Turns whose text
    is blank are dropped.

    Returns:
        A list of ``(speaker, text)`` tuples. If no speaker marker is found,
        the whole text is attributed to Jane; blank input yields ``[]``.
    """
    matches = _SPEAKER_TURN_RE.findall(edited_text)
    if not matches:
        # The user changed the format drastically: read everything as Jane,
        # unless there is nothing to read at all.
        return [("Jane", edited_text)] if edited_text.strip() else []
    return [(speaker, text.strip()) for speaker, text in matches if text.strip()]


def _discard_file(path):
    """Delete *path*, tolerating a file that is already gone."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _synthesize_dialogue(turns):
    """Voice each ``(speaker, text)`` turn and join the clips into one MP3.

    Returns:
        ``(audio_bytes, transcript)``; ``audio_bytes`` is ``None`` when
        *turns* produced no clips.

    Raises:
        Whatever ``generate_audio_mp3`` or pydub raise; callers translate
        failures into user-facing messages.
    """
    segments = []
    transcript_parts = []
    for speaker, line_text in turns:
        audio_file = generate_audio_mp3(line_text, speaker)
        try:
            segments.append(AudioSegment.from_file(audio_file, format="mp3"))
        finally:
            # Remove the per-line clip even when decoding fails.
            _discard_file(audio_file)
        transcript_parts.append(f"**{speaker}**: {line_text}\n\n")

    transcript = "".join(transcript_parts)
    if not segments:
        return None, transcript

    combined = segments[0]
    for segment in segments[1:]:
        # pydub rejects a crossfade longer than either clip, so clamp it.
        fade = min(CROSSFADE_MS, len(combined), len(segment))
        combined = combined.append(segment, crossfade=fade)

    # Reserve a path, then close the handle before exporting so the export
    # does not write to a file that is still held open here.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
        final_mp3_path = temp_audio.name
    try:
        # export() returns the output file handle; close it explicitly.
        combined.export(final_mp3_path, format="mp3").close()
        with open(final_mp3_path, "rb") as f:
            audio_bytes = f.read()
    finally:
        _discard_file(final_mp3_path)

    return audio_bytes, transcript


def regenerate_audio_from_dialogue(dialogue_items):
    """Re-generate multi-speaker audio from user-edited dialogue.

    Args:
        dialogue_items: iterable of ``(speaker, text)`` pairs.

    Returns:
        ``(audio_bytes, transcript)`` on success, or ``(None, error_message)``
        when nothing was generated or synthesis failed.
    """
    try:
        audio_bytes, transcript = _synthesize_dialogue(dialogue_items)
    except Exception as e:
        # Same failure shape as generate_podcast; the UI shows the message.
        return None, f"Error generating audio: {str(e)}"
    if audio_bytes is None:
        return None, "No audio segments were generated."
    return audio_bytes, transcript


def _extract_pdf_text(file):
    """Return the text of all pages of an uploaded PDF, joined by spaces.

    A file-like upload is handed to pypdf directly, because its ``.name`` is
    only the original filename and need not exist on disk. Objects without a
    ``read`` method fall back to ``file.name`` as a path.
    """
    source = file if hasattr(file, "read") else file.name
    reader = pypdf.PdfReader(source)
    # Extract each page once; skip pages that yield no text.
    page_texts = (page.extract_text() for page in reader.pages)
    return " ".join(chunk for chunk in page_texts if chunk)


def generate_podcast(file, url, video_url, research_topic_input, tone, length):
    """Create a multi-speaker podcast from exactly one source.

    The source is one of: an uploaded PDF, a URL, a YouTube video link, or a
    research topic.

    Returns:
        ``(audio_bytes, transcript_str)`` on success, or
        ``(None, error_message)`` on any failure.
    """
    sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
    if sum(sources) > 1:
        return None, "Provide only one input (PDF, URL, YouTube, or Research topic)."
    if not any(sources):
        return None, "Please provide at least one source."

    text = ""
    if file:
        # Handle PDF
        try:
            if not file.name.lower().endswith('.pdf'):
                return None, "Please upload a PDF file."
            text = _extract_pdf_text(file)
        except Exception as e:
            return None, f"Error reading PDF: {str(e)}"
        if not text:
            # Mirrors the empty-result checks of the other sources.
            return None, "Failed to extract text from PDF."
    elif url:
        # Handle URL
        try:
            text = extract_text_from_url(url)
            if not text:
                return None, "Failed to extract text from URL."
        except Exception as e:
            return None, f"Error extracting text from URL: {str(e)}"
    elif video_url:
        # Handle YouTube
        try:
            text = transcribe_youtube_video(video_url)
            if not text:
                return None, "Failed to transcribe YouTube video."
        except Exception as e:
            return None, f"Error transcribing YouTube video: {str(e)}"
    elif research_topic_input:
        # Handle research topic
        try:
            text = research_topic(research_topic_input)
            if not text:
                return None, f"Sorry, no information found on '{research_topic_input}'."
        except Exception as e:
            return None, f"Error researching topic: {str(e)}"

    # Generate the multi-speaker script
    try:
        text = truncate_text(text)
        script = generate_script(SYSTEM_PROMPT, text, tone, length)
    except Exception as e:
        return None, f"Error generating script: {str(e)}"

    try:
        turns = [(item.speaker, item.text) for item in script.dialogue]
        audio_bytes, transcript = _synthesize_dialogue(turns)
    except Exception as e:
        return None, f"Error generating audio: {str(e)}"
    if audio_bytes is None:
        return None, "No audio segments generated."
    return audio_bytes, transcript


def _run_generation(file, url, video_url, research_topic_input, tone, length):
    """Generate a podcast behind a staged progress bar and store the result."""
    # Show a pseudo progress bar for user engagement
    progress_bar = st.progress(0)
    progress_text = st.empty()

    # Steps to pretend some progress:
    progress_text.write("Alright, let's get started...")
    progress_bar.progress(10)
    time.sleep(1.0)

    progress_text.write("Working on the magic. Hang tight!")
    progress_bar.progress(40)
    time.sleep(1.0)

    progress_text.write("Almost done. Adding a dash of awesomeness...")
    progress_bar.progress(70)
    time.sleep(1.0)

    audio_bytes, transcript = generate_podcast(
        file, url, video_url, research_topic_input, tone, length
    )

    time.sleep(1.0)
    progress_bar.progress(100)
    progress_text.write("Done!")

    if audio_bytes is None:
        # On failure the second element carries the error message.
        st.error(transcript)
        # Clear session state
        st.session_state["audio_bytes"] = None
        st.session_state["transcript"] = None
    else:
        st.success("Podcast generated successfully!")
        st.session_state["audio_bytes"] = audio_bytes
        st.session_state["transcript"] = transcript


def _render_result():
    """Show the stored podcast, its editable transcript, and the regenerate flow."""
    # Show the audio
    st.audio(st.session_state["audio_bytes"], format='audio/mp3')

    # Provide a download button with .mp3 extension
    st.download_button(
        label="Download Podcast (MP3)",
        data=st.session_state["audio_bytes"],
        file_name="my_podcast.mp3",
        mime="audio/mpeg"
    )

    # Show the transcript in a text area for editing
    st.markdown("### Generated Transcript (Editable)")
    edited_text = st.text_area(
        "Feel free to tweak lines, fix errors, or reword anything.",
        value=st.session_state["transcript"],
        height=300
    )

    # Regenerate button
    if not st.button("Regenerate Audio From Edited Text"):
        return

    regen_bar = st.progress(0)
    regen_text = st.empty()

    regen_text.write("Let's do this revision!")
    regen_bar.progress(25)
    time.sleep(1.0)

    regen_text.write("Cooking up fresh audio...")
    regen_bar.progress(60)
    time.sleep(1.0)

    # Parse & regenerate
    dialogue_items = parse_user_edited_transcript(edited_text)
    new_audio_bytes, new_transcript = regenerate_audio_from_dialogue(dialogue_items)

    regen_bar.progress(90)
    time.sleep(1.0)

    if new_audio_bytes is None:
        regen_bar.progress(100)
        st.error(new_transcript)
        return

    regen_bar.progress(100)
    regen_text.write("All set!")
    st.success("Regenerated audio below:")

    # Store updated
    st.session_state["audio_bytes"] = new_audio_bytes
    st.session_state["transcript"] = new_transcript

    st.audio(new_audio_bytes, format='audio/mp3')
    st.download_button(
        label="Download Edited Podcast (MP3)",
        data=new_audio_bytes,
        file_name="my_podcast_edited.mp3",
        mime="audio/mpeg"
    )
    st.markdown(new_transcript)


def main():
    """Render the MyPod page: inputs, generation, playback, and editing."""
    # set_page_config is kept as the very first Streamlit command
    st.set_page_config(
        page_title="MyPod - AI-based Podcast Generator",
        layout="centered"
    )

    # Enable "light or dark" theme via custom CSS
    # NOTE(review): the CSS payload is blank in this copy of the file --
    # confirm whether style rules were lost.
    st.markdown(
        """ """,
        unsafe_allow_html=True
    )

    st.title("🎙 MyPod - AI-based Podcast Generator")
    st.markdown(_INTRO_MARKDOWN)

    col1, col2 = st.columns(2)
    with col1:
        file = st.file_uploader("Upload PDF (.pdf only)", type=["pdf"])
        url = st.text_input("Or Enter URL")
        video_url = st.text_input("Or Enter YouTube Link")
    with col2:
        research_topic_input = st.text_input("Or Research a Topic")
        tone = st.radio("Tone", ["Humorous", "Formal", "Casual", "Youthful"], index=2)
        length = st.radio("Length", ["1-3 Mins", "3-5 Mins", "5-10 Mins", "10-20 Mins"], index=0)

    # Use session_state to avoid losing results if user clicks away
    if "audio_bytes" not in st.session_state:
        st.session_state["audio_bytes"] = None
    if "transcript" not in st.session_state:
        st.session_state["transcript"] = None

    generate_button = st.button("Generate Podcast")

    if generate_button:
        _run_generation(file, url, video_url, research_topic_input, tone, length)

    # Check if we have a stored result
    if st.session_state["audio_bytes"]:
        _render_result()


if __name__ == "__main__":
    main()