siddhartharyaai committed on
Commit
337f622
Β·
verified Β·
1 Parent(s): 81ea4ea

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +436 -0
app.py ADDED
@@ -0,0 +1,436 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
+ import streamlit as st
4
+ import time
5
+ import re
6
+ import os
7
+ import tempfile
8
+ import pypdf
9
+ from pydub import AudioSegment, effects
10
+ import difflib # For computing differences between texts
11
+
12
+ from utils import (
13
+ generate_script,
14
+ generate_audio_mp3,
15
+ truncate_text,
16
+ extract_text_from_url,
17
+ transcribe_youtube_video,
18
+ research_topic
19
+ )
20
+ from prompts import SYSTEM_PROMPT
21
+
22
+
23
+ def parse_user_edited_transcript(edited_text: str):
24
+ """
25
+ Looks for lines like:
26
+ **Jane**: Hello
27
+ **John**: Sure, I'd love to talk about that.
28
+ Returns a list of (speaker, text).
29
+ """
30
+ pattern = r"\*\*(Jane|John)\*\*:\s*(.+)"
31
+ matches = re.findall(pattern, edited_text)
32
+ if not matches:
33
+ return [("Jane", edited_text)]
34
+ return matches
35
+
36
+
37
+ def regenerate_audio_from_dialogue(dialogue_items):
38
+ """
39
+ Re-generates multi-speaker audio from user-edited text,
40
+ then mixes with background music in the root folder (bg_music.mp3).
41
+ Returns final audio bytes and updated transcript.
42
+ """
43
+ audio_segments = []
44
+ transcript = ""
45
+ crossfade_duration = 50 # in ms
46
+
47
+ for speaker, line_text in dialogue_items:
48
+ audio_file = generate_audio_mp3(line_text, speaker)
49
+ seg = AudioSegment.from_file(audio_file, format="mp3")
50
+ audio_segments.append(seg)
51
+ transcript += f"**{speaker}**: {line_text}\n\n"
52
+ os.remove(audio_file)
53
+
54
+ if not audio_segments:
55
+ return None, "No audio segments were generated."
56
+
57
+ # Combine spoken segments
58
+ combined_spoken = audio_segments[0]
59
+ for seg in audio_segments[1:]:
60
+ combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
61
+
62
+ # Mix with background music
63
+ final_mix = mix_with_bg_music(combined_spoken)
64
+
65
+ # Export to bytes
66
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
67
+ final_mix.export(temp_audio.name, format="mp3")
68
+ final_mp3_path = temp_audio.name
69
+
70
+ with open(final_mp3_path, "rb") as f:
71
+ audio_bytes = f.read()
72
+ os.remove(final_mp3_path)
73
+
74
+ return audio_bytes, transcript
75
+
76
+
77
+ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
78
+ """
79
+ Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
80
+ Returns (audio_bytes, transcript_str), mixing with background music in root folder (bg_music.mp3).
81
+ """
82
+ sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
83
+ if sum(sources) > 1:
84
+ return None, "Provide only one input (PDF, URL, YouTube, or Research topic)."
85
+ if not any(sources):
86
+ return None, "Please provide at least one source."
87
+
88
+ text = ""
89
+ if file:
90
+ try:
91
+ if not file.name.lower().endswith('.pdf'):
92
+ return None, "Please upload a PDF file."
93
+ # Use the file-like object directly to read the PDF
94
+ reader = pypdf.PdfReader(file)
95
+ text = " ".join(page.extract_text() for page in reader.pages if page.extract_text())
96
+ except Exception as e:
97
+ return None, f"Error reading PDF: {str(e)}"
98
+ elif url:
99
+ try:
100
+ text = extract_text_from_url(url)
101
+ if not text:
102
+ return None, "Failed to extract text from URL."
103
+ except Exception as e:
104
+ return None, f"Error extracting text from URL: {str(e)}"
105
+ elif video_url:
106
+ try:
107
+ text = transcribe_youtube_video(video_url)
108
+ if not text:
109
+ return None, "Failed to transcribe YouTube video."
110
+ except Exception as e:
111
+ return None, f"Error transcribing YouTube video: {str(e)}"
112
+ elif research_topic_input:
113
+ try:
114
+ text = research_topic(research_topic_input)
115
+ if not text:
116
+ return None, f"Sorry, no information found on '{research_topic_input}'."
117
+ except Exception as e:
118
+ return None, f"Error researching topic: {str(e)}"
119
+
120
+ # Generate script
121
+ try:
122
+ text = truncate_text(text)
123
+ script = generate_script(SYSTEM_PROMPT, text, tone, length)
124
+ except Exception as e:
125
+ return None, f"Error generating script: {str(e)}"
126
+
127
+ audio_segments = []
128
+ transcript = ""
129
+ crossfade_duration = 50 # ms
130
+
131
+ try:
132
+ for item in script.dialogue:
133
+ audio_file = generate_audio_mp3(item.text, item.speaker)
134
+ seg = AudioSegment.from_file(audio_file, format="mp3")
135
+ audio_segments.append(seg)
136
+ transcript += f"**{item.speaker}**: {item.text}\n\n"
137
+ os.remove(audio_file)
138
+
139
+ if not audio_segments:
140
+ return None, "No audio segments generated."
141
+
142
+ combined_spoken = audio_segments[0]
143
+ for seg in audio_segments[1:]:
144
+ combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
145
+
146
+ # Mix with bg music
147
+ final_mix = mix_with_bg_music(combined_spoken)
148
+
149
+ # Export to bytes
150
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
151
+ final_mix.export(temp_audio.name, format="mp3")
152
+ final_mp3_path = temp_audio.name
153
+
154
+ with open(final_mp3_path, "rb") as f:
155
+ audio_bytes = f.read()
156
+ os.remove(final_mp3_path)
157
+
158
+ return audio_bytes, transcript
159
+
160
+ except Exception as e:
161
+ return None, f"Error generating audio: {str(e)}"
162
+
163
+
164
+ def mix_with_bg_music(spoken: AudioSegment) -> AudioSegment:
165
+ """
166
+ Mixes 'spoken' with bg_music.mp3 in the root folder:
167
+ 1) Start with 2 seconds of music alone before speech begins.
168
+ 2) Loop the music if it's shorter than the final audio length.
169
+ 3) Lower the music volume so the speech is clear.
170
+ """
171
+ bg_music_path = "bg_music.mp3" # in root folder
172
+
173
+ try:
174
+ bg_music = AudioSegment.from_file(bg_music_path, format="mp3")
175
+ except Exception as e:
176
+ print("[ERROR] Failed to load background music:", e)
177
+ return spoken
178
+
179
+ bg_music = bg_music - 14.0 # Lower volume (e.g. -14 dB)
180
+
181
+ total_length_ms = len(spoken) + 2000
182
+ looped_music = AudioSegment.empty()
183
+ while len(looped_music) < total_length_ms:
184
+ looped_music += bg_music
185
+
186
+ looped_music = looped_music[:total_length_ms]
187
+
188
+ # Overlay spoken at 2000ms so we get 2s of music first
189
+ final_mix = looped_music.overlay(spoken, position=2000)
190
+
191
+ return final_mix
192
+
193
+
194
+ def highlight_differences(original: str, edited: str) -> str:
195
+ """
196
+ Highlights the differences between the original and edited transcripts.
197
+ Added or modified words are wrapped in <span> tags with red color.
198
+ """
199
+ matcher = difflib.SequenceMatcher(None, original.split(), edited.split())
200
+ highlighted = []
201
+ for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
202
+ if opcode == 'equal':
203
+ # Unchanged words
204
+ highlighted.extend(original.split()[i1:i2])
205
+ elif opcode in ('replace', 'insert'):
206
+ # Added or replaced words - highlight in red
207
+ added_words = edited.split()[j1:j2]
208
+ highlighted.extend([f'<span style="color:red">{word}</span>' for word in added_words])
209
+ elif opcode == 'delete':
210
+ # Deleted words - optionally, can be shown differently
211
+ # For now, we'll ignore deletions in the highlighted transcript
212
+ pass
213
+ return ' '.join(highlighted)
214
+
215
+
216
+ def main():
217
+ st.set_page_config(page_title="MyPod - AI-based Podcast Generator", layout="centered")
218
+
219
+ # Use smaller font for the main header
220
+ st.markdown("## MyPod - AI powered Podcast Generator")
221
+
222
+ st.markdown(
223
+ "Welcome to **MyPod**, your go-to AI-powered podcast generator! πŸŽ‰\n\n"
224
+ "MyPod transforms your documents, webpages, YouTube videos, or research topics into a more human-sounding, conversational podcast.\n"
225
+ "Select a tone and a duration range. The script will be on-topic, concise, and respect your chosen length.\n\n"
226
+ "### How to use:\n"
227
+ "1. **Provide one source:** PDF Files, Website URL, YouTube link or a Topic to Research.\n"
228
+ "2. **Choose the tone and the target duration.**\n"
229
+ "3. **Click 'Generate Podcast'** to produce your podcast. After the audio is generated, you can edit the transcript and re-generate the audio with your edits if needed.\n\n"
230
+ "**Research a Topic:** Please be as detailed as possible in your topic statement. If it's too niche or specific, "
231
+ "you might not get the desired outcome. We'll fetch information from Wikipedia, News RSS feeds or the LLM knowledge base to get recent info about the topic.\n\n"
232
+ "**Token Limit:** Up to ~2,048 tokens are supported. Long inputs may be truncated.\n"
233
+ "**Note:** YouTube videos will only work if they have captions built in.\n\n"
234
+ "⏳**Please be patient while your podcast is being generated.** This process involves content analysis, script creation, "
235
+ "and high-quality audio synthesis, which may take a few minutes.\n\n"
236
+ "πŸ”₯ **Ready to create your personalized podcast?** Give it a try now and let the magic happen! πŸ”₯"
237
+ )
238
+
239
+ col1, col2 = st.columns(2)
240
+ with col1:
241
+ file = st.file_uploader("Upload File (.pdf only)", type=["pdf"])
242
+ url = st.text_input("Or Enter Website URL")
243
+ video_url = st.text_input("Or Enter YouTube Link (Captioned videos)")
244
+ with col2:
245
+ research_topic_input = st.text_input("Or Research a Topic")
246
+ tone = st.radio("Tone", ["Humorous", "Formal", "Casual", "Youthful"], index=2)
247
+ length = st.radio("Length", ["1-3 Mins", "3-5 Mins", "5-10 Mins", "10-20 Mins"], index=0)
248
+
249
+ # Store results in session_state
250
+ if "audio_bytes" not in st.session_state:
251
+ st.session_state["audio_bytes"] = None
252
+ if "transcript" not in st.session_state:
253
+ st.session_state["transcript"] = None
254
+ if "transcript_original" not in st.session_state:
255
+ st.session_state["transcript_original"] = None # Store original transcript
256
+
257
+ # Add only the "Generate Podcast" button, centered
258
+ generate_button = st.button("Generate Podcast")
259
+
260
+ if generate_button:
261
+ progress_bar = st.progress(0)
262
+ progress_text = st.empty()
263
+
264
+ # Define progress stages and messages
265
+ progress_messages = [
266
+ "πŸ” Analyzing your input...",
267
+ "πŸ“ Crafting the perfect script...",
268
+ "πŸŽ™οΈ Generating high-quality audio...",
269
+ "🎢 Adding the finishing touches..."
270
+ ]
271
+
272
+ # Initialize progress at 0%
273
+ progress_text.write(progress_messages[0])
274
+ progress_bar.progress(0)
275
+ time.sleep(1.0)
276
+
277
+ # Update to 25%
278
+ progress_text.write(progress_messages[1])
279
+ progress_bar.progress(25)
280
+ time.sleep(1.0)
281
+
282
+ # Update to 50%
283
+ progress_text.write(progress_messages[2])
284
+ progress_bar.progress(50)
285
+ time.sleep(1.0)
286
+
287
+ # Update to 75%
288
+ progress_text.write(progress_messages[3])
289
+ progress_bar.progress(75)
290
+ time.sleep(1.0)
291
+
292
+ # Finalize to 100%
293
+ audio_bytes, transcript = generate_podcast(
294
+ file, url, video_url, research_topic_input, tone, length
295
+ )
296
+
297
+ progress_bar.progress(100)
298
+ progress_text.write("βœ… Done!")
299
+
300
+ if audio_bytes is None:
301
+ st.error(transcript)
302
+ st.session_state["audio_bytes"] = None
303
+ st.session_state["transcript"] = None
304
+ st.session_state["transcript_original"] = None
305
+ else:
306
+ st.success("Podcast generated successfully!")
307
+ st.session_state["audio_bytes"] = audio_bytes
308
+ st.session_state["transcript"] = transcript
309
+ st.session_state["transcript_original"] = transcript # Store original transcript
310
+
311
+ if st.session_state["audio_bytes"]:
312
+ st.audio(st.session_state["audio_bytes"], format='audio/mp3')
313
+ st.download_button(
314
+ label="Download Podcast (MP3)",
315
+ data=st.session_state["audio_bytes"],
316
+ file_name="my_podcast.mp3",
317
+ mime="audio/mpeg"
318
+ )
319
+
320
+ st.markdown("### Generated Transcript (Editable)")
321
+
322
+ # Editable text area for transcript
323
+ edited_text = st.text_area(
324
+ "Feel free to tweak lines, fix errors, or reword anything.",
325
+ value=st.session_state["transcript"],
326
+ height=300
327
+ )
328
+
329
+ # Compute differences and highlight edited text
330
+ if st.session_state["transcript_original"]:
331
+ highlighted_transcript = highlight_differences(
332
+ st.session_state["transcript_original"],
333
+ edited_text
334
+ )
335
+
336
+ st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
337
+ st.markdown(highlighted_transcript, unsafe_allow_html=True)
338
+
339
+ if st.button("Regenerate Audio From Edited Text"):
340
+ regen_bar = st.progress(0)
341
+ regen_text = st.empty()
342
+
343
+ regen_text.write("πŸ”„ Regenerating your podcast with the edits...")
344
+ regen_bar.progress(25)
345
+ time.sleep(1.0)
346
+
347
+ regen_text.write("πŸ”§ Adjusting the script based on your changes...")
348
+ regen_bar.progress(50)
349
+ time.sleep(1.0)
350
+
351
+ dialogue_items = parse_user_edited_transcript(edited_text)
352
+ new_audio_bytes, new_transcript = regenerate_audio_from_dialogue(dialogue_items)
353
+
354
+ regen_bar.progress(75)
355
+ time.sleep(1.0)
356
+
357
+ if new_audio_bytes is None:
358
+ regen_bar.progress(100)
359
+ st.error(new_transcript)
360
+ else:
361
+ regen_bar.progress(100)
362
+ regen_text.write("βœ… Regeneration complete!")
363
+ st.success("Regenerated audio below:")
364
+
365
+ st.session_state["audio_bytes"] = new_audio_bytes
366
+ st.session_state["transcript"] = new_transcript
367
+ st.session_state["transcript_original"] = new_transcript # Update original transcript
368
+
369
+ st.audio(new_audio_bytes, format='audio/mp3')
370
+ st.download_button(
371
+ label="Download Edited Podcast (MP3)",
372
+ data=new_audio_bytes,
373
+ file_name="my_podcast_edited.mp3",
374
+ mime="audio/mpeg"
375
+ )
376
+ st.markdown("### Updated Transcript")
377
+ st.markdown(new_transcript)
378
+
379
+
380
+ # ---------------------------------------------------------------------
381
+ # Function to mix with background music is same as before
382
+ # ---------------------------------------------------------------------
383
+ def mix_with_bg_music(spoken: AudioSegment) -> AudioSegment:
384
+ """
385
+ Mixes 'spoken' with bg_music.mp3 in the root folder:
386
+ 1) Start with 2 seconds of music alone before speech begins.
387
+ 2) Loop the music if it's shorter than the final audio length.
388
+ 3) Lower the music volume so the speech is clear.
389
+ """
390
+ bg_music_path = "bg_music.mp3" # in root folder
391
+
392
+ try:
393
+ bg_music = AudioSegment.from_file(bg_music_path, format="mp3")
394
+ except Exception as e:
395
+ print("[ERROR] Failed to load background music:", e)
396
+ return spoken
397
+
398
+ bg_music = bg_music - 14.0 # Lower volume (e.g. -14 dB)
399
+
400
+ total_length_ms = len(spoken) + 2000
401
+ looped_music = AudioSegment.empty()
402
+ while len(looped_music) < total_length_ms:
403
+ looped_music += bg_music
404
+
405
+ looped_music = looped_music[:total_length_ms]
406
+
407
+ # Overlay spoken at 2000ms so we get 2s of music first
408
+ final_mix = looped_music.overlay(spoken, position=2000)
409
+
410
+ return final_mix
411
+
412
+
413
+ def highlight_differences(original: str, edited: str) -> str:
414
+ """
415
+ Highlights the differences between the original and edited transcripts.
416
+ Added or modified words are wrapped in <span> tags with red color.
417
+ """
418
+ matcher = difflib.SequenceMatcher(None, original.split(), edited.split())
419
+ highlighted = []
420
+ for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
421
+ if opcode == 'equal':
422
+ # Unchanged words
423
+ highlighted.extend(original.split()[i1:i2])
424
+ elif opcode in ('replace', 'insert'):
425
+ # Added or replaced words - highlight in red
426
+ added_words = edited.split()[j1:j2]
427
+ highlighted.extend([f'<span style="color:red">{word}</span>' for word in added_words])
428
+ elif opcode == 'delete':
429
+ # Deleted words - optionally, can be shown differently
430
+ # For now, we'll ignore deletions in the highlighted transcript
431
+ pass
432
+ return ' '.join(highlighted)
433
+
434
+
435
+ if __name__ == "__main__":
436
+ main()