MyPod_10

Running

App Files Community

siddhartharyaai committed on Jan 6

Commit

084e565

verified ·

1 Parent(s): f9e4b6b

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -56

app.py CHANGED Viewed

@@ -6,8 +6,7 @@ import re
 import os
 import tempfile
 import pypdf
-from pydub import AudioSegment
-from pydub import effects  # for normalizing volume if needed
 from utils import (
     generate_script,
@@ -19,6 +18,7 @@ from utils import (
 )
 from prompts import SYSTEM_PROMPT
 def parse_user_edited_transcript(edited_text: str):
     """
     Looks for lines like:
@@ -32,13 +32,13 @@ def parse_user_edited_transcript(edited_text: str):
         return [("Jane", edited_text)]
     return matches
 def regenerate_audio_from_dialogue(dialogue_items):
     """
     Re-generates multi-speaker audio from user-edited text,
-    then mixes with background music from the root folder (bg_music.mp3).
     Returns final audio bytes and updated transcript.
     """
-    # 1) Create spoken segments
     audio_segments = []
     transcript = ""
     crossfade_duration = 50  # in ms
@@ -53,15 +53,15 @@ def regenerate_audio_from_dialogue(dialogue_items):
     if not audio_segments:
         return None, "No audio segments were generated."
-    # 2) Combine spoken segments
     combined_spoken = audio_segments[0]
     for seg in audio_segments[1:]:
         combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
-    # 3) Mix with background music
     final_mix = mix_with_bg_music(combined_spoken)
-    # 4) Export to bytes
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
         final_mix.export(temp_audio.name, format="mp3")
         final_mp3_path = temp_audio.name
@@ -72,6 +72,7 @@ def regenerate_audio_from_dialogue(dialogue_items):
     return audio_bytes, transcript
 def generate_podcast(file, url, video_url, research_topic_input, tone, length):
     """
     Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
@@ -83,7 +84,6 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
     if not any(sources):
         return None, "Please provide at least one source."
-    # 1) Fetch text
     text = ""
     if file:
         try:
@@ -115,14 +115,13 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
         except Exception as e:
             return None, f"Error researching topic: {str(e)}"
-    # 2) Generate multi-speaker script
     try:
         text = truncate_text(text)
         script = generate_script(SYSTEM_PROMPT, text, tone, length)
     except Exception as e:
         return None, f"Error generating script: {str(e)}"
-    # 3) Convert dialogue to spoken segments
     audio_segments = []
     transcript = ""
     crossfade_duration = 50  # ms
@@ -138,12 +137,11 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
         if not audio_segments:
             return None, "No audio segments generated."
-        # Combine
         combined_spoken = audio_segments[0]
         for seg in audio_segments[1:]:
             combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
-        # Mix with background music
         final_mix = mix_with_bg_music(combined_spoken)
         # Export to bytes
@@ -160,6 +158,7 @@ def generate_podcast(file, url, video_url, research_topic_input, tone, length):
     except Exception as e:
         return None, f"Error generating audio: {str(e)}"
 def mix_with_bg_music(spoken: AudioSegment) -> AudioSegment:
     """
     Mixes 'spoken' with bg_music.mp3 in the root folder:
@@ -167,8 +166,7 @@ def mix_with_bg_music(spoken: AudioSegment) -> AudioSegment:
     2) Loop the music if it's shorter than the final audio length.
     3) Lower the music volume so the speech is clear.
     """
-    # Path to background music in root folder:
-    bg_music_path = "bg_music.mp3"  # root-level file
     try:
         bg_music = AudioSegment.from_file(bg_music_path, format="mp3")
@@ -176,82 +174,56 @@ def mix_with_bg_music(spoken: AudioSegment) -> AudioSegment:
         print("[ERROR] Failed to load background music:", e)
         return spoken
-    # Lower the music volume (e.g. -14 dB)
-    bg_music = bg_music - 14.0
-    # total_length_ms = spoken length + 2000ms intro
     total_length_ms = len(spoken) + 2000
-    # Loop the music if it's shorter than total_length_ms
     looped_music = AudioSegment.empty()
     while len(looped_music) < total_length_ms:
         looped_music += bg_music
-    # Crop to exact total_length_ms
     looped_music = looped_music[:total_length_ms]
-    # Create 2s intro for music before speech
     final_mix = looped_music.overlay(spoken, position=2000)
     return final_mix
 def main():
-    # Move set_page_config to the top if needed
     st.set_page_config(page_title="MyPod - AI-based Podcast Generator", layout="centered")
-    st.markdown(
-        """
-        <style>
-        :root {
-          color-scheme: light dark;
-        }
-        body {
-          background-color: #f0f2f6;
-          color: #222;
-        }
-        .css-18e3th9 {
-            background-color: #e8eaf2;
-        }
-        .stButton>button {
-            background-color: #0066cc;
-            color: white;
-            border-radius: 8px;
-        }
-        .stProgress>div>div>div>div {
-            background-color: #0066cc;
-        }
-        </style>
-        """,
-        unsafe_allow_html=True
-    )
-    st.title("🎙 MyPod - AI-based Podcast Generator")
     st.markdown(
         "Welcome to **MyPod**, your go-to AI-powered podcast generator! 🎉\n\n"
         "MyPod transforms your documents, webpages, YouTube videos, or research topics into a more human-sounding, conversational podcast.\n"
         "Select a tone and a duration range. The script will be on-topic, concise, and respect your chosen length.\n\n"
         "### How to use:\n"
-        "1. **Provide one source:** PDF, URL, YouTube link, or a Topic to Research.\n"
         "2. **Choose the tone and the target duration.**\n"
-        "3. **Click 'Generate Podcast'** to produce your podcast.\n\n"
-        "**After** the audio is generated, you can **edit** the transcript \n"
-        "and **re-generate** the audio with your edits if needed.\n\n"
-        "⏳**Please be patient while your podcast is being generated.** It involves content analysis, script creation, "
         "and high-quality audio synthesis, which may take a few minutes.\n\n"
-        "🔥 **Ready to create your personalized podcast?** Give it a try now!"
     )
     col1, col2 = st.columns(2)
     with col1:
         file = st.file_uploader("Upload PDF (.pdf only)", type=["pdf"])
         url = st.text_input("Or Enter URL")
-        video_url = st.text_input("Or Enter YouTube Link")
     with col2:
         research_topic_input = st.text_input("Or Research a Topic")
         tone = st.radio("Tone", ["Humorous", "Formal", "Casual", "Youthful"], index=2)
         length = st.radio("Length", ["1-3 Mins", "3-5 Mins", "5-10 Mins", "10-20 Mins"], index=0)
     if "audio_bytes" not in st.session_state:
         st.session_state["audio_bytes"] = None
     if "transcript" not in st.session_state:

 import os
 import tempfile
 import pypdf
+from pydub import AudioSegment, effects
 from utils import (
     generate_script,
 )
 from prompts import SYSTEM_PROMPT
 def parse_user_edited_transcript(edited_text: str):
     """
     Looks for lines like:
         return [("Jane", edited_text)]
     return matches
 def regenerate_audio_from_dialogue(dialogue_items):
     """
     Re-generates multi-speaker audio from user-edited text,
+    then mixes with background music in the root folder (bg_music.mp3).
     Returns final audio bytes and updated transcript.
     """
     audio_segments = []
     transcript = ""
     crossfade_duration = 50  # in ms
     if not audio_segments:
         return None, "No audio segments were generated."
+    # Combine spoken segments
     combined_spoken = audio_segments[0]
     for seg in audio_segments[1:]:
         combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
+    # Mix with background music
     final_mix = mix_with_bg_music(combined_spoken)
+    # Export to bytes
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
         final_mix.export(temp_audio.name, format="mp3")
         final_mp3_path = temp_audio.name
     return audio_bytes, transcript
 def generate_podcast(file, url, video_url, research_topic_input, tone, length):
     """
     Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
     if not any(sources):
         return None, "Please provide at least one source."
     text = ""
     if file:
         try:
         except Exception as e:
             return None, f"Error researching topic: {str(e)}"
+    # Generate script
     try:
         text = truncate_text(text)
         script = generate_script(SYSTEM_PROMPT, text, tone, length)
     except Exception as e:
         return None, f"Error generating script: {str(e)}"
     audio_segments = []
     transcript = ""
     crossfade_duration = 50  # ms
         if not audio_segments:
             return None, "No audio segments generated."
         combined_spoken = audio_segments[0]
         for seg in audio_segments[1:]:
             combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
+        # Mix with bg music
         final_mix = mix_with_bg_music(combined_spoken)
         # Export to bytes
     except Exception as e:
         return None, f"Error generating audio: {str(e)}"
 def mix_with_bg_music(spoken: AudioSegment) -> AudioSegment:
     """
     Mixes 'spoken' with bg_music.mp3 in the root folder:
     2) Loop the music if it's shorter than the final audio length.
     3) Lower the music volume so the speech is clear.
     """
+    bg_music_path = "bg_music.mp3"  # in root folder
     try:
         bg_music = AudioSegment.from_file(bg_music_path, format="mp3")
         print("[ERROR] Failed to load background music:", e)
         return spoken
+    bg_music = bg_music - 14.0  # Lower volume (e.g. -14 dB)
     total_length_ms = len(spoken) + 2000
     looped_music = AudioSegment.empty()
     while len(looped_music) < total_length_ms:
         looped_music += bg_music
     looped_music = looped_music[:total_length_ms]
+    # Overlay spoken at 2000ms so we get 2s of music first
     final_mix = looped_music.overlay(spoken, position=2000)
     return final_mix
 def main():
     st.set_page_config(page_title="MyPod - AI-based Podcast Generator", layout="centered")
+    # Use smaller font for the main header
+    st.markdown("## MyPod - AI powered Podcast Generator")
     st.markdown(
         "Welcome to **MyPod**, your go-to AI-powered podcast generator! 🎉\n\n"
         "MyPod transforms your documents, webpages, YouTube videos, or research topics into a more human-sounding, conversational podcast.\n"
         "Select a tone and a duration range. The script will be on-topic, concise, and respect your chosen length.\n\n"
         "### How to use:\n"
+        "1. **Provide one source:** PDF, URL, YouTube link (Requires User Auth - Work in Progress), or a Topic to Research.\n"
         "2. **Choose the tone and the target duration.**\n"
+        "3. **Click 'Generate Podcast'** to produce your podcast. After the audio is generated, you can edit the transcript and re-generate the audio with your edits if needed.\n\n"
+        "**Research a Topic:** Please be as detailed as possible in your topic statement. If it's too niche or specific, "
+        "you might not get the desired outcome. We'll fetch information from Wikipedia and RSS feeds (BBC, CNN, Associated Press, "
+        "NDTV, Times of India, The Hindu, Economic Times, Google News) or the LLM knowledge base to get recent info about the topic.\n\n"
+        "**Token Limit:** Up to ~2,048 tokens are supported. Long inputs may be truncated.\n"
+        "**Note:** YouTube transcription uses Whisper on CPU and may take longer for very long videos.\n\n"
+        "⏳**Please be patient while your podcast is being generated.** This process involves content analysis, script creation, "
         "and high-quality audio synthesis, which may take a few minutes.\n\n"
+        "🔥 **Ready to create your personalized podcast?** Give it a try now and let the magic happen! 🔥"
     )
     col1, col2 = st.columns(2)
     with col1:
         file = st.file_uploader("Upload PDF (.pdf only)", type=["pdf"])
         url = st.text_input("Or Enter URL")
+        video_url = st.text_input("Or Enter YouTube Link (Requires User Auth - Work in Progress)")
     with col2:
         research_topic_input = st.text_input("Or Research a Topic")
         tone = st.radio("Tone", ["Humorous", "Formal", "Casual", "Youthful"], index=2)
         length = st.radio("Length", ["1-3 Mins", "3-5 Mins", "5-10 Mins", "10-20 Mins"], index=0)
+    # Store results in session_state
     if "audio_bytes" not in st.session_state:
         st.session_state["audio_bytes"] = None
     if "transcript" not in st.session_state: