MyPod_10

Running

App Files Community

siddhartharyaai committed on Jan 14

Commit

c2b1adf

verified ·

1 Parent(s): 073fcba

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -37

app.py CHANGED Viewed

@@ -21,12 +21,22 @@ from utils import (
 )
 from prompts import SYSTEM_PROMPT
-# We are no longer importing streamlit-webrtc or mic-based Q&A
-# from qa import AudioBufferProcessor, handle_qa_exchange, transcribe_audio_deepgram
-MAX_QA_QUESTIONS = 5  # Up to 5 typed follow-up questions
 def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
     pattern = r"\*\*(.+?)\*\*:\s*(.+)"
     matches = re.findall(pattern, edited_text)
@@ -61,6 +71,11 @@ def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: s
     return items
 def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
     audio_segments = []
     transcript = ""
     crossfade_duration = 50  # ms
@@ -107,6 +122,12 @@ def generate_podcast(
     sponsor_style,
     custom_bg_music_path
 ):
     sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
     if sum(sources) > 1:
         return None, "Provide only one input (PDF, URL, YouTube, or Topic)."
@@ -144,6 +165,7 @@ def generate_podcast(
         except Exception as e:
             return None, f"Error researching topic: {str(e)}"
     text = truncate_text(text)
     extra_instructions = []
@@ -161,11 +183,12 @@ def generate_podcast(
         )
     from prompts import SYSTEM_PROMPT
     full_prompt = SYSTEM_PROMPT
-    if extra_instructions:
-        combined_instructions = "\n\n".join(extra_instructions).strip()
         full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
     try:
         script = generate_script(
             full_prompt,
@@ -181,7 +204,7 @@ def generate_podcast(
     audio_segments = []
     transcript = ""
-    crossfade_duration = 50  # ms
     try:
         for item in script.dialogue:
@@ -293,7 +316,7 @@ def main():
         st.session_state["transcript"] = None
     if "transcript_original" not in st.session_state:
         st.session_state["transcript_original"] = None
     if "qa_count" not in st.session_state:
         st.session_state["qa_count"] = 0
     if "conversation_history" not in st.session_state:
@@ -358,10 +381,10 @@ def main():
             st.session_state["audio_bytes"] = audio_bytes
             st.session_state["transcript"] = transcript
             st.session_state["transcript_original"] = transcript
             st.session_state["qa_count"] = 0
             st.session_state["conversation_history"] = ""
-    # Display generated audio and transcript if present
     if st.session_state["audio_bytes"]:
         st.audio(st.session_state["audio_bytes"], format='audio/mp3')
         st.download_button(
@@ -378,24 +401,12 @@ def main():
             height=300
         )
-        def highlight_differences(original: str, edited: str) -> str:
-            matcher = difflib.SequenceMatcher(None, original.split(), edited.split())
-            highlighted = []
-            for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
-                if opcode == 'equal':
-                    highlighted.extend(original.split()[i1:i2])
-                elif opcode in ('replace', 'insert'):
-                    added_words = edited.split()[j1:j2]
-                    highlighted.extend([f'<span style="color:red">{word}</span>' for word in added_words])
-                elif opcode == 'delete':
-                    pass
-            return ' '.join(highlighted)
         if st.session_state["transcript_original"]:
             highlighted_transcript = highlight_differences(
                 st.session_state["transcript_original"],
                 edited_text
             )
             st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
             st.markdown(highlighted_transcript, unsafe_allow_html=True)
@@ -443,36 +454,45 @@ def main():
                 st.markdown("### Updated Transcript")
                 st.markdown(new_transcript)
-        # ----------- POST-PODCAST Q&A (Text-based) -----------
-        st.markdown("## Post-Podcast Q&A (Text-based)")
         used_questions = st.session_state["qa_count"]
         remaining = MAX_QA_QUESTIONS - used_questions
         if remaining > 0:
             st.write(f"You can ask up to {remaining} more question(s).")
-            typed_q = st.text_input("Type your follow-up question here:")
             if st.button("Submit Q&A"):
                 if used_questions >= MAX_QA_QUESTIONS:
                     st.warning("You have reached the Q&A limit.")
                 else:
                     question_text = typed_q.strip()
                     if not question_text:
-                        st.warning("No question found. Please type something.")
                     else:
-                        st.write(f"**You asked**: {question_text}")
-                        # We'll just store the question + a mock response for now
-                        # or you can do an LLM call
-                        # For example, let's do a minimal approach:
-                        fake_answer = "That's a great question! I'd love to answer, but I'm currently text-based only."
-                        st.write(f"**John**: {fake_answer}")
-                        # Update conversation
-                        st.session_state["conversation_history"] += f"\nUser: {question_text}\nJohn: {fake_answer}\n"
-                        st.session_state["qa_count"] += 1
         else:
             st.write("You have used all 5 Q&A opportunities.")

 )
 from prompts import SYSTEM_PROMPT
+# NEW: For Q&A
+from qa import transcribe_audio_deepgram, handle_qa_exchange
+MAX_QA_QUESTIONS = 5  # up to 5 voice/text questions
 def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
+    """
+    Looks for lines like:
+        **Angela**: Hello
+        **Dimitris**: Great topic...
+    We treat 'Angela' as the raw display_speaker, 'Hello' as text.
+    Then we map 'Angela' -> speaker='Jane' (if it matches host_name),
+    'Dimitris' -> speaker='John' (if it matches guest_name), etc.
+    Returns a list of DialogueItem.
+    """
     pattern = r"\*\*(.+?)\*\*:\s*(.+)"
     matches = re.findall(pattern, edited_text)
     return items
 def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
+    """
+    Re-generates multi-speaker audio from user-edited DialogueItems,
+    then mixes with background music or custom music.
+    Returns (audio_bytes, transcript_str).
+    """
     audio_segments = []
     transcript = ""
     crossfade_duration = 50  # ms
     sponsor_style,
     custom_bg_music_path
 ):
+    """
+    Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
+    Ensures female voice for host (Jane), male voice for guest (John).
+    Sponsor content is either separate or blended based on sponsor_style.
+    Returns (audio_bytes, transcript_str).
+    """
     sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
     if sum(sources) > 1:
         return None, "Provide only one input (PDF, URL, YouTube, or Topic)."
         except Exception as e:
             return None, f"Error researching topic: {str(e)}"
+    from utils import truncate_text
     text = truncate_text(text)
     extra_instructions = []
         )
     from prompts import SYSTEM_PROMPT
+    combined_instructions = "\n\n".join(extra_instructions).strip()
     full_prompt = SYSTEM_PROMPT
+    if combined_instructions:
         full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
+    from utils import generate_script, generate_audio_mp3, mix_with_bg_music
     try:
         script = generate_script(
             full_prompt,
     audio_segments = []
     transcript = ""
+    crossfade_duration = 50
     try:
         for item in script.dialogue:
         st.session_state["transcript"] = None
     if "transcript_original" not in st.session_state:
         st.session_state["transcript_original"] = None
+    # For Q&A
     if "qa_count" not in st.session_state:
         st.session_state["qa_count"] = 0
     if "conversation_history" not in st.session_state:
             st.session_state["audio_bytes"] = audio_bytes
             st.session_state["transcript"] = transcript
             st.session_state["transcript_original"] = transcript
+            # Reset Q&A
             st.session_state["qa_count"] = 0
             st.session_state["conversation_history"] = ""
     if st.session_state["audio_bytes"]:
         st.audio(st.session_state["audio_bytes"], format='audio/mp3')
         st.download_button(
             height=300
         )
         if st.session_state["transcript_original"]:
             highlighted_transcript = highlight_differences(
                 st.session_state["transcript_original"],
                 edited_text
             )
             st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
             st.markdown(highlighted_transcript, unsafe_allow_html=True)
                 st.markdown("### Updated Transcript")
                 st.markdown(new_transcript)
+        # -----------------------
+        # POST-PODCAST Q&A Logic
+        # -----------------------
+        st.markdown("## Post-Podcast Q&A")
         used_questions = st.session_state["qa_count"]
         remaining = MAX_QA_QUESTIONS - used_questions
         if remaining > 0:
             st.write(f"You can ask up to {remaining} more question(s).")
+            typed_q = st.text_input("Type your follow-up question:")
+            audio_q = st.file_uploader("Or upload an audio question (WAV, MP3)")
             if st.button("Submit Q&A"):
                 if used_questions >= MAX_QA_QUESTIONS:
                     st.warning("You have reached the Q&A limit.")
                 else:
                     question_text = typed_q.strip()
+                    if audio_q is not None:
+                        suffix = ".wav"
+                        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+                            tmp.write(audio_q.read())
+                            local_audio_path = tmp.name
+                        st.write("Transcribing your audio question...")
+                        audio_transcript = transcribe_audio_deepgram(local_audio_path)
+                        if audio_transcript:
+                            question_text = audio_transcript
                     if not question_text:
+                        st.warning("No question found (text or audio).")
                     else:
+                        st.write("Generating an answer...")
+                        ans_audio, ans_text = handle_qa_exchange(question_text)
+                        if ans_audio:
+                            st.audio(ans_audio, format="audio/mp3")
+                            st.markdown(f"**John**: {ans_text}")
+                            st.session_state["qa_count"] += 1
+                        else:
+                            st.warning("No response could be generated.")
         else:
             st.write("You have used all 5 Q&A opportunities.")