siddhartharyaai committed
Commit d4f0b3c · verified · 1 Parent(s): b0e78f5

Update app.py

Files changed (1)
  1. app.py +74 -55
app.py CHANGED
@@ -21,22 +21,13 @@ from utils import (
 )
 from prompts import SYSTEM_PROMPT
 
-# NEW: For Q&A
-from qa import transcribe_audio_deepgram, handle_qa_exchange
+# The new Q&A with mic
+from qa import AudioBufferProcessor, handle_qa_exchange
+from streamlit_webrtc import webrtc_streamer, WebRtcMode
 
-MAX_QA_QUESTIONS = 5  # up to 5 voice/text questions
+MAX_QA_QUESTIONS = 5
 
 def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
-    """
-    Looks for lines like:
-      **Angela**: Hello
-      **Dimitris**: Great topic...
-    We treat 'Angela' as the raw display_speaker, 'Hello' as text.
-    Then we map 'Angela' -> speaker='Jane' (if it matches host_name),
-    'Dimitris' -> speaker='John' (if it matches guest_name), etc.
-
-    Returns a list of DialogueItem.
-    """
     pattern = r"\*\*(.+?)\*\*:\s*(.+)"
     matches = re.findall(pattern, edited_text)
 
@@ -71,14 +62,9 @@ def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
     return items
 
 def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
-    """
-    Re-generates multi-speaker audio from user-edited DialogueItems,
-    then mixes with background music or custom music.
-    Returns (audio_bytes, transcript_str).
-    """
     audio_segments = []
     transcript = ""
-    crossfade_duration = 50  # ms
+    crossfade_duration = 50
 
     for item in dialogue_items:
         audio_file = generate_audio_mp3(item.text, item.speaker)
@@ -122,12 +108,6 @@ def generate_podcast(
     sponsor_style,
     custom_bg_music_path
 ):
-    """
-    Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
-    Ensures female voice for host (Jane), male voice for guest (John).
-    Sponsor content is either separate or blended based on sponsor_style.
-    Returns (audio_bytes, transcript_str).
-    """
     sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
     if sum(sources) > 1:
         return None, "Provide only one input (PDF, URL, YouTube, or Topic)."
@@ -165,7 +145,6 @@
     except Exception as e:
         return None, f"Error researching topic: {str(e)}"
 
-    from utils import truncate_text
     text = truncate_text(text)
 
     extra_instructions = []
@@ -183,12 +162,12 @@
     )
 
     from prompts import SYSTEM_PROMPT
+    from utils import generate_script, generate_audio_mp3, mix_with_bg_music
    combined_instructions = "\n\n".join(extra_instructions).strip()
     full_prompt = SYSTEM_PROMPT
     if combined_instructions:
         full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
 
-    from utils import generate_script, generate_audio_mp3, mix_with_bg_music
     try:
         script = generate_script(
             full_prompt,
@@ -223,6 +202,7 @@
 
     final_mix = mix_with_bg_music(combined_spoken, custom_bg_music_path)
 
+    import tempfile
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
         final_mix.export(temp_audio.name, format="mp3")
         final_mp3_path = temp_audio.name
@@ -316,6 +296,7 @@ def main():
         st.session_state["transcript"] = None
     if "transcript_original" not in st.session_state:
         st.session_state["transcript_original"] = None
+
     # For Q&A
     if "qa_count" not in st.session_state:
         st.session_state["qa_count"] = 0
@@ -351,6 +332,7 @@
         progress_bar.progress(75)
         time.sleep(1.0)
 
+        from utils import truncate_text
        audio_bytes, transcript = generate_podcast(
             file,
             url,
@@ -381,7 +363,6 @@
         st.session_state["audio_bytes"] = audio_bytes
         st.session_state["transcript"] = transcript
         st.session_state["transcript_original"] = transcript
-        # Reset Q&A
         st.session_state["qa_count"] = 0
         st.session_state["conversation_history"] = ""
 
@@ -401,12 +382,25 @@
             height=300
         )
 
+        from difflib import SequenceMatcher
+        def highlight_differences(original: str, edited: str) -> str:
+            matcher = SequenceMatcher(None, original.split(), edited.split())
+            highlighted = []
+            for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
+                if opcode == 'equal':
+                    highlighted.extend(original.split()[i1:i2])
+                elif opcode in ('replace', 'insert'):
+                    added_words = edited.split()[j1:j2]
+                    highlighted.extend([f'<span style="color:red">{word}</span>' for word in added_words])
+                elif opcode == 'delete':
+                    pass
+            return ' '.join(highlighted)
+
         if st.session_state["transcript_original"]:
             highlighted_transcript = highlight_differences(
                 st.session_state["transcript_original"],
                 edited_text
             )
-
             st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
             st.markdown(highlighted_transcript, unsafe_allow_html=True)
 
@@ -454,45 +448,70 @@
         st.markdown("### Updated Transcript")
         st.markdown(new_transcript)
 
-    # -----------------------
-    # POST-PODCAST Q&A Logic
-    # -----------------------
-    st.markdown("## Post-Podcast Q&A")
+    # ----------- POST-PODCAST Q&A with Microphone -----------
+    st.markdown("## Post-Podcast Q&A (Using Microphone)")
+
     used_questions = st.session_state["qa_count"]
     remaining = MAX_QA_QUESTIONS - used_questions
 
     if remaining > 0:
         st.write(f"You can ask up to {remaining} more question(s).")
 
-        typed_q = st.text_input("Type your follow-up question:")
-        audio_q = st.file_uploader("Or upload an audio question (WAV, MP3)")
+        st.write("### Record Your Follow-Up Question:")
+        # Use streamlit-webrtc
+        from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration
+        from qa import AudioBufferProcessor
+        RTC_CONFIGURATION = {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
+
+        webrtc_ctx = webrtc_streamer(
+            key="qna-audio-stream",
+            mode=WebRtcMode.SENDONLY,
+            rtc_configuration=RTC_CONFIGURATION,
+            media_stream_constraints={"audio": True, "video": False},
+            audio_processor_factory=AudioBufferProcessor
+        )
+
+        if "audio-processor" not in st.session_state:
+            st.session_state["audio-processor"] = None
+
+        if webrtc_ctx.state.playing and webrtc_ctx.audio_processor:
+            st.session_state["audio-processor"] = webrtc_ctx.audio_processor
+
+        # Once the user clicks "Stop", we can finalize the frames
+        if webrtc_ctx.state.status == webrtc_ctx.state.STATUS.DISCONNECTED:
+            st.write("Recording Stopped. You may now submit your question.")
 
         if st.button("Submit Q&A"):
             if used_questions >= MAX_QA_QUESTIONS:
                 st.warning("You have reached the Q&A limit.")
             else:
-                question_text = typed_q.strip()
-                if audio_q is not None:
-                    suffix = ".wav"
-                    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
-                        tmp.write(audio_q.read())
-                        local_audio_path = tmp.name
-                    st.write("Transcribing your audio question...")
-                    audio_transcript = transcribe_audio_deepgram(local_audio_path)
-                    if audio_transcript:
-                        question_text = audio_transcript
-
-                if not question_text:
-                    st.warning("No question found (text or audio).")
+                # 1) Finalize WAV
+                processor = st.session_state.get("audio-processor")
+                if not processor or not getattr(processor, "frames", None):
+                    st.warning("No recorded audio found. Please record your question first.")
                 else:
-                    st.write("Generating an answer...")
-                    ans_audio, ans_text = handle_qa_exchange(question_text)
-                    if ans_audio:
-                        st.audio(ans_audio, format="audio/mp3")
-                        st.markdown(f"**John**: {ans_text}")
-                        st.session_state["qa_count"] += 1
+                    local_wav_path = processor.finalize_wav()
+                    if not local_wav_path:
+                        st.warning("No audio frames found. Please record again.")
                     else:
-                        st.warning("No response could be generated.")
+                        # 2) Transcribe with Deepgram (same logic as your old approach)
+                        from qa import transcribe_audio_deepgram
+                        st.write("Transcribing your voice question via Deepgram...")
+                        question_text = transcribe_audio_deepgram(local_wav_path)
+                        if not question_text.strip():
+                            st.warning("No transcript found. Please try again.")
+                        else:
+                            st.write(f"**You asked**: {question_text}")
+
+                            # 3) Generate an LLM answer
+                            conversation_so_far = st.session_state["conversation_history"]
+                            ans_audio, ans_text = handle_qa_exchange(conversation_so_far, question_text)
+                            if ans_audio:
+                                st.audio(ans_audio, format="audio/mp3")
+                                st.markdown(f"**John**: {ans_text}")
+                                st.session_state["qa_count"] += 1
+                            else:
+                                st.warning("No response could be generated.")
    else:
         st.write("You have used all 5 Q&A opportunities.")
 
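A note on the parser the commit leaves untouched: the removed docstring was documentation only, and parse_user_edited_transcript still keys off the same bold-speaker pattern. A quick standalone check of that regex, using the sample lines from the deleted docstring:

import re

# Same pattern parse_user_edited_transcript uses in app.py.
pattern = r"\*\*(.+?)\*\*:\s*(.+)"

edited_text = "**Angela**: Hello\n**Dimitris**: Great topic..."
print(re.findall(pattern, edited_text))
# [('Angela', 'Hello'), ('Dimitris', 'Great topic...')]

Each match yields a (display_speaker, text) pair; per the removed docstring, the app then maps the display name onto the host or guest speaker when it matches host_name or guest_name.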
 
 
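The new highlight_differences helper builds the word-level view shown under "Edited Transcript Highlights" by walking difflib.SequenceMatcher opcodes: 'equal' words pass through, words from 'replace' and 'insert' spans get a red <span>, and 'delete' spans are dropped. A standalone trace of the opcodes it consumes:

from difflib import SequenceMatcher

matcher = SequenceMatcher(None, "hello world today".split(), "hello brave new world".split())
for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
    print(opcode, i1, i2, j1, j2)
# equal  0 1 0 1  -> "hello" kept as-is
# insert 1 1 1 3  -> "brave new" wrapped in red spans
# equal  1 2 3 4  -> "world" kept as-is
# delete 2 3 4 4  -> "today" dropped from the rendered output

One consequence of ignoring 'delete' opcodes is that pure deletions leave no visible trace in the highlighted transcript; only added or replaced words show up in red.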
 
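qa.py itself is not part of this commit, so the shape of AudioBufferProcessor is not visible here; the call sites above only require a .frames buffer and a .finalize_wav() method that returns a file path, or None when nothing was captured. A minimal sketch of a compatible processor against streamlit-webrtc's AudioProcessorBase interface, under those assumptions:

# Hypothetical sketch of qa.AudioBufferProcessor; the real file is not in this diff.
import tempfile
import wave
from typing import List, Optional

import av
import numpy as np
from streamlit_webrtc import AudioProcessorBase


class AudioBufferProcessor(AudioProcessorBase):
    def __init__(self) -> None:
        self.frames: List[av.AudioFrame] = []

    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
        # streamlit-webrtc calls this for every captured audio frame;
        # buffer it and pass it through unchanged.
        self.frames.append(frame)
        return frame

    def finalize_wav(self) -> Optional[str]:
        # Concatenate the buffered PCM and write it to a temp WAV file,
        # returning the path app.py hands to the transcriber (None if empty).
        if not self.frames:
            return None
        first = self.frames[0]
        channels = len(first.layout.channels)
        # Assumes the packed s16 frames aiortc delivers by default, where
        # to_ndarray() yields interleaved int16 samples of shape (1, n).
        pcm = np.concatenate([f.to_ndarray() for f in self.frames], axis=1)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            wav_path = tmp.name
        with wave.open(wav_path, "wb") as wf:
            wf.setnchannels(channels)
            wf.setsampwidth(2)  # 16-bit PCM
            wf.setframerate(first.sample_rate)
            wf.writeframes(pcm.astype(np.int16).tobytes())
        return wav_path

For packed s16 audio the interleaved bytes round-trip correctly with setnchannels(); planar formats would need an explicit interleave step before writing.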
 
 
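One caveat in the stop-detection the commit adds: as of current streamlit-webrtc releases, webrtc_ctx.state is a small state object whose public fields are the booleans playing and signalling; it has no status attribute or STATUS enum, so the line checking webrtc_ctx.state.status == webrtc_ctx.state.STATUS.DISCONNECTED would raise AttributeError once it executes. A sketch of the same "recording stopped" message using only those documented flags:

import streamlit as st
from streamlit_webrtc import WebRtcMode, webrtc_streamer

webrtc_ctx = webrtc_streamer(
    key="qna-audio-stream",
    mode=WebRtcMode.SENDONLY,
    media_stream_constraints={"audio": True, "video": False},
)

# The stream is no longer playing, but a processor with captured frames
# was stashed in session state on an earlier rerun: treat that as "stopped".
if not webrtc_ctx.state.playing and st.session_state.get("audio-processor"):
    st.write("Recording Stopped. You may now submit your question.")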