Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -21,12 +21,22 @@ from utils import (
|
|
21 |
)
|
22 |
from prompts import SYSTEM_PROMPT
|
23 |
|
24 |
-
#
|
25 |
-
|
26 |
|
27 |
-
MAX_QA_QUESTIONS = 5 #
|
28 |
|
29 |
def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
pattern = r"\*\*(.+?)\*\*:\s*(.+)"
|
31 |
matches = re.findall(pattern, edited_text)
|
32 |
|
@@ -61,6 +71,11 @@ def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: s
|
|
61 |
return items
|
62 |
|
63 |
def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
|
|
|
|
|
|
|
|
|
|
|
64 |
audio_segments = []
|
65 |
transcript = ""
|
66 |
crossfade_duration = 50 # ms
|
@@ -107,6 +122,12 @@ def generate_podcast(
|
|
107 |
sponsor_style,
|
108 |
custom_bg_music_path
|
109 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
|
111 |
if sum(sources) > 1:
|
112 |
return None, "Provide only one input (PDF, URL, YouTube, or Topic)."
|
@@ -144,6 +165,7 @@ def generate_podcast(
|
|
144 |
except Exception as e:
|
145 |
return None, f"Error researching topic: {str(e)}"
|
146 |
|
|
|
147 |
text = truncate_text(text)
|
148 |
|
149 |
extra_instructions = []
|
@@ -161,11 +183,12 @@ def generate_podcast(
|
|
161 |
)
|
162 |
|
163 |
from prompts import SYSTEM_PROMPT
|
|
|
164 |
full_prompt = SYSTEM_PROMPT
|
165 |
-
if
|
166 |
-
combined_instructions = "\n\n".join(extra_instructions).strip()
|
167 |
full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
|
168 |
|
|
|
169 |
try:
|
170 |
script = generate_script(
|
171 |
full_prompt,
|
@@ -181,7 +204,7 @@ def generate_podcast(
|
|
181 |
|
182 |
audio_segments = []
|
183 |
transcript = ""
|
184 |
-
crossfade_duration = 50
|
185 |
|
186 |
try:
|
187 |
for item in script.dialogue:
|
@@ -293,7 +316,7 @@ def main():
|
|
293 |
st.session_state["transcript"] = None
|
294 |
if "transcript_original" not in st.session_state:
|
295 |
st.session_state["transcript_original"] = None
|
296 |
-
|
297 |
if "qa_count" not in st.session_state:
|
298 |
st.session_state["qa_count"] = 0
|
299 |
if "conversation_history" not in st.session_state:
|
@@ -358,10 +381,10 @@ def main():
|
|
358 |
st.session_state["audio_bytes"] = audio_bytes
|
359 |
st.session_state["transcript"] = transcript
|
360 |
st.session_state["transcript_original"] = transcript
|
|
|
361 |
st.session_state["qa_count"] = 0
|
362 |
st.session_state["conversation_history"] = ""
|
363 |
|
364 |
-
# Display generated audio and transcript if present
|
365 |
if st.session_state["audio_bytes"]:
|
366 |
st.audio(st.session_state["audio_bytes"], format='audio/mp3')
|
367 |
st.download_button(
|
@@ -378,24 +401,12 @@ def main():
|
|
378 |
height=300
|
379 |
)
|
380 |
|
381 |
-
def highlight_differences(original: str, edited: str) -> str:
    """Return the edited text as a single line with new words marked in red.

    Both texts are split on whitespace and compared word by word with
    ``difflib.SequenceMatcher``:

    * words that are unchanged are emitted as-is,
    * words that were inserted, or that replace original words, are wrapped
      in ``<span style="color:red">...</span>``,
    * words that exist only in ``original`` (deletions) are omitted.

    The result is joined with single spaces, so the original line breaks and
    spacing are not preserved.

    Every word is HTML-escaped (``&``, ``<``, ``>``) before it is emitted.
    The caller visible in this file renders the result through
    ``st.markdown(..., unsafe_allow_html=True)``, so an unescaped ``<`` in the
    transcript would otherwise be interpreted as markup instead of text.

    Args:
        original: The transcript as it was first generated.
        edited: The transcript after the user's edits.

    Returns:
        The highlighted text; an empty string when ``edited`` has no words.
    """
    import html  # local import so the block does not rely on a file-level one

    # Tokenise once; the opcode indices below refer to these two lists.
    original_words = original.split()
    edited_words = edited.split()
    matcher = difflib.SequenceMatcher(None, original_words, edited_words)

    highlighted = []
    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
        if opcode == 'equal':
            highlighted.extend(
                html.escape(word, quote=False) for word in original_words[i1:i2]
            )
        elif opcode in ('replace', 'insert'):
            highlighted.extend(
                f'<span style="color:red">{html.escape(word, quote=False)}</span>'
                for word in edited_words[j1:j2]
            )
        # 'delete': words removed by the user are intentionally not shown.
    return ' '.join(highlighted)
|
393 |
-
|
394 |
if st.session_state["transcript_original"]:
|
395 |
highlighted_transcript = highlight_differences(
|
396 |
st.session_state["transcript_original"],
|
397 |
edited_text
|
398 |
)
|
|
|
399 |
st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
|
400 |
st.markdown(highlighted_transcript, unsafe_allow_html=True)
|
401 |
|
@@ -443,36 +454,45 @@ def main():
|
|
443 |
st.markdown("### Updated Transcript")
|
444 |
st.markdown(new_transcript)
|
445 |
|
446 |
-
#
|
447 |
-
|
448 |
-
|
|
|
449 |
used_questions = st.session_state["qa_count"]
|
450 |
remaining = MAX_QA_QUESTIONS - used_questions
|
451 |
|
452 |
if remaining > 0:
|
453 |
st.write(f"You can ask up to {remaining} more question(s).")
|
454 |
|
455 |
-
typed_q = st.text_input("Type your follow-up question
|
|
|
456 |
|
457 |
if st.button("Submit Q&A"):
|
458 |
if used_questions >= MAX_QA_QUESTIONS:
|
459 |
st.warning("You have reached the Q&A limit.")
|
460 |
else:
|
461 |
question_text = typed_q.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
462 |
if not question_text:
|
463 |
-
st.warning("No question found
|
464 |
else:
|
465 |
-
st.write(
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
# Update conversation
|
474 |
-
st.session_state["conversation_history"] += f"\nUser: {question_text}\nJohn: {fake_answer}\n"
|
475 |
-
st.session_state["qa_count"] += 1
|
476 |
else:
|
477 |
st.write("You have used all 5 Q&A opportunities.")
|
478 |
|
|
|
21 |
)
|
22 |
from prompts import SYSTEM_PROMPT
|
23 |
|
24 |
+
# NEW: For Q&A
|
25 |
+
from qa import transcribe_audio_deepgram, handle_qa_exchange
|
26 |
|
27 |
+
MAX_QA_QUESTIONS = 5 # up to 5 voice/text questions
|
28 |
|
29 |
def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
|
30 |
+
"""
|
31 |
+
Looks for lines like:
|
32 |
+
**Angela**: Hello
|
33 |
+
**Dimitris**: Great topic...
|
34 |
+
We treat 'Angela' as the raw display_speaker, 'Hello' as text.
|
35 |
+
Then we map 'Angela' -> speaker='Jane' (if it matches host_name),
|
36 |
+
'Dimitris' -> speaker='John' (if it matches guest_name), etc.
|
37 |
+
|
38 |
+
Returns a list of DialogueItem.
|
39 |
+
"""
|
40 |
pattern = r"\*\*(.+?)\*\*:\s*(.+)"
|
41 |
matches = re.findall(pattern, edited_text)
|
42 |
|
|
|
71 |
return items
|
72 |
|
73 |
def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
|
74 |
+
"""
|
75 |
+
Re-generates multi-speaker audio from user-edited DialogueItems,
|
76 |
+
then mixes with background music or custom music.
|
77 |
+
Returns (audio_bytes, transcript_str).
|
78 |
+
"""
|
79 |
audio_segments = []
|
80 |
transcript = ""
|
81 |
crossfade_duration = 50 # ms
|
|
|
122 |
sponsor_style,
|
123 |
custom_bg_music_path
|
124 |
):
|
125 |
+
"""
|
126 |
+
Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
|
127 |
+
Ensures female voice for host (Jane), male voice for guest (John).
|
128 |
+
Sponsor content is either separate or blended based on sponsor_style.
|
129 |
+
Returns (audio_bytes, transcript_str).
|
130 |
+
"""
|
131 |
sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
|
132 |
if sum(sources) > 1:
|
133 |
return None, "Provide only one input (PDF, URL, YouTube, or Topic)."
|
|
|
165 |
except Exception as e:
|
166 |
return None, f"Error researching topic: {str(e)}"
|
167 |
|
168 |
+
from utils import truncate_text
|
169 |
text = truncate_text(text)
|
170 |
|
171 |
extra_instructions = []
|
|
|
183 |
)
|
184 |
|
185 |
from prompts import SYSTEM_PROMPT
|
186 |
+
combined_instructions = "\n\n".join(extra_instructions).strip()
|
187 |
full_prompt = SYSTEM_PROMPT
|
188 |
+
if combined_instructions:
|
|
|
189 |
full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
|
190 |
|
191 |
+
from utils import generate_script, generate_audio_mp3, mix_with_bg_music
|
192 |
try:
|
193 |
script = generate_script(
|
194 |
full_prompt,
|
|
|
204 |
|
205 |
audio_segments = []
|
206 |
transcript = ""
|
207 |
+
crossfade_duration = 50
|
208 |
|
209 |
try:
|
210 |
for item in script.dialogue:
|
|
|
316 |
st.session_state["transcript"] = None
|
317 |
if "transcript_original" not in st.session_state:
|
318 |
st.session_state["transcript_original"] = None
|
319 |
+
# For Q&A
|
320 |
if "qa_count" not in st.session_state:
|
321 |
st.session_state["qa_count"] = 0
|
322 |
if "conversation_history" not in st.session_state:
|
|
|
381 |
st.session_state["audio_bytes"] = audio_bytes
|
382 |
st.session_state["transcript"] = transcript
|
383 |
st.session_state["transcript_original"] = transcript
|
384 |
+
# Reset Q&A
|
385 |
st.session_state["qa_count"] = 0
|
386 |
st.session_state["conversation_history"] = ""
|
387 |
|
|
|
388 |
if st.session_state["audio_bytes"]:
|
389 |
st.audio(st.session_state["audio_bytes"], format='audio/mp3')
|
390 |
st.download_button(
|
|
|
401 |
height=300
|
402 |
)
|
403 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
404 |
if st.session_state["transcript_original"]:
|
405 |
highlighted_transcript = highlight_differences(
|
406 |
st.session_state["transcript_original"],
|
407 |
edited_text
|
408 |
)
|
409 |
+
|
410 |
st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
|
411 |
st.markdown(highlighted_transcript, unsafe_allow_html=True)
|
412 |
|
|
|
454 |
st.markdown("### Updated Transcript")
|
455 |
st.markdown(new_transcript)
|
456 |
|
457 |
+
# -----------------------
|
458 |
+
# POST-PODCAST Q&A Logic
|
459 |
+
# -----------------------
|
460 |
+
st.markdown("## Post-Podcast Q&A")
|
461 |
used_questions = st.session_state["qa_count"]
|
462 |
remaining = MAX_QA_QUESTIONS - used_questions
|
463 |
|
464 |
if remaining > 0:
|
465 |
st.write(f"You can ask up to {remaining} more question(s).")
|
466 |
|
467 |
+
typed_q = st.text_input("Type your follow-up question:")
|
468 |
+
audio_q = st.file_uploader("Or upload an audio question (WAV, MP3)")
|
469 |
|
470 |
if st.button("Submit Q&A"):
|
471 |
if used_questions >= MAX_QA_QUESTIONS:
|
472 |
st.warning("You have reached the Q&A limit.")
|
473 |
else:
|
474 |
question_text = typed_q.strip()
|
475 |
+
if audio_q is not None:
|
476 |
+
suffix = ".wav"
|
477 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
|
478 |
+
tmp.write(audio_q.read())
|
479 |
+
local_audio_path = tmp.name
|
480 |
+
st.write("Transcribing your audio question...")
|
481 |
+
audio_transcript = transcribe_audio_deepgram(local_audio_path)
|
482 |
+
if audio_transcript:
|
483 |
+
question_text = audio_transcript
|
484 |
+
|
485 |
if not question_text:
|
486 |
+
st.warning("No question found (text or audio).")
|
487 |
else:
|
488 |
+
st.write("Generating an answer...")
|
489 |
+
ans_audio, ans_text = handle_qa_exchange(question_text)
|
490 |
+
if ans_audio:
|
491 |
+
st.audio(ans_audio, format="audio/mp3")
|
492 |
+
st.markdown(f"**John**: {ans_text}")
|
493 |
+
st.session_state["qa_count"] += 1
|
494 |
+
else:
|
495 |
+
st.warning("No response could be generated.")
|
|
|
|
|
|
|
496 |
else:
|
497 |
st.write("You have used all 5 Q&A opportunities.")
|
498 |
|