Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -20,23 +20,12 @@ from utils import (
|
|
20 |
DialogueItem
|
21 |
)
|
22 |
from prompts import SYSTEM_PROMPT
|
23 |
-
|
24 |
-
# NEW: For Q&A
|
25 |
from qa import transcribe_audio_deepgram, handle_qa_exchange
|
26 |
|
27 |
MAX_QA_QUESTIONS = 5
|
28 |
|
29 |
def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
|
30 |
-
"""
|
31 |
-
Looks for lines like:
|
32 |
-
**Angela**: Hello
|
33 |
-
**Dimitris**: Great topic...
|
34 |
-
We treat 'Angela' as the raw display_speaker, 'Hello' as text.
|
35 |
-
Then we map 'Angela' -> speaker='Jane' (if it matches host_name),
|
36 |
-
'Dimitris' -> speaker='John' (if it matches guest_name), etc.
|
37 |
-
|
38 |
-
Returns a list of DialogueItem.
|
39 |
-
"""
|
40 |
pattern = r"\*\*(.+?)\*\*:\s*(.+)"
|
41 |
matches = re.findall(pattern, edited_text)
|
42 |
|
@@ -71,17 +60,11 @@ def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: s
|
|
71 |
return items
|
72 |
|
73 |
def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
|
74 |
-
"""
|
75 |
-
Re-generates multi-speaker audio from user-edited DialogueItems,
|
76 |
-
then mixes with background music or custom music.
|
77 |
-
Returns (audio_bytes, transcript_str).
|
78 |
-
"""
|
79 |
audio_segments = []
|
80 |
transcript = ""
|
81 |
-
crossfade_duration = 50
|
82 |
|
83 |
for item in dialogue_items:
|
84 |
-
# Generate TTS for each line
|
85 |
audio_file = generate_audio_mp3(item.text, item.speaker)
|
86 |
seg = AudioSegment.from_file(audio_file, format="mp3")
|
87 |
audio_segments.append(seg)
|
@@ -123,12 +106,6 @@ def generate_podcast(
|
|
123 |
sponsor_style,
|
124 |
custom_bg_music_path
|
125 |
):
|
126 |
-
"""
|
127 |
-
Creates a multi-speaker podcast from PDF, URL, YouTube, or a research topic.
|
128 |
-
Ensures female voice for host (Jane), male voice for guest (John).
|
129 |
-
If sponsor_content is empty, we skip sponsor instructions entirely.
|
130 |
-
Returns (audio_bytes, transcript_str).
|
131 |
-
"""
|
132 |
sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
|
133 |
if sum(sources) > 1:
|
134 |
return None, "Provide only one input (PDF, URL, YouTube, or Topic)."
|
@@ -166,7 +143,6 @@ def generate_podcast(
|
|
166 |
except Exception as e:
|
167 |
return None, f"Error researching topic: {str(e)}"
|
168 |
|
169 |
-
from utils import truncate_text
|
170 |
text = truncate_text(text)
|
171 |
|
172 |
extra_instructions = []
|
@@ -178,7 +154,7 @@ def generate_podcast(
|
|
178 |
if user_specs.strip():
|
179 |
extra_instructions.append(f"Additional User Instructions: {user_specs}")
|
180 |
|
181 |
-
# If
|
182 |
sponsor_instructions_present = False
|
183 |
if sponsor_content.strip():
|
184 |
extra_instructions.append(
|
@@ -189,14 +165,11 @@ def generate_podcast(
|
|
189 |
from prompts import SYSTEM_PROMPT
|
190 |
combined_instructions = "\n\n".join(extra_instructions).strip()
|
191 |
full_prompt = SYSTEM_PROMPT
|
192 |
-
|
193 |
if combined_instructions:
|
194 |
full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
|
195 |
|
196 |
from utils import generate_script, generate_audio_mp3, mix_with_bg_music
|
197 |
try:
|
198 |
-
# If sponsor is empty, we pass the sponsor_style anyway,
|
199 |
-
# but the model won't see sponsor instructions (since none were appended).
|
200 |
script = generate_script(
|
201 |
full_prompt,
|
202 |
text,
|
@@ -204,7 +177,7 @@ def generate_podcast(
|
|
204 |
f"{length_minutes} Mins",
|
205 |
host_name=host_name or "Jane",
|
206 |
guest_name=guest_name or "John",
|
207 |
-
sponsor_style=sponsor_style
|
208 |
)
|
209 |
except Exception as e:
|
210 |
return None, f"Error generating script: {str(e)}"
|
@@ -273,7 +246,7 @@ def main():
|
|
273 |
"conversational podcast.\n"
|
274 |
"Select a tone and a duration range. The script will be on-topic, concise, and respect your chosen length.\n\n"
|
275 |
"### How to use:\n"
|
276 |
-
"1. **Provide one source:** PDF Files, Website URL, YouTube
|
277 |
"2. **Choose the tone and the target duration.**\n"
|
278 |
"3. **Click 'Generate Podcast'** to produce your podcast. After the audio is generated, "
|
279 |
" you can edit the transcript and re-generate the audio with your edits if needed.\n\n"
|
@@ -305,10 +278,7 @@ def main():
|
|
305 |
|
306 |
user_specs = st.text_area("Any special instructions or prompts for the script? (Optional)", "")
|
307 |
sponsor_content = st.text_area("Sponsored Content / Ad (Optional)", "")
|
308 |
-
sponsor_style = st.selectbox(
|
309 |
-
"Sponsor Integration Style",
|
310 |
-
["Separate Break", "Blended"]
|
311 |
-
)
|
312 |
|
313 |
custom_bg_music_file = st.file_uploader("Upload Custom Background Music (Optional)", type=["mp3", "wav"])
|
314 |
custom_bg_music_path = None
|
@@ -411,7 +381,6 @@ def main():
|
|
411 |
st.session_state["transcript_original"],
|
412 |
edited_text
|
413 |
)
|
414 |
-
|
415 |
st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
|
416 |
st.markdown(highlighted_transcript, unsafe_allow_html=True)
|
417 |
|
@@ -459,9 +428,9 @@ def main():
|
|
459 |
st.markdown("### Updated Transcript")
|
460 |
st.markdown(new_transcript)
|
461 |
|
462 |
-
#
|
463 |
-
#
|
464 |
-
#
|
465 |
st.markdown("## Post-Podcast Q&A")
|
466 |
used_questions = st.session_state["qa_count"]
|
467 |
remaining = MAX_QA_QUESTIONS - used_questions
|
@@ -470,8 +439,8 @@ def main():
|
|
470 |
st.write(f"You can ask up to {remaining} more question(s).")
|
471 |
|
472 |
typed_q = st.text_input("Type your follow-up question:")
|
473 |
-
#
|
474 |
-
audio_q = st.
|
475 |
|
476 |
if st.button("Submit Q&A"):
|
477 |
if used_questions >= MAX_QA_QUESTIONS:
|
@@ -479,11 +448,10 @@ def main():
|
|
479 |
else:
|
480 |
question_text = typed_q.strip()
|
481 |
if audio_q is not None:
|
482 |
-
|
483 |
-
|
484 |
-
tmp.write(audio_q.read())
|
485 |
local_audio_path = tmp.name
|
486 |
-
st.write("Transcribing your audio question...")
|
487 |
audio_transcript = transcribe_audio_deepgram(local_audio_path)
|
488 |
if audio_transcript:
|
489 |
question_text = audio_transcript
|
|
|
20 |
DialogueItem
|
21 |
)
|
22 |
from prompts import SYSTEM_PROMPT
|
23 |
+
# Q&A
|
|
|
24 |
from qa import transcribe_audio_deepgram, handle_qa_exchange
|
25 |
|
26 |
MAX_QA_QUESTIONS = 5
|
27 |
|
28 |
def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
pattern = r"\*\*(.+?)\*\*:\s*(.+)"
|
30 |
matches = re.findall(pattern, edited_text)
|
31 |
|
|
|
60 |
return items
|
61 |
|
62 |
def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
|
|
|
|
|
|
|
|
|
|
|
63 |
audio_segments = []
|
64 |
transcript = ""
|
65 |
+
crossfade_duration = 50
|
66 |
|
67 |
for item in dialogue_items:
|
|
|
68 |
audio_file = generate_audio_mp3(item.text, item.speaker)
|
69 |
seg = AudioSegment.from_file(audio_file, format="mp3")
|
70 |
audio_segments.append(seg)
|
|
|
106 |
sponsor_style,
|
107 |
custom_bg_music_path
|
108 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
sources = [bool(file), bool(url), bool(video_url), bool(research_topic_input)]
|
110 |
if sum(sources) > 1:
|
111 |
return None, "Provide only one input (PDF, URL, YouTube, or Topic)."
|
|
|
143 |
except Exception as e:
|
144 |
return None, f"Error researching topic: {str(e)}"
|
145 |
|
|
|
146 |
text = truncate_text(text)
|
147 |
|
148 |
extra_instructions = []
|
|
|
154 |
if user_specs.strip():
|
155 |
extra_instructions.append(f"Additional User Instructions: {user_specs}")
|
156 |
|
157 |
+
# If user provided sponsor content, we pass it along; otherwise skip
|
158 |
sponsor_instructions_present = False
|
159 |
if sponsor_content.strip():
|
160 |
extra_instructions.append(
|
|
|
165 |
from prompts import SYSTEM_PROMPT
|
166 |
combined_instructions = "\n\n".join(extra_instructions).strip()
|
167 |
full_prompt = SYSTEM_PROMPT
|
|
|
168 |
if combined_instructions:
|
169 |
full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
|
170 |
|
171 |
from utils import generate_script, generate_audio_mp3, mix_with_bg_music
|
172 |
try:
|
|
|
|
|
173 |
script = generate_script(
|
174 |
full_prompt,
|
175 |
text,
|
|
|
177 |
f"{length_minutes} Mins",
|
178 |
host_name=host_name or "Jane",
|
179 |
guest_name=guest_name or "John",
|
180 |
+
sponsor_style=sponsor_style # If sponsor is empty, no sponsor lines appended
|
181 |
)
|
182 |
except Exception as e:
|
183 |
return None, f"Error generating script: {str(e)}"
|
|
|
246 |
"conversational podcast.\n"
|
247 |
"Select a tone and a duration range. The script will be on-topic, concise, and respect your chosen length.\n\n"
|
248 |
"### How to use:\n"
|
249 |
+
"1. **Provide one source:** PDF Files, Website URL, YouTube link or a Topic to Research.\n"
|
250 |
"2. **Choose the tone and the target duration.**\n"
|
251 |
"3. **Click 'Generate Podcast'** to produce your podcast. After the audio is generated, "
|
252 |
" you can edit the transcript and re-generate the audio with your edits if needed.\n\n"
|
|
|
278 |
|
279 |
user_specs = st.text_area("Any special instructions or prompts for the script? (Optional)", "")
|
280 |
sponsor_content = st.text_area("Sponsored Content / Ad (Optional)", "")
|
281 |
+
sponsor_style = st.selectbox("Sponsor Integration Style", ["Separate Break", "Blended"])
|
|
|
|
|
|
|
282 |
|
283 |
custom_bg_music_file = st.file_uploader("Upload Custom Background Music (Optional)", type=["mp3", "wav"])
|
284 |
custom_bg_music_path = None
|
|
|
381 |
st.session_state["transcript_original"],
|
382 |
edited_text
|
383 |
)
|
|
|
384 |
st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
|
385 |
st.markdown(highlighted_transcript, unsafe_allow_html=True)
|
386 |
|
|
|
428 |
st.markdown("### Updated Transcript")
|
429 |
st.markdown(new_transcript)
|
430 |
|
431 |
+
# -------------------------------------------
|
432 |
+
# Post-Podcast Q&A using st.audio_input():
|
433 |
+
# -------------------------------------------
|
434 |
st.markdown("## Post-Podcast Q&A")
|
435 |
used_questions = st.session_state["qa_count"]
|
436 |
remaining = MAX_QA_QUESTIONS - used_questions
|
|
|
439 |
st.write(f"You can ask up to {remaining} more question(s).")
|
440 |
|
441 |
typed_q = st.text_input("Type your follow-up question:")
|
442 |
+
# Replacing file_uploader with st.audio_input (Streamlit >= 1.41)
|
443 |
+
audio_q = st.audio_input("Or record your voice question")
|
444 |
|
445 |
if st.button("Submit Q&A"):
|
446 |
if used_questions >= MAX_QA_QUESTIONS:
|
|
|
448 |
else:
|
449 |
question_text = typed_q.strip()
|
450 |
if audio_q is not None:
|
451 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
|
452 |
+
tmp.write(audio_q.getvalue())
|
|
|
453 |
local_audio_path = tmp.name
|
454 |
+
st.write("Transcribing your audio question via Deepgram...")
|
455 |
audio_transcript = transcribe_audio_deepgram(local_audio_path)
|
456 |
if audio_transcript:
|
457 |
question_text = audio_transcript
|