Update utils.py
utils.py CHANGED
@@ -1,5 +1,3 @@
-# utils.py
-
 import os
 import re
 import json
@@ -15,11 +13,10 @@ import tiktoken
 from groq import Groq
 import numpy as np
 import torch
-import random
 
 class DialogueItem(BaseModel):
-    speaker: Literal["Jane", "John"]
-    display_speaker: str = "Jane"
+    speaker: Literal["Jane", "John"]  # TTS voice
+    display_speaker: str = "Jane"  # For display in transcript
     text: str
 
 class Dialogue(BaseModel):
@@ -47,8 +44,7 @@ def truncate_text(text, max_tokens=2048):
 
 def extract_text_from_url(url):
     """
-    Fetches and extracts readable text from a given URL
-    (stripping out scripts, styles, etc.).
+    Fetches and extracts readable text from a given URL (stripping out scripts, styles, etc.).
     """
     print("[LOG] Extracting text from URL:", url)
     try:
@@ -85,8 +81,7 @@ def pitch_shift(audio: AudioSegment, semitones: int) -> AudioSegment:
 
 def is_sufficient(text: str, min_word_count: int = 500) -> bool:
     """
-    Checks if the fetched text meets our sufficiency criteria
-    (e.g., at least 500 words).
+    Checks if the fetched text meets our sufficiency criteria (e.g., at least 500 words).
    """
     word_count = len(text.split())
     print(f"[DEBUG] Aggregated word count: {word_count}")
@@ -98,6 +93,7 @@ def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
     Appends it to our aggregated info if found.
     """
     print("[LOG] Querying LLM for additional information.")
+
     system_prompt = (
         "You are an AI assistant with extensive knowledge up to 2023-10. "
         "Provide additional relevant information on the following topic based on your knowledge base.\n\n"
@@ -105,7 +101,9 @@ def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
         f"Existing Information: {existing_text}\n\n"
         "Please add more insightful details, facts, and perspectives to enhance the understanding of the topic."
     )
+
     groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+
     try:
         response = groq_client.chat.completions.create(
             messages=[{"role": "system", "content": system_prompt}],
@@ -113,19 +111,22 @@ def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
             max_tokens=1024,
             temperature=0.7
         )
+
+        additional_info = response.choices[0].message.content.strip()
+        print("[DEBUG] Additional information from LLM:")
+        print(additional_info)
+        return additional_info
+
     except Exception as e:
         print("[ERROR] Groq API error during fallback:", e)
         return ""
-    additional_info = response.choices[0].message.content.strip()
-    print("[DEBUG] Additional information from LLM:")
-    print(additional_info)
-    return additional_info
 
 def research_topic(topic: str) -> str:
     """
     Gathers info from various RSS feeds and Wikipedia. If needed, queries the LLM
     for more data if the aggregated text is insufficient.
     """
+
     sources = {
         "BBC": "https://feeds.bbci.co.uk/news/rss.xml",
         "CNN": "http://rss.cnn.com/rss/edition.rss",
@@ -137,484 +138,472 @@ def research_topic(topic: str) -> str:
         "Google News - Custom": f"https://news.google.com/rss/search?q={requests.utils.quote(topic)}&hl=en-IN&gl=IN&ceid=IN:en",
     }
 
+    summary_parts = []
+
+    # Wikipedia summary
+    wiki_summary = fetch_wikipedia_summary(topic)
+    if wiki_summary:
+        summary_parts.append(f"From Wikipedia: {wiki_summary}")
+
+    # For each RSS feed
+    for name, feed_url in sources.items():
+        try:
+            items = fetch_rss_feed(feed_url)
+            if not items:
+                continue
+
+            title, desc, link = find_relevant_article(items, topic, min_match=2)
+            if link:
+                article_text = fetch_article_text(link)
+                if article_text:
+                    summary_parts.append(f"From {name}: {article_text}")
+                else:
+                    summary_parts.append(f"From {name}: {title} - {desc}")
+        except Exception as e:
+            print(f"[ERROR] Error fetching from {name} RSS feed:", e)
+            continue
+
+    aggregated_info = " ".join(summary_parts)
+    print("[DEBUG] Aggregated info from primary sources:")
+    print(aggregated_info)
+
+    # If not enough data, fallback to LLM
+    if not is_sufficient(aggregated_info):
+        print("[LOG] Insufficient info from primary sources. Fallback to LLM.")
+        additional_info = query_llm_for_additional_info(topic, aggregated_info)
+        if additional_info:
+            aggregated_info += " " + additional_info
+        else:
+            print("[ERROR] Failed to retrieve additional info from LLM.")
+
+    if not aggregated_info:
+        return f"Sorry, I couldn't find recent information on '{topic}'."
+
+    return aggregated_info
 
 def fetch_wikipedia_summary(topic: str) -> str:
+    """
+    Fetch a quick Wikipedia summary of the topic via the official Wikipedia API.
+    """
+    print("[LOG] Fetching Wikipedia summary for:", topic)
+    try:
+        search_url = (
+            f"https://en.wikipedia.org/w/api.php?action=opensearch&search={requests.utils.quote(topic)}"
+            "&limit=1&namespace=0&format=json"
+        )
+        resp = requests.get(search_url)
+        if resp.status_code != 200:
+            print(f"[ERROR] Failed to fetch Wikipedia search results for {topic}")
+            return ""
+
+        data = resp.json()
+        if len(data) > 1 and data[1]:
+            title = data[1][0]
+            summary_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{requests.utils.quote(title)}"
+            s_resp = requests.get(summary_url)
+            if s_resp.status_code == 200:
+                s_data = s_resp.json()
+                if "extract" in s_data:
+                    print("[LOG] Wikipedia summary fetched successfully.")
+                    return s_data["extract"]
+        return ""
+    except Exception as e:
+        print(f"[ERROR] Exception during Wikipedia summary fetch: {e}")
+        return ""
 
 def fetch_rss_feed(feed_url: str) -> list:
+    """
+    Pulls RSS feed data from a given URL and returns items.
+    """
+    print("[LOG] Fetching RSS feed:", feed_url)
+    try:
+        resp = requests.get(feed_url)
+        if resp.status_code != 200:
+            print(f"[ERROR] Failed to fetch RSS feed: {feed_url}")
+            return []
+
+        soup = BeautifulSoup(resp.content, "xml")
+        items = soup.find_all("item")
+        return items
+    except Exception as e:
+        print(f"[ERROR] Exception fetching RSS feed {feed_url}: {e}")
+        return []
 
 def find_relevant_article(items, topic: str, min_match=2) -> tuple:
+    """
+    Check each article in the RSS feed for mention of the topic by counting
+    the number of keyword matches.
+    """
+    print("[LOG] Finding relevant articles...")
+    keywords = re.findall(r'\w+', topic.lower())
+
+    for item in items:
+        title = item.find("title").get_text().strip() if item.find("title") else ""
+        description = item.find("description").get_text().strip() if item.find("description") else ""
+        text = (title + " " + description).lower()
+        matches = sum(1 for kw in keywords if kw in text)
+        if matches >= min_match:
+            link = item.find("link").get_text().strip() if item.find("link") else ""
+            print(f"[LOG] Relevant article found: {title}")
+            return title, description, link
+
+    return None, None, None
 
 def fetch_article_text(link: str) -> str:
+    """
+    Fetch the article text from the given link (first 5 paragraphs).
+    """
+    print("[LOG] Fetching article text from:", link)
+    if not link:
+        print("[LOG] No link provided for article text.")
+        return ""
+
+    try:
+        resp = requests.get(link)
+        if resp.status_code != 200:
+            print(f"[ERROR] Failed to fetch article from {link}")
+            return ""
+
+        soup = BeautifulSoup(resp.text, 'html.parser')
+        paragraphs = soup.find_all("p")
+        text = " ".join(p.get_text() for p in paragraphs[:5])  # first 5 paragraphs
+        print("[LOG] Article text fetched successfully.")
+        return text.strip()
+    except Exception as e:
+        print(f"[ERROR] Error fetching article text: {e}")
+        return ""
 
 def generate_script(
+    system_prompt: str,
+    input_text: str,
+    tone: str,
+    target_length: str,
+    host_name: str = "Jane",
+    guest_name: str = "John",
+    sponsor_style: str = "Separate Break",
+    sponsor_provided=None  # Accept sponsor_provided parameter
 ):
+    print("[LOG] Generating script with tone:", tone, "and length:", target_length)
+
+    groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+
+    words_per_minute = 150
+    numeric_minutes = 3
+    match = re.search(r"(\d+)", target_length)
+    if match:
+        numeric_minutes = int(match.group(1))
+
+    min_words = max(50, numeric_minutes * 100)
+    max_words = numeric_minutes * words_per_minute
+
+    # Tone mapping dictionary
+    tone_map = {
+        "Humorous": "funny and exciting, makes people chuckle",
+        "Formal": "business-like, well-structured, professional",
+        "Casual": "like a conversation between close friends, relaxed and informal",
+        "Youthful": "like how teenagers might chat, energetic and lively"
+    }
+    chosen_tone = tone_map.get(tone, "casual")
+
+    # Determine sponsor instructions based on sponsor_provided and sponsor_style
+    if sponsor_provided:
+        if sponsor_style == "Separate Break":
+            sponsor_instructions = (
+                "If sponsor content is provided, include it in a separate ad break (~30 seconds). "
+                "Use phrasing like 'Now a word from our sponsor...' and end with 'Back to the show' or similar."
+            )
+        else:
+            sponsor_instructions = (
+                "If sponsor content is provided, blend it naturally (~30 seconds) into the conversation. "
+                "Avoid abrupt transitions."
+            )
+    else:
+        sponsor_instructions = ""  # No sponsor instructions if sponsor_provided is empty
+
+    prompt = (
+        f"{system_prompt}\n"
+        f"TONE: {chosen_tone}\n"
+        f"TARGET LENGTH: {target_length} (~{min_words}-{max_words} words)\n"
+        f"INPUT TEXT: {input_text}\n\n"
+        f"# Sponsor Style Instruction:\n{sponsor_instructions}\n\n"
+        "Please provide the output in the following JSON format without any additional text:\n\n"
+        "{\n"
+        '  "dialogue": [\n'
+        '    {\n'
+        '      "speaker": "Jane",\n'
+        '      "text": "..."\n'
+        '    },\n'
+        '    {\n'
+        '      "speaker": "John",\n'
+        '      "text": "..."\n'
+        '    }\n'
+        "  ]\n"
+        "}"
+    )
+
+    print("[LOG] Sending prompt to Groq:")
+    print(prompt)
+
+    try:
+        response = groq_client.chat.completions.create(
+            messages=[{"role": "system", "content": prompt}],
+            model="llama-3.3-70b-versatile",
+            max_tokens=2048,
+            temperature=0.7
+        )
+    except Exception as e:
+        print("[ERROR] Groq API error:", e)
+        raise ValueError(f"Error communicating with Groq API: {str(e)}")
+
+    raw_content = response.choices[0].message.content.strip()
+    start_index = raw_content.find('{')
+    end_index = raw_content.rfind('}')
+    if start_index == -1 or end_index == -1:
+        raise ValueError("Failed to parse dialogue: No JSON found.")
+
+    json_str = raw_content[start_index:end_index + 1].strip()
+
+    try:
+        data = json.loads(json_str)
+        dialogue_list = data.get("dialogue", [])
+
+        for d in dialogue_list:
+            raw_speaker = d.get("speaker", "Jane")
+            if raw_speaker.lower() == host_name.lower():
+                d["speaker"] = "Jane"
+                d["display_speaker"] = host_name
+            elif raw_speaker.lower() == guest_name.lower():
+                d["speaker"] = "John"
+                d["display_speaker"] = guest_name
+            else:
+                d["speaker"] = "Jane"
+                d["display_speaker"] = raw_speaker
+
+        new_dialogue_items = []
+        for d in dialogue_list:
+            if "display_speaker" not in d:
+                d["display_speaker"] = d["speaker"]
+            new_dialogue_items.append(DialogueItem(**d))
+
+        return Dialogue(dialogue=new_dialogue_items)
+    except json.JSONDecodeError as e:
+        print("[ERROR] JSON decoding (format) failed:", e)
+        raise ValueError(f"Failed to parse dialogue: {str(e)}")
+    except Exception as e:
+        print("[ERROR] JSON decoding failed:", e)
+        raise ValueError(f"Failed to parse dialogue: {str(e)}")
+
+def transcribe_youtube_video(video_url: str) -> str:
+    print("[LOG] Transcribing YouTube video via RapidAPI:", video_url)
+    video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", video_url)
+    if not video_id_match:
+        raise ValueError(f"Invalid YouTube URL: {video_url}, cannot extract video ID.")
+
+    video_id = video_id_match.group(1)
+    print("[LOG] Extracted video ID:", video_id)
+
+    base_url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
+    params = {
+        "video_id": video_id,
+        "lang": "en"
+    }
+    headers = {
+        "x-rapidapi-host": "youtube-transcriptor.p.rapidapi.com",
+        "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY")
+    }
+
+    try:
+        response = requests.get(base_url, headers=headers, params=params, timeout=30)
+        print("[LOG] RapidAPI Response Status Code:", response.status_code)
+        print("[LOG] RapidAPI Response Body:", response.text)
+
+        if response.status_code != 200:
+            raise ValueError(f"RapidAPI transcription error: {response.status_code}, {response.text}")
+
+        data = response.json()
+        if not isinstance(data, list) or not data:
+            raise ValueError(f"Unexpected transcript format or empty transcript: {data}")
+
+        transcript_as_text = data[0].get('transcriptionAsText', '').strip()
+        if not transcript_as_text:
+            raise ValueError("transcriptionAsText field is missing or empty.")
+
+        print("[LOG] Transcript retrieval successful.")
+        print(f"[DEBUG] Transcript Length: {len(transcript_as_text)} characters.")
+        snippet = transcript_as_text[:200] + "..." if len(transcript_as_text) > 200 else transcript_as_text
+        print(f"[DEBUG] Transcript Snippet: {snippet}")
+
+        return transcript_as_text
+    except Exception as e:
+        print("[ERROR] RapidAPI transcription error:", e)
+        raise ValueError(f"Error transcribing YouTube video via RapidAPI: {str(e)}")
+
+def generate_audio_mp3(text: str, speaker: str) -> str:
+    """
+    Calls Deepgram TTS with the text, returning a path to a temp MP3 file.
+    We also do some pre-processing for punctuation, abbreviations, numeric expansions,
+    plus emotive expressions (ha, sigh, etc.).
+    """
+    try:
+        print(f"[LOG] Generating audio for speaker: {speaker}")
+        processed_text = _preprocess_text_for_tts(text, speaker)
+
+        deepgram_api_url = "https://api.deepgram.com/v1/speak"
+        params = {
+            "model": "aura-asteria-en",  # female by default
+        }
+        if speaker == "John":
+            params["model"] = "aura-zeus-en"
+
+        headers = {
+            "Accept": "audio/mpeg",
+            "Content-Type": "application/json",
+            "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
+        }
+        body = {
+            "text": processed_text
+        }
+
+        response = requests.post(deepgram_api_url, params=params, headers=headers, json=body, stream=True)
+        if response.status_code != 200:
+            raise ValueError(f"Deepgram TTS error: {response.status_code}, {response.text}")
+
+        content_type = response.headers
-        if 'audio/mpeg' not in content_type:
-            raise ValueError("Unexpected Content-Type from Deepgram.")
-
-        for chunk in response.iter_content(chunk_size=8192):
-            if chunk:
-                mp3_file.write(chunk)
-        mp3_path = mp3_file.name
-
-        audio_seg = AudioSegment.from_file(mp3_path, format="mp3")
-        audio_seg = effects.normalize(audio_seg)
-
-        audio_seg.export(final_mp3_path, format="mp3")
-
-        os.remove(mp3_path)
-
-    except Exception as e:
-        print("[ERROR] Error generating audio:", e)
-        raise ValueError(f"Error generating audio: {str(e)}")
-
-def _preprocess_text_for_tts(text: str, speaker: str) -> str:
-    """
-    1) "SaaS" => "sass"
-    2) Insert periods for uppercase abbreviations -> remove for TTS
-    3) Preserve numbers for natural TTS pronunciation
-    4) Expand leftover all-caps
-    5) Emotive placeholders for 'ha', 'haha', 'sigh', 'groan', etc.
-    6) If speaker != Jane, insert filler words
-    7) Remove random fillers
-    8) Capitalize sentence starts
-    """
-    # 1) "SaaS" => "sass"
-    text = re.sub(r"\b(?i)SaaS\b", "sass", text)
-
-    # 2) Insert periods in uppercase abbreviations (>=2 chars), then remove them
-    def insert_periods_for_abbrev(m):
-        abbr = m.group(0)
-        parted = ".".join(list(abbr)) + "."
-        return parted
-    text = re.sub(r"\b([A-Z0-9]{2,})\b", insert_periods_for_abbrev, text)
-    text = re.sub(r"\.\.", ".", text)
-    def remove_periods_for_tts(m):
-        chunk = m.group(0)
-        return chunk.replace(".", " ").strip()
-    text = re.sub(r"[A-Z0-9]\.[A-Z0-9](?:\.[A-Z0-9])*\.", remove_periods_for_tts, text)
-
-    # 3) Preserve numbers by removing any digit-specific processing
-    # Let TTS handle natural number pronunciation
-
-    # 4) Hyphens -> spaces (but preserve hyphenated numbers)
-    text = re.sub(r"(?<!\d)-(?!\d)", " ", text)
-
-    # 5) Emotive placeholders
-    text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
-    text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
-    text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
-
-    # 6) Insert filler words if speaker != "Jane"
-    if speaker != "Jane":
-        def insert_thinking_pause(m):
-            word = m.group(1)
-            if random.random() < 0.3:
-                filler = random.choice(['hmm,', 'well,', 'let me see,'])
-                return f"{word}..., {filler}"
-            else:
-                return f"{word}...,"
-        keywords_pattern = r"\b(important|significant|crucial|point|topic)\b"
-        text = re.sub(keywords_pattern, insert_thinking_pause, text, flags=re.IGNORECASE)
-
-        conj_pattern = r"\b(and|but|so|because|however)\b"
-        text = re.sub(conj_pattern, lambda m: f"{m.group()}...", text, flags=re.IGNORECASE)
-
-    # 7) Remove random fillers
-    text = re.sub(r"\b(uh|um|ah)\b", "", text, flags=re.IGNORECASE)
-
-    # 8) Capitalize sentence starts
-    def capitalize_match(m):
-        return m.group().upper()
-    text = re.sub(r'(^\s*\w)|([.!?]\s*\w)', capitalize_match, text)
-
-    return text.strip()
-
-def _spell_digits(d: str) -> str:
-    """
-    Convert individual digits '3' -> 'three'.
-    """
-    digit_map = {
-        '0': 'zero',
-        '1': 'one',
-        '2': 'two',
-        '3': 'three',
-        '4': 'four',
-        '5': 'five',
-        '6': 'six',
-        '7': 'seven',
-        '8': 'eight',
-        '9': 'nine'
-    }
-    return " ".join(digit_map[ch] for ch in d if ch in digit_map)
-
-def mix_with_bg_music(spoken: AudioSegment, custom_music_path=None) -> AudioSegment:
-    """
-    Mixes 'spoken' with a default bg_music.mp3 or user-provided custom music:
-    1) Start with 2 seconds of music alone before speech begins.
-    2) Loop the music if it's shorter than the final audio length.
-    3) Lower music volume so the speech is clear.
-    """
-    if custom_music_path:
-        music_path = custom_music_path
-    else:
-        music_path = "bg_music.mp3"
-
-    except Exception as e:
-        print("[ERROR] Failed to load background music:", e)
-        return spoken
-
-    looped_music = AudioSegment.empty()
-    while len(looped_music) < total_length_ms:
-        looped_music += bg_music
-
-    final_mix = looped_music.overlay(spoken, position=2000)
-    return final_mix
-
-def call_groq_api_for_qa(system_prompt: str) -> str:
-    """
-    A minimal placeholder for your short Q&A LLM call.
-    Must return a JSON string, e.g.:
-    {"speaker": "John", "text": "Short answer here"}
-    """
-    groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
-    try:
-        response = groq_client.chat.completions.create(
-            messages=[{"role": "system", "content": system_prompt}],
-            model="llama-3.3-70b-versatile",
-            max_tokens=512,
-            temperature=0.7
-        )
-    except Exception as e:
-        print("[ERROR] Groq API error:", e)
-        fallback = {"speaker": "John", "text": "I'm sorry, I'm having trouble answering right now."}
-        return json.dumps(fallback)
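
For orientation, here is a minimal usage sketch (not part of the commit) of how the updated research and scripting helpers might be wired together; the topic, prompt text, and argument values are illustrative assumptions, and GROQ_API_KEY must be set in the environment.

# Illustrative sketch: research a topic, then turn it into a two-host script.
from utils import research_topic, generate_script

topic = "community solar power"                      # assumed example topic
aggregated_info = research_topic(topic)              # RSS + Wikipedia, with LLM fallback if thin

script = generate_script(
    system_prompt="You are writing a two-host podcast conversation.",  # assumed prompt text
    input_text=aggregated_info,
    tone="Casual",                                   # one of the tone_map keys
    target_length="3 minutes",                       # minutes are parsed with re.search(r"(\d+)", ...)
    host_name="Jane",
    guest_name="John",
    sponsor_style="Separate Break",
    sponsor_provided=None,                           # no sponsor read in this sketch
)

for item in script.dialogue:
    # display_speaker keeps the on-screen name; speaker ("Jane"/"John") selects the TTS voice
    print(f"{item.display_speaker}: {item.text}")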
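
A follow-on sketch of the audio path, continuing from the script above: it assumes DEEPGRAM_API_KEY is set, that pydub and ffmpeg are installed, and that mix_with_bg_music (shown in the removed block) remains available alongside a bg_music.mp3 file; none of this is asserted by the commit itself.

# Illustrative sketch: synthesize each line, stitch the segments, then add background music.
from pydub import AudioSegment
from utils import generate_audio_mp3, mix_with_bg_music

spoken = AudioSegment.empty()
for item in script.dialogue:
    # "Jane" maps to the aura-asteria-en voice, "John" to aura-zeus-en inside generate_audio_mp3
    segment_path = generate_audio_mp3(item.text, item.speaker)
    spoken += AudioSegment.from_file(segment_path, format="mp3")

# mix_with_bg_music leads with ~2 seconds of music and loops it under the speech
final_mix = mix_with_bg_music(spoken)                # or mix_with_bg_music(spoken, "custom_music.mp3")
final_mix.export("podcast_episode.mp3", format="mp3")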
|