siddhartharyaai committed on
Commit 74cfa8d · verified · 1 Parent(s): 43b0279

Update utils.py

Files changed (1)
  1. utils.py +439 -450
utils.py CHANGED
@@ -1,5 +1,3 @@
- # utils.py
-
  import os
  import re
  import json
@@ -15,11 +13,10 @@ import tiktoken
  from groq import Groq
  import numpy as np
  import torch
- import random

  class DialogueItem(BaseModel):
- speaker: Literal["Jane", "John"] # TTS voice
- display_speaker: str = "Jane" # For display in transcript
+ speaker: Literal["Jane", "John"] # TTS voice
+ display_speaker: str = "Jane" # For display in transcript
  text: str

  class Dialogue(BaseModel):
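A minimal usage sketch (not part of the commit) of how these two pydantic models validate one dialogue turn. It assumes Dialogue declares a dialogue: List[DialogueItem] field, which matches the Dialogue(dialogue=...) call later in this file:

    from typing import List, Literal
    from pydantic import BaseModel

    class DialogueItem(BaseModel):
        speaker: Literal["Jane", "John"]  # TTS voice
        display_speaker: str = "Jane"     # For display in transcript
        text: str

    class Dialogue(BaseModel):
        dialogue: List[DialogueItem]      # assumed field, per Dialogue(dialogue=...) below

    item = DialogueItem(speaker="John", display_speaker="Dr. Smith", text="Hello!")
    print(Dialogue(dialogue=[item]).model_dump())  # pydantic v2; use .dict() on v1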
@@ -47,8 +44,7 @@ def truncate_text(text, max_tokens=2048):

  def extract_text_from_url(url):
  """
- Fetches and extracts readable text from a given URL
- (stripping out scripts, styles, etc.).
+ Fetches and extracts readable text from a given URL (stripping out scripts, styles, etc.).
  """
  print("[LOG] Extracting text from URL:", url)
  try:
@@ -85,8 +81,7 @@ def pitch_shift(audio: AudioSegment, semitones: int) -> AudioSegment:

  def is_sufficient(text: str, min_word_count: int = 500) -> bool:
  """
- Checks if the fetched text meets our sufficiency criteria
- (e.g., at least 500 words).
+ Checks if the fetched text meets our sufficiency criteria (e.g., at least 500 words).
  """
  word_count = len(text.split())
  print(f"[DEBUG] Aggregated word count: {word_count}")
@@ -98,6 +93,7 @@ def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
  Appends it to our aggregated info if found.
  """
  print("[LOG] Querying LLM for additional information.")
+
  system_prompt = (
  "You are an AI assistant with extensive knowledge up to 2023-10. "
  "Provide additional relevant information on the following topic based on your knowledge base.\n\n"
@@ -105,7 +101,9 @@ def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
  f"Existing Information: {existing_text}\n\n"
  "Please add more insightful details, facts, and perspectives to enhance the understanding of the topic."
  )
+
  groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+
  try:
  response = groq_client.chat.completions.create(
  messages=[{"role": "system", "content": system_prompt}],
@@ -113,19 +111,22 @@ def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
  max_tokens=1024,
  temperature=0.7
  )
+
+ additional_info = response.choices[0].message.content.strip()
+ print("[DEBUG] Additional information from LLM:")
+ print(additional_info)
+ return additional_info
+
  except Exception as e:
  print("[ERROR] Groq API error during fallback:", e)
  return ""
- additional_info = response.choices[0].message.content.strip()
- print("[DEBUG] Additional information from LLM:")
- print(additional_info)
- return additional_info

  def research_topic(topic: str) -> str:
  """
  Gathers info from various RSS feeds and Wikipedia. If needed, queries the LLM
  for more data if the aggregated text is insufficient.
  """
+
  sources = {
  "BBC": "https://feeds.bbci.co.uk/news/rss.xml",
  "CNN": "http://rss.cnn.com/rss/edition.rss",
@@ -137,484 +138,472 @@ def research_topic(topic: str) -> str:
  "Google News - Custom": f"https://news.google.com/rss/search?q={requests.utils.quote(topic)}&hl=en-IN&gl=IN&ceid=IN:en",
  }

- summary_parts = []
-
- # Wikipedia summary
- wiki_summary = fetch_wikipedia_summary(topic)
- if wiki_summary:
- summary_parts.append(f"From Wikipedia: {wiki_summary}")
-
- # For each RSS feed
- for name, feed_url in sources.items():
- try:
- items = fetch_rss_feed(feed_url)
- if not items:
- continue
- title, desc, link = find_relevant_article(items, topic, min_match=2)
- if link:
- article_text = fetch_article_text(link)
- if article_text:
- summary_parts.append(f"From {name}: {article_text}")
- else:
- summary_parts.append(f"From {name}: {title} - {desc}")
- except Exception as e:
- print(f"[ERROR] Error fetching from {name} RSS feed:", e)
- continue
-
- aggregated_info = " ".join(summary_parts)
- print("[DEBUG] Aggregated info from primary sources:")
- print(aggregated_info)
-
- # If not enough data, fallback to LLM
- if not is_sufficient(aggregated_info):
- print("[LOG] Insufficient info from primary sources. Fallback to LLM.")
- additional_info = query_llm_for_additional_info(topic, aggregated_info)
- if additional_info:
- aggregated_info += " " + additional_info
- else:
- print("[ERROR] Failed to retrieve additional info from LLM.")
-
- if not aggregated_info:
- return f"Sorry, I couldn't find recent information on '{topic}'."
-
- return aggregated_info
+ summary_parts = []
+
+ # Wikipedia summary
+ wiki_summary = fetch_wikipedia_summary(topic)
+
+ if wiki_summary:
+ summary_parts.append(f"From Wikipedia: {wiki_summary}")
+
+ # For each RSS feed
+ for name, feed_url in sources.items():
+ try:
+ items = fetch_rss_feed(feed_url)
+ if not items:
+ continue
+
+ title, desc, link = find_relevant_article(items, topic, min_match=2)
+
+ if link:
+ article_text = fetch_article_text(link)
+ if article_text:
+ summary_parts.append(f"From {name}: {article_text}")
+ else:
+ summary_parts.append(f"From {name}: {title} - {desc}")
+
+ except Exception as e:
+ print(f"[ERROR] Error fetching from {name} RSS feed:", e)
+ continue
+
+ aggregated_info = " ".join(summary_parts)
+
+ print("[DEBUG] Aggregated info from primary sources:")
+ print(aggregated_info)
+
+ # If not enough data, fallback to LLM
+ if not is_sufficient(aggregated_info):
+ print("[LOG] Insufficient info from primary sources. Fallback to LLM.")
+ additional_info = query_llm_for_additional_info(topic, aggregated_info)
+
+ if additional_info:
+ aggregated_info += " " + additional_info
+ else:
+ print("[ERROR] Failed to retrieve additional info from LLM.")
+
+ if not aggregated_info:
+ return f"Sorry, I couldn't find recent information on '{topic}'."
+
+ return aggregated_info
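A hedged driver sketch (not from the commit) of how a caller might exercise research_topic; it needs network access, and the LLM fallback additionally needs GROQ_API_KEY in the environment:

    import os

    os.environ.setdefault("GROQ_API_KEY", "sk-...")  # placeholder value; use a real key
    info = research_topic("renewable energy in India")
    print(f"{len(info.split())} words gathered")
    print(info[:200], "...")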
 
  def fetch_wikipedia_summary(topic: str) -> str:
- """
- Fetch a quick Wikipedia summary of the topic via the official Wikipedia API.
- """
- print("[LOG] Fetching Wikipedia summary for:", topic)
- try:
- search_url = (
- f"https://en.wikipedia.org/w/api.php?action=opensearch&search={requests.utils.quote(topic)}"
- "&limit=1&namespace=0&format=json"
- )
- resp = requests.get(search_url)
- if resp.status_code != 200:
- print(f"[ERROR] Failed to fetch Wikipedia search results for {topic}")
- return ""
- data = resp.json()
- if len(data) > 1 and data[1]:
- title = data[1][0]
- summary_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{requests.utils.quote(title)}"
- s_resp = requests.get(summary_url)
- if s_resp.status_code == 200:
- s_data = s_resp.json()
- if "extract" in s_data:
- print("[LOG] Wikipedia summary fetched successfully.")
- return s_data["extract"]
- return ""
- except Exception as e:
- print(f"[ERROR] Exception during Wikipedia summary fetch: {e}")
- return ""
+ """
+ Fetch a quick Wikipedia summary of the topic via the official Wikipedia API.
+ """
+ print("[LOG] Fetching Wikipedia summary for:", topic)
+
+ try:
+ search_url = (
+ f"https://en.wikipedia.org/w/api.php?action=opensearch&search={requests.utils.quote(topic)}"
+ "&limit=1&namespace=0&format=json"
+ )
+
+ resp = requests.get(search_url)
+ if resp.status_code != 200:
+ print(f"[ERROR] Failed to fetch Wikipedia search results for {topic}")
+ return ""
+
+ data = resp.json()
+ if len(data) > 1 and data[1]:
+ title = data[1][0]
+ summary_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{requests.utils.quote(title)}"
+ s_resp = requests.get(summary_url)
+
+ if s_resp.status_code == 200:
+ s_data = s_resp.json()
+ if "extract" in s_data:
+ print("[LOG] Wikipedia summary fetched successfully.")
+ return s_data["extract"]
+ return ""
+
+ except Exception as e:
+ print(f"[ERROR] Exception during Wikipedia summary fetch: {e}")
+ return ""
 
  def fetch_rss_feed(feed_url: str) -> list:
- """
- Pulls RSS feed data from a given URL and returns items.
- """
- print("[LOG] Fetching RSS feed:", feed_url)
- try:
- resp = requests.get(feed_url)
- if resp.status_code != 200:
- print(f"[ERROR] Failed to fetch RSS feed: {feed_url}")
- return []
- soup = BeautifulSoup(resp.content, "xml")
- items = soup.find_all("item")
- return items
- except Exception as e:
- print(f"[ERROR] Exception fetching RSS feed {feed_url}: {e}")
- return []
+ """
+ Pulls RSS feed data from a given URL and returns items.
+ """
+ print("[LOG] Fetching RSS feed:", feed_url)
+
+ try:
+ resp = requests.get(feed_url)
+ if resp.status_code != 200:
+ print(f"[ERROR] Failed to fetch RSS feed: {feed_url}")
+ return []
+
+ soup = BeautifulSoup(resp.content, "xml")
+ items = soup.find_all("item")
+ return items
+
+ except Exception as e:
+ print(f"[ERROR] Exception fetching RSS feed {feed_url}: {e}")
+ return []
 
  def find_relevant_article(items, topic: str, min_match=2) -> tuple:
- """
- Check each article in the RSS feed for mention of the topic
- by counting the number of keyword matches.
- """
- print("[LOG] Finding relevant articles...")
- keywords = re.findall(r'\w+', topic.lower())
- for item in items:
- title = item.find("title").get_text().strip() if item.find("title") else ""
- description = item.find("description").get_text().strip() if item.find("description") else ""
- text = (title + " " + description).lower()
- matches = sum(1 for kw in keywords if kw in text)
- if matches >= min_match:
- link = item.find("link").get_text().strip() if item.find("link") else ""
- print(f"[LOG] Relevant article found: {title}")
- return title, description, link
- return None, None, None
+ """
+ Check each article in the RSS feed for mention of the topic by counting
+ the number of keyword matches.
+ """
+ print("[LOG] Finding relevant articles...")
+
+ keywords = re.findall(r'\w+', topic.lower())
+
+ for item in items:
+ title = item.find("title").get_text().strip() if item.find("title") else ""
+ description = item.find("description").get_text().strip() if item.find("description") else ""
+
+ text = (title + " " + description).lower()
+
+ matches = sum(1 for kw in keywords if kw in text)
+
+ if matches >= min_match:
+ link = item.find("link").get_text().strip() if item.find("link") else ""
+ print(f"[LOG] Relevant article found: {title}")
+ return title, description, link
+
+ return None, None, None
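The relevance test above is a simple bag-of-words overlap; this standalone snippet reproduces the same counting rule on plain strings:

    import re

    def count_matches(topic: str, text: str) -> int:
        keywords = re.findall(r'\w+', topic.lower())
        t = text.lower()
        return sum(1 for kw in keywords if kw in t)

    print(count_matches("electric cars", "Electric vehicle sales rise as cars go green"))  # 2 -> passes min_match=2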
 
  def fetch_article_text(link: str) -> str:
- """
- Fetch the article text from the given link (first 5 paragraphs).
- """
- print("[LOG] Fetching article text from:", link)
- if not link:
- print("[LOG] No link provided for article text.")
- return ""
- try:
- resp = requests.get(link)
- if resp.status_code != 200:
- print(f"[ERROR] Failed to fetch article from {link}")
- return ""
- soup = BeautifulSoup(resp.text, 'html.parser')
- paragraphs = soup.find_all("p")
- text = " ".join(p.get_text() for p in paragraphs[:5]) # first 5 paragraphs
- print("[LOG] Article text fetched successfully.")
- return text.strip()
- except Exception as e:
- print(f"[ERROR] Error fetching article text: {e}")
- return ""
+ """
+ Fetch the article text from the given link (first 5 paragraphs).
+ """
+ print("[LOG] Fetching article text from:", link)
+
+ if not link:
+ print("[LOG] No link provided for article text.")
+ return ""
+
+ try:
+ resp = requests.get(link)
+
+ if resp.status_code != 200:
+ print(f"[ERROR] Failed to fetch article from {link}")
+ return ""
+
+ soup = BeautifulSoup(resp.text, 'html.parser')
+
+ paragraphs = soup.find_all("p")
+
+ text = " ".join(p.get_text() for p in paragraphs[:5]) # first 5 paragraphs
+
+ print("[LOG] Article text fetched successfully.")
+
+ return text.strip()
+
+ except Exception as e:
+ print(f"[ERROR] Error fetching article text: {e}")
+ return ""
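The paragraph-truncation step can be exercised without a live URL; a self-contained sketch using an inline HTML string:

    from bs4 import BeautifulSoup

    html = "".join(f"<p>para {i}</p>" for i in range(8))
    soup = BeautifulSoup(html, "html.parser")
    text = " ".join(p.get_text() for p in soup.find_all("p")[:5])
    print(text)  # "para 0 para 1 para 2 para 3 para 4"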
 
  def generate_script(
- system_prompt: str,
- input_text: str,
- tone: str,
- target_length: str,
- host_name: str = "Jane",
- guest_name: str = "John",
- sponsor_style: str = "Separate Break",
- sponsor_provided=None # Accept sponsor_provided parameter
+ system_prompt: str,
+ input_text: str,
+ tone: str,
+ target_length: str,
+ host_name: str = "Jane",
+ guest_name: str = "John",
+ sponsor_style: str = "Separate Break",
+ sponsor_provided=None # Accept sponsor_provided parameter
  ):
- print("[LOG] Generating script with tone:", tone, "and length:", target_length)
279
- groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
- words_per_minute = 150
282
- numeric_minutes = 3
283
- match = re.search(r"(\d+)", target_length)
284
- if match:
285
- numeric_minutes = int(match.group(1))
286
 
287
- min_words = max(50, numeric_minutes * 100)
288
- max_words = numeric_minutes * words_per_minute
289
 
290
- tone_map = {
291
- "Humorous": "funny and exciting, makes people chuckle",
292
- "Formal": "business-like, well-structured, professional",
293
- "Casual": "like a conversation between close friends, relaxed and informal",
294
- "Youthful": "like how teenagers might chat, energetic and lively"
295
- }
296
- chosen_tone = tone_map.get(tone, "casual")
297
-
298
- # Determine sponsor instructions based on sponsor_provided and sponsor_style
299
- if sponsor_provided:
300
- if sponsor_style == "Separate Break":
301
- sponsor_instructions = (
302
- "If sponsor content is provided, include it in a separate ad break (~30 seconds). "
303
- "Use phrasing like 'Now a word from our sponsor...' and end with 'Back to the show' or similar."
304
- )
305
- else:
306
- sponsor_instructions = (
307
- "If sponsor content is provided, blend it naturally (~30 seconds) into the conversation. "
308
- "Avoid abrupt transitions."
309
- )
310
- else:
311
- sponsor_instructions = "" # No sponsor instructions if sponsor_provided is empty
312
-
313
- prompt = (
314
- f"{system_prompt}\n"
315
- f"TONE: {chosen_tone}\n"
316
- f"TARGET LENGTH: {target_length} (~{min_words}-{max_words} words)\n"
317
- f"INPUT TEXT: {input_text}\n\n"
318
- f"# Sponsor Style Instruction:\n{sponsor_instructions}\n\n"
319
- "Please provide the output in the following JSON format without any additional text:\n\n"
320
- "{\n"
321
- ' "dialogue": [\n'
322
- ' {\n'
323
- ' "speaker": "Jane",\n'
324
- ' "text": "..." \n'
325
- ' },\n'
326
- ' {\n'
327
- ' "speaker": "John",\n'
328
- ' "text": "..." \n'
329
- ' }\n'
330
- " ]\n"
331
- "}"
332
- )
333
- print("[LOG] Sending prompt to Groq:")
334
- print(prompt)
335
 
336
- try:
337
- response = groq_client.chat.completions.create(
338
- messages=[{"role": "system", "content": prompt}],
339
- model="llama-3.3-70b-versatile",
340
- max_tokens=2048,
341
- temperature=0.7
342
- )
343
- except Exception as e:
344
- print("[ERROR] Groq API error:", e)
345
- raise ValueError(f"Error communicating with Groq API: {str(e)}")
346
 
347
- raw_content = response.choices[0].message.content.strip()
348
- start_index = raw_content.find('{')
349
- end_index = raw_content.rfind('}')
350
- if start_index == -1 or end_index == -1:
351
- raise ValueError("Failed to parse dialogue: No JSON found.")
352
 
353
- json_str = raw_content[start_index:end_index+1].strip()
354
 
355
- try:
356
- data = json.loads(json_str)
357
- dialogue_list = data.get("dialogue", [])
358
-
359
- for d in dialogue_list:
360
- raw_speaker = d.get("speaker", "Jane")
361
- if raw_speaker.lower() == host_name.lower():
362
- d["speaker"] = "Jane"
363
- d["display_speaker"] = host_name
364
- elif raw_speaker.lower() == guest_name.lower():
365
- d["speaker"] = "John"
366
- d["display_speaker"] = guest_name
367
- else:
368
- d["speaker"] = "Jane"
369
- d["display_speaker"] = raw_speaker
370
-
371
- new_dialogue_items = []
372
- for d in dialogue_list:
373
- if "display_speaker" not in d:
374
- d["display_speaker"] = d["speaker"]
375
- new_dialogue_items.append(DialogueItem(**d))
376
-
377
- return Dialogue(dialogue=new_dialogue_items)
378
- except json.JSONDecodeError as e:
379
- print("[ERROR] JSON decoding (format) failed:", e)
380
- raise ValueError(f"Failed to parse dialogue: {str(e)}")
381
- except Exception as e:
382
- print("[ERROR] JSON decoding failed:", e)
383
- raise ValueError(f"Failed to parse dialogue: {str(e)}")
384
-
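The parsing strategy above (kept in the rewritten version further down) slices from the first '{' to the last '}' before json.loads, which tolerates model chatter around the JSON; in isolation:

    import json

    raw = 'Sure! Here is the script: {"dialogue": [{"speaker": "Jane", "text": "Hi"}]} Enjoy!'
    start, end = raw.find('{'), raw.rfind('}')
    if start == -1 or end == -1:
        raise ValueError("No JSON found.")
    print(json.loads(raw[start:end + 1]))  # {'dialogue': [{'speaker': 'Jane', 'text': 'Hi'}]}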
- def transcribe_youtube_video(video_url: str) -> str:
- print("[LOG] Transcribing YouTube video via RapidAPI:", video_url)
- video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", video_url)
- if not video_id_match:
- raise ValueError(f"Invalid YouTube URL: {video_url}, cannot extract video ID.")
-
- video_id = video_id_match.group(1)
- print("[LOG] Extracted video ID:", video_id)
-
- base_url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
- params = {
- "video_id": video_id,
- "lang": "en"
- }
- headers = {
- "x-rapidapi-host": "youtube-transcriptor.p.rapidapi.com",
- "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY")
- }
-
- try:
- response = requests.get(base_url, headers=headers, params=params, timeout=30)
- print("[LOG] RapidAPI Response Status Code:", response.status_code)
- print("[LOG] RapidAPI Response Body:", response.text)
-
- if response.status_code != 200:
- raise ValueError(f"RapidAPI transcription error: {response.status_code}, {response.text}")
-
- data = response.json()
- if not isinstance(data, list) or not data:
- raise ValueError(f"Unexpected transcript format or empty transcript: {data}")
-
- transcript_as_text = data[0].get('transcriptionAsText', '').strip()
- if not transcript_as_text:
- raise ValueError("transcriptionAsText field is missing or empty.")
-
- print("[LOG] Transcript retrieval successful.")
- print(f"[DEBUG] Transcript Length: {len(transcript_as_text)} characters.")
- snippet = transcript_as_text[:200] + "..." if len(transcript_as_text) > 200 else transcript_as_text
- print(f"[DEBUG] Transcript Snippet: {snippet}")
-
- return transcript_as_text
-
- except Exception as e:
- print("[ERROR] RapidAPI transcription error:", e)
- raise ValueError(f"Error transcribing YouTube video via RapidAPI: {str(e)}")
 
- def generate_audio_mp3(text: str, speaker: str) -> str:
- """
- Calls Deepgram TTS with the text, returning a path to a temp MP3 file.
- We also do some pre-processing for punctuation, abbreviations, numeric expansions,
- plus emotive expressions (ha, sigh, etc.).
- """
- try:
- print(f"[LOG] Generating audio for speaker: {speaker}")
- processed_text = _preprocess_text_for_tts(text, speaker)
-
- deepgram_api_url = "https://api.deepgram.com/v1/speak"
- params = {
- "model": "aura-asteria-en", # female by default
- }
- if speaker == "John":
- params["model"] = "aura-zeus-en"
-
- headers = {
- "Accept": "audio/mpeg",
- "Content-Type": "application/json",
- "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
- }
- body = {
- "text": processed_text
- }
-
- response = requests.post(deepgram_api_url, params=params, headers=headers, json=body, stream=True)
- if response.status_code != 200:
- raise ValueError(f"Deepgram TTS error: {response.status_code}, {response.text}")
-
- content_type = response.headers.get('Content-Type', '')
- if 'audio/mpeg' not in content_type:
- raise ValueError("Unexpected Content-Type from Deepgram.")
-
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
- for chunk in response.iter_content(chunk_size=8192):
- if chunk:
- mp3_file.write(chunk)
- mp3_path = mp3_file.name
-
- # Normalize volume
- audio_seg = AudioSegment.from_file(mp3_path, format="mp3")
- audio_seg = effects.normalize(audio_seg)
-
- final_mp3_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
- audio_seg.export(final_mp3_path, format="mp3")
-
- if os.path.exists(mp3_path):
- os.remove(mp3_path)
-
- return final_mp3_path
- except Exception as e:
- print("[ERROR] Error generating audio:", e)
- raise ValueError(f"Error generating audio: {str(e)}")
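The normalize-then-export step relies on pydub (and an ffmpeg install); a self-contained sketch on a generated test tone instead of a Deepgram response:

    from pydub import effects
    from pydub.generators import Sine

    seg = Sine(440).to_audio_segment(duration=500)  # 0.5 s test tone in place of TTS audio
    effects.normalize(seg).export("normalized.mp3", format="mp3")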
 
- def transcribe_youtube_video_OLD_YTDLP(video_url: str) -> str:
- pass

- def _preprocess_text_for_tts(text: str, speaker: str) -> str:
- """
- 1) "SaaS" => "sass"
- 2) Insert periods for uppercase abbreviations -> remove for TTS
- 3) Preserve numbers for natural TTS pronunciation
- 4) Expand leftover all-caps
- 5) Emotive placeholders for 'ha', 'haha', 'sigh', 'groan', etc.
- 6) If speaker != Jane, insert filler words
- 7) Remove random fillers
- 8) Capitalize sentence starts
- """
- # 1) "SaaS" => "sass"
- text = re.sub(r"\b(?i)SaaS\b", "sass", text)
-
- # 2) Insert periods in uppercase abbreviations (>=2 chars), then remove them
- def insert_periods_for_abbrev(m):
- abbr = m.group(0)
- parted = ".".join(list(abbr)) + "."
- return parted
- text = re.sub(r"\b([A-Z0-9]{2,})\b", insert_periods_for_abbrev, text)
- text = re.sub(r"\.\.", ".", text)
- def remove_periods_for_tts(m):
- chunk = m.group(0)
- return chunk.replace(".", " ").strip()
- text = re.sub(r"[A-Z0-9]\.[A-Z0-9](?:\.[A-Z0-9])*\.", remove_periods_for_tts, text)
-
- # 3) Preserve numbers by removing any digit-specific processing
- # Let TTS handle natural number pronunciation
-
- # 4) Hyphens -> spaces (but preserve hyphenated numbers)
- text = re.sub(r"(?<!\d)-(?!\d)", " ", text)
-
- # 5) Emotive placeholders
- text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
- text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
- text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
-
- # 6) Insert filler words if speaker != "Jane"
- if speaker != "Jane":
- def insert_thinking_pause(m):
- word = m.group(1)
- if random.random() < 0.3:
- filler = random.choice(['hmm,', 'well,', 'let me see,'])
- return f"{word}..., {filler}"
- else:
- return f"{word}...,"
- keywords_pattern = r"\b(important|significant|crucial|point|topic)\b"
- text = re.sub(keywords_pattern, insert_thinking_pause, text, flags=re.IGNORECASE)
-
- conj_pattern = r"\b(and|but|so|because|however)\b"
- text = re.sub(conj_pattern, lambda m: f"{m.group()}...", text, flags=re.IGNORECASE)
-
- # 7) Remove random fillers
- text = re.sub(r"\b(uh|um|ah)\b", "", text, flags=re.IGNORECASE)
-
- # 8) Capitalize sentence starts
- def capitalize_match(m):
- return m.group().upper()
- text = re.sub(r'(^\s*\w)|([.!?]\s*\w)', capitalize_match, text)
-
- return text.strip()
-
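Step 5's emotive placeholders can be checked in isolation; the same three substitutions run on a sample sentence:

    import re

    text = "Haha, that was wild. Sigh. What a groan-worthy pun."
    text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
    text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
    text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
    print(text)  # (* laughs *), that was wild. (* sighs *). What a (* groans *)-worthy pun.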
- def _spell_digits(d: str) -> str:
- """
- Convert individual digits '3' -> 'three'.
- """
- digit_map = {
- '0': 'zero',
- '1': 'one',
- '2': 'two',
- '3': 'three',
- '4': 'four',
- '5': 'five',
- '6': 'six',
- '7': 'seven',
- '8': 'eight',
- '9': 'nine'
- }
- return " ".join(digit_map[ch] for ch in d if ch in digit_map)

- def mix_with_bg_music(spoken: AudioSegment, custom_music_path=None) -> AudioSegment:
- """
- Mixes 'spoken' with a default bg_music.mp3 or user-provided custom music:
- 1) Start with 2 seconds of music alone before speech begins.
- 2) Loop the music if it's shorter than the final audio length.
- 3) Lower music volume so the speech is clear.
- """
- if custom_music_path:
- music_path = custom_music_path
- else:
- music_path = "bg_music.mp3"
-
- try:
- bg_music = AudioSegment.from_file(music_path, format="mp3")
- except Exception as e:
- print("[ERROR] Failed to load background music:", e)
- return spoken
-
- bg_music = bg_music - 18.0
-
- total_length_ms = len(spoken) + 2000
- looped_music = AudioSegment.empty()
- while len(looped_music) < total_length_ms:
- looped_music += bg_music
-
- looped_music = looped_music[:total_length_ms]
- final_mix = looped_music.overlay(spoken, position=2000)
- return final_mix
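The looping-and-offset arithmetic is easiest to see with silent stand-in segments (pydub only, no audio files needed):

    from pydub import AudioSegment

    spoken = AudioSegment.silent(duration=5000)  # stand-in for speech
    bg = AudioSegment.silent(duration=1500)      # stand-in for bg_music.mp3

    total = len(spoken) + 2000                   # 2 s of music before speech starts
    looped = AudioSegment.empty()
    while len(looped) < total:
        looped += bg
    looped = looped[:total]
    final = looped.overlay(spoken, position=2000)
    print(len(final))  # 7000 ms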
 
- # This function is new for short Q&A calls
- def call_groq_api_for_qa(system_prompt: str) -> str:
- """
- A minimal placeholder for your short Q&A LLM call.
- Must return a JSON string, e.g.:
- {"speaker": "John", "text": "Short answer here"}
- """
- groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
- try:
- response = groq_client.chat.completions.create(
- messages=[{"role": "system", "content": system_prompt}],
- model="llama-3.3-70b-versatile",
- max_tokens=512,
- temperature=0.7
- )
- except Exception as e:
- print("[ERROR] Groq API error:", e)
- fallback = {"speaker": "John", "text": "I'm sorry, I'm having trouble answering right now."}
- return json.dumps(fallback)
-
- raw_content = response.choices[0].message.content.strip()
- return raw_content
-
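The short-Q&A contract documented above is a single JSON object; a caller would consume it like this (using the documented fallback payload as sample data):

    import json

    raw = '{"speaker": "John", "text": "Short answer here"}'
    msg = json.loads(raw)
    print(msg["speaker"], "-", msg["text"])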
+
+ print("[LOG] Generating script with tone:", tone, "and length:", target_length)
+
+ groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+
+ words_per_minute = 150
+ numeric_minutes = 3
+
+ match = re.search(r"(\d+)", target_length)
+
+ if match:
+ numeric_minutes = int(match.group(1))
+
+ min_words = max(50, numeric_minutes * 100)
+ max_words = numeric_minutes * words_per_minute
+
+ # Tone mapping dictionary
+ tone_map = {
+ "Humorous": "funny and exciting, makes people chuckle",
+ "Formal": "business-like, well-structured, professional",
+ "Casual": "like a conversation between close friends, relaxed and informal",
+ "Youthful": "like how teenagers might chat, energetic and lively"
+ }
+
+ chosen_tone = tone_map.get(tone, "casual")
+
+ # Determine sponsor instructions based on sponsor_provided and sponsor_style
+ if sponsor_provided:
+ if sponsor_style == "Separate Break":
+ sponsor_instructions = (
+ "If sponsor content is provided, include it in a separate ad break (~30 seconds). "
+ "Use phrasing like 'Now a word from our sponsor...' and end with 'Back to the show' or similar."
+ )
+ else:
+ sponsor_instructions = (
+ "If sponsor content is provided, blend it naturally (~30 seconds) into the conversation. "
+ "Avoid abrupt transitions."
+ )
+ else:
+ sponsor_instructions = "" # No sponsor instructions if sponsor_provided is empty
+
+ prompt = (
+ f"{system_prompt}\n"
+ f"TONE: {chosen_tone}\n"
+ f"TARGET LENGTH: {target_length} (~{min_words}-{max_words} words)\n"
+ f"INPUT TEXT: {input_text}\n\n"
+ f"# Sponsor Style Instruction:\n{sponsor_instructions}\n\n"
+ "Please provide the output in the following JSON format without any additional text:\n\n"
+ "{\n"
+ ' "dialogue": [\n'
+ ' {\n'
+ ' "speaker": "Jane",\n'
+ ' "text": "..."\n'
+ ' },\n'
+ ' {\n'
+ ' "speaker": "John",\n'
+ ' "text": "..."\n'
+ ' }\n'
+ " ]\n"
+ "}"
+ )
+
+ print("[LOG] Sending prompt to Groq:")
+ print(prompt)
+
+ try:
+ response = groq_client.chat.completions.create(
+ messages=[{"role": "system", "content": prompt}],
+ model="llama-3.3-70b-versatile",
+ max_tokens=2048,
+ temperature=0.7
+ )
+ except Exception as e:
+ print("[ERROR] Groq API error:", e)
+ raise ValueError(f"Error communicating with Groq API: {str(e)}")
+
+ raw_content = response.choices[0].message.content.strip()
+ start_index = raw_content.find('{')
+ end_index = raw_content.rfind('}')
+
+ if start_index == -1 or end_index == -1:
+ raise ValueError("Failed to parse dialogue: No JSON found.")
+
+ json_str = raw_content[start_index:end_index+1].strip()
+
+ try:
+ data = json.loads(json_str)
+ dialogue_list = data.get("dialogue", [])
+
+ for d in dialogue_list:
+ raw_speaker = d.get("speaker", "Jane")
+ if raw_speaker.lower() == host_name.lower():
+ d["speaker"] = "Jane"
+ d["display_speaker"] = host_name
+ elif raw_speaker.lower() == guest_name.lower():
+ d["speaker"] = "John"
+ d["display_speaker"] = guest_name
+ else:
+ d["speaker"] = "Jane"
+ d["display_speaker"] = raw_speaker
+
+ new_dialogue_items = []
+ for d in dialogue_list:
+ if "display_speaker" not in d:
+ d["display_speaker"] = d["speaker"]
+ new_dialogue_items.append(DialogueItem(**d))
+
+ return Dialogue(dialogue=new_dialogue_items)
+
+ except json.JSONDecodeError as e:
+ print("[ERROR] JSON decoding (format) failed:", e)
+ raise ValueError(f"Failed to parse dialogue: {str(e)}")
+ except Exception as e:
+ print("[ERROR] JSON decoding failed:", e)
+ raise ValueError(f"Failed to parse dialogue: {str(e)}")
+
+ def transcribe_youtube_video(video_url: str) -> str:
+ print("[LOG] Transcribing YouTube video via RapidAPI:", video_url)
+
+ video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", video_url)
+
+ if not video_id_match:
+ raise ValueError(f"Invalid YouTube URL: {video_url}, cannot extract video ID.")
+
+ video_id = video_id_match.group(1)
+ print("[LOG] Extracted video ID:", video_id)
+
+ base_url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
+ params = {
+ "video_id": video_id,
+ "lang": "en"
+ }
+
+ headers = {
+ "x-rapidapi-host": "youtube-transcriptor.p.rapidapi.com",
+ "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY")
+ }
+
+ try:
+ response = requests.get(base_url, headers=headers, params=params, timeout=30)
+ print("[LOG] RapidAPI Response Status Code:", response.status_code)
+ print("[LOG] RapidAPI Response Body:", response.text)
+
+ if response.status_code != 200:
+ raise ValueError(f"RapidAPI transcription error: {response.status_code}, {response.text}")
+
+ data = response.json()
+
+ if not isinstance(data, list) or not data:
+ raise ValueError(f"Unexpected transcript format or empty transcript: {data}")
+
+ transcript_as_text = data[0].get('transcriptionAsText', '').strip()
+
+ if not transcript_as_text:
+ raise ValueError("transcriptionAsText field is missing or empty.")
+
+ print("[LOG] Transcript retrieval successful.")
+ print(f"[DEBUG] Transcript Length: {len(transcript_as_text)} characters.")
+
+ snippet = transcript_as_text[:200] + "..." if len(transcript_as_text) > 200 else transcript_as_text
+ print(f"[DEBUG] Transcript Snippet: {snippet}")
+
+ return transcript_as_text
+
+ except Exception as e:
+ print("[ERROR] RapidAPI transcription error:", e)
+ raise ValueError(f"Error transcribing YouTube video via RapidAPI: {str(e)}")
+
+ def generate_audio_mp3(text: str, speaker: str) -> str:
+ """
+ Calls Deepgram TTS with the text, returning a path to a temp MP3 file.
+ We also do some pre-processing for punctuation, abbreviations,
+ numeric expansions, plus emotive expressions (ha, sigh, etc.).
+ """
+ try:
+ print(f"[LOG] Generating audio for speaker: {speaker}")
+ processed_text = _preprocess_text_for_tts(text, speaker)
+
+ deepgram_api_url = "https://api.deepgram.com/v1/speak"
+ params = {
+ "model": "aura-asteria-en", # female by default
+ }
+
+ if speaker == "John":
+ params["model"] = "aura-zeus-en"
+
+ headers = {
+ "Accept": "audio/mpeg",
+ "Content-Type": "application/json",
+ "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
+ }
+
+ body = {
+ "text": processed_text
+ }
+
+ response = requests.post(deepgram_api_url, params=params, headers=headers, json=body, stream=True)
+
+ if response.status_code != 200:
+ raise ValueError(f"Deepgram TTS error: {response.status_code}, {response.text}")
+
+ content_type = response.headers.get('Content-Type', '')
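For reference, the word budget that generate_script derives from target_length, extracted into a standalone helper (word_budget is a hypothetical name, not in the file):

    import re

    def word_budget(target_length: str, words_per_minute: int = 150) -> tuple:
        minutes = 3  # default when no number is found
        m = re.search(r"(\d+)", target_length)
        if m:
            minutes = int(m.group(1))
        return max(50, minutes * 100), minutes * words_per_minute

    print(word_budget("5 minutes"))  # (500, 750)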