siddhartharyaai committed on
Commit
9bb818e
·
verified ·
1 Parent(s): cc24111

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +50 -16
utils.py CHANGED
@@ -165,7 +165,7 @@ def research_topic(topic: str) -> str:
165
  print("[DEBUG] Aggregated info from primary sources:")
166
  print(aggregated_info)
167
 
168
- # Fallback to LLM if insufficient
169
  if not is_sufficient(aggregated_info):
170
  print("[LOG] Insufficient info from primary sources. Fallback to LLM.")
171
  additional_info = query_llm_for_additional_info(topic, aggregated_info)
@@ -277,9 +277,6 @@ def generate_script(
277
  """
278
  Sends the system_prompt plus input_text to the Groq LLM to generate a
279
  multi-speaker Dialogue in JSON, returning a Dialogue object.
280
-
281
- sponsor_style can be "Separate Break" or "Blended".
282
- We add instructions telling the model how to integrate the sponsor content.
283
  """
284
  print("[LOG] Generating script with tone:", tone, "and length:", target_length)
285
  groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
@@ -306,7 +303,7 @@ def generate_script(
306
  "If sponsor content is provided, include it in a separate ad break (~30 seconds). "
307
  "Use phrasing like 'Now a word from our sponsor...' and end with 'Back to the show' or similar."
308
  )
309
- else: # Blended
310
  sponsor_instructions = (
311
  "If sponsor content is provided, blend it naturally (~30 seconds) into the conversation. "
312
  "Avoid abrupt transitions."
@@ -377,7 +374,6 @@ def generate_script(
377
  new_dialogue_items.append(DialogueItem(**d))
378
 
379
  return Dialogue(dialogue=new_dialogue_items)
380
-
381
  except json.JSONDecodeError as e:
382
  print("[ERROR] JSON decoding (format) failed:", e)
383
  raise ValueError(f"Failed to parse dialogue: {str(e)}")
@@ -472,7 +468,7 @@ def generate_audio_mp3(text: str, speaker: str) -> str:
472
  mp3_file.write(chunk)
473
  mp3_path = mp3_file.name
474
 
475
- # Normalize
476
  audio_seg = AudioSegment.from_file(mp3_path, format="mp3")
477
  audio_seg = effects.normalize(audio_seg)
478
 
@@ -493,7 +489,7 @@ def transcribe_youtube_video_OLD_YTDLP(video_url: str) -> str:
493
  def _preprocess_text_for_tts(text: str, speaker: str) -> str:
494
  """
495
  1) "SaaS" => "sass"
496
- 2) Insert periods in uppercase abbreviations -> remove for TTS
497
  3) Convert decimals like "3.14" -> "three point one four"
498
  4) Convert pure integer numbers like "20" -> "twenty"
499
  5) Expand leftover all-caps
@@ -505,7 +501,7 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
505
  # 1) "SaaS" => "sass"
506
  text = re.sub(r"\b(?i)SaaS\b", "sass", text)
507
 
508
- # 2) Insert periods for uppercase abbreviations (>=2 chars), then remove them
509
  def insert_periods_for_abbrev(m):
510
  abbr = m.group(0)
511
  parted = ".".join(list(abbr)) + "."
@@ -552,14 +548,11 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
552
  text = re.sub(r"\b[A-Z]{2,}s?\b", expand_abbreviations, text)
553
 
554
  # 7) Emotive placeholders
555
- # "haha", "ha", "heh", "lol" => "(* laughs *)"
556
  text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
557
- # "sigh" => "(* sighs *)"
558
  text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
559
- # "groan", "moan" => "(* groans *)"
560
  text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
561
 
562
- # 8) Insert filler words if speaker != Jane
563
  if speaker != "Jane":
564
  def insert_thinking_pause(m):
565
  word = m.group(1)
@@ -584,9 +577,27 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
584
 
585
  return text.strip()
586
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
587
  def number_to_words(n: int) -> str:
588
  """
589
- Basic integer-to-words up to ~99999.
590
  For a robust approach, consider the 'num2words' library.
591
  """
592
  if n == 0:
@@ -637,8 +648,8 @@ def mix_with_bg_music(spoken: AudioSegment, custom_music_path=None) -> AudioSegm
637
  """
638
  Mixes 'spoken' with a default bg_music.mp3 or user-provided custom music:
639
  1) Start with 2 seconds of music alone before speech begins.
640
- 2) Loop music if shorter than final audio length.
641
- 3) Lower music volume so speech is clear.
642
  """
643
  if custom_music_path:
644
  music_path = custom_music_path
@@ -661,3 +672,26 @@ def mix_with_bg_music(spoken: AudioSegment, custom_music_path=None) -> AudioSegm
661
  looped_music = looped_music[:total_length_ms]
662
  final_mix = looped_music.overlay(spoken, position=2000)
663
  return final_mix
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  print("[DEBUG] Aggregated info from primary sources:")
166
  print(aggregated_info)
167
 
168
+ # If not enough data, fallback to LLM
169
  if not is_sufficient(aggregated_info):
170
  print("[LOG] Insufficient info from primary sources. Fallback to LLM.")
171
  additional_info = query_llm_for_additional_info(topic, aggregated_info)
 
277
  """
278
  Sends the system_prompt plus input_text to the Groq LLM to generate a
279
  multi-speaker Dialogue in JSON, returning a Dialogue object.
 
 
 
280
  """
281
  print("[LOG] Generating script with tone:", tone, "and length:", target_length)
282
  groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 
303
  "If sponsor content is provided, include it in a separate ad break (~30 seconds). "
304
  "Use phrasing like 'Now a word from our sponsor...' and end with 'Back to the show' or similar."
305
  )
306
+ else:
307
  sponsor_instructions = (
308
  "If sponsor content is provided, blend it naturally (~30 seconds) into the conversation. "
309
  "Avoid abrupt transitions."
 
374
  new_dialogue_items.append(DialogueItem(**d))
375
 
376
  return Dialogue(dialogue=new_dialogue_items)
 
377
  except json.JSONDecodeError as e:
378
  print("[ERROR] JSON decoding (format) failed:", e)
379
  raise ValueError(f"Failed to parse dialogue: {str(e)}")
 
468
  mp3_file.write(chunk)
469
  mp3_path = mp3_file.name
470
 
471
+ # Normalize volume
472
  audio_seg = AudioSegment.from_file(mp3_path, format="mp3")
473
  audio_seg = effects.normalize(audio_seg)
474
 
 
489
  def _preprocess_text_for_tts(text: str, speaker: str) -> str:
490
  """
491
  1) "SaaS" => "sass"
492
+ 2) Insert periods for uppercase abbreviations -> remove for TTS
493
  3) Convert decimals like "3.14" -> "three point one four"
494
  4) Convert pure integer numbers like "20" -> "twenty"
495
  5) Expand leftover all-caps
 
501
  # 1) "SaaS" => "sass"
502
  text = re.sub(r"\b(?i)SaaS\b", "sass", text)
503
 
504
+ # 2) Insert periods in uppercase abbreviations (>=2 chars), then remove them
505
  def insert_periods_for_abbrev(m):
506
  abbr = m.group(0)
507
  parted = ".".join(list(abbr)) + "."
 
548
  text = re.sub(r"\b[A-Z]{2,}s?\b", expand_abbreviations, text)
549
 
550
  # 7) Emotive placeholders
 
551
  text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
 
552
  text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
 
553
  text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
554
 
555
+ # 8) Insert filler words if speaker != "Jane"
556
  if speaker != "Jane":
557
  def insert_thinking_pause(m):
558
  word = m.group(1)
 
577
 
578
  return text.strip()
579
 
580
+ def _spell_digits(d: str) -> str:
581
+ """
582
+ Convert individual digits '3' -> 'three'.
583
+ """
584
+ digit_map = {
585
+ '0': 'zero',
586
+ '1': 'one',
587
+ '2': 'two',
588
+ '3': 'three',
589
+ '4': 'four',
590
+ '5': 'five',
591
+ '6': 'six',
592
+ '7': 'seven',
593
+ '8': 'eight',
594
+ '9': 'nine'
595
+ }
596
+ return " ".join(digit_map[ch] for ch in d if ch in digit_map)
597
+
598
  def number_to_words(n: int) -> str:
599
  """
600
+ Basic integer-to-words up to ~99999.
601
  For a robust approach, consider the 'num2words' library.
602
  """
603
  if n == 0:
 
648
  """
649
  Mixes 'spoken' with a default bg_music.mp3 or user-provided custom music:
650
  1) Start with 2 seconds of music alone before speech begins.
651
+ 2) Loop the music if it's shorter than the final audio length.
652
+ 3) Lower music volume so the speech is clear.
653
  """
654
  if custom_music_path:
655
  music_path = custom_music_path
 
672
  looped_music = looped_music[:total_length_ms]
673
  final_mix = looped_music.overlay(spoken, position=2000)
674
  return final_mix
675
+
676
+ # This function is new for short Q&A calls
677
def call_groq_api_for_qa(system_prompt: str) -> str:
    """
    Send `system_prompt` to the Groq chat-completions API for a short Q&A
    reply and return the model's text.

    The prompt is sent as a single system message to the
    "llama-3.3-70b-versatile" model (max_tokens=512, temperature=0.7).
    The reply is returned with surrounding whitespace stripped; it is not
    parsed or validated here. Callers expect a JSON string, e.g.:
        {"speaker": "John", "text": "Short answer here"}

    Returns:
        The model's reply, or a JSON-encoded fallback apology attributed
        to "John" when the client cannot be created, the request fails,
        or the response contains no usable text.
    """
    fallback = json.dumps(
        {"speaker": "John", "text": "I'm sorry, I'm having trouble answering right now."}
    )

    try:
        # Client construction is inside the try so that a missing or invalid
        # API key produces the same fallback as a failed request.
        groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
        response = groq_client.chat.completions.create(
            messages=[{"role": "system", "content": system_prompt}],
            model="llama-3.3-70b-versatile",
            max_tokens=512,
            temperature=0.7
        )
    except Exception as e:
        print("[ERROR] Groq API error:", e)
        return fallback

    # A successful call can still carry no choices or a None content;
    # guard against that instead of raising IndexError/AttributeError.
    try:
        raw_content = response.choices[0].message.content
    except (IndexError, AttributeError, TypeError) as e:
        print("[ERROR] Groq API returned an unexpected response:", e)
        return fallback

    if raw_content is None or not raw_content.strip():
        print("[ERROR] Groq API returned empty content.")
        return fallback

    return raw_content.strip()