siddhartharyaai committed on
Commit
1c6ae69
·
verified ·
1 Parent(s): be1de78

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +56 -77
utils.py CHANGED
@@ -445,8 +445,10 @@ def generate_audio_mp3(text: str, speaker: str) -> str:
445
  try:
446
  print(f"[LOG] Generating audio for speaker: {speaker}")
447
  processed_text = _preprocess_text_for_tts(text, speaker)
448
- # Final safeguard: convert any remaining digit sequences to words
449
- processed_text = re.sub(r"\d[\d,]*", lambda m: num2words(int(m.group().replace(',', ''))), processed_text)
 
 
450
 
451
  deepgram_api_url = "https://api.deepgram.com/v1/speak"
452
  params = {
@@ -498,82 +500,59 @@ def transcribe_youtube_video_OLD_YTDLP(video_url: str) -> str:
498
 
499
  def _preprocess_text_for_tts(text: str, speaker: str) -> str:
500
  """
501
- 1) "SaaS" => "sass"
502
- 2) Insert periods for uppercase abbreviations -> remove for TTS
503
- 3) Convert decimals like "3.14" -> "three point one four"
504
- 4) Convert pure integer numbers like "20" -> "twenty"
505
- 5) Expand leftover all-caps
506
- 6) Emotive placeholders for 'ha', 'sigh', etc.
507
- 7) Remove random fillers
508
- 8) Capitalize sentence starts
509
- """
510
- # 1) "SaaS" => "sass"
511
- text = re.sub(r"\b(?i)SaaS\b", "sass", text)
512
-
513
- # 2) Insert periods in uppercase abbreviations (>=2 chars), then remove them
514
- def insert_periods_for_abbrev(m):
515
- abbr = m.group(0)
516
- parted = ".".join(list(abbr)) + "."
517
- return parted
518
- text = re.sub(r"\b([A-Z0-9]{2,})\b", insert_periods_for_abbrev, text)
519
- text = re.sub(r"\.\.", ".", text)
520
- def remove_periods_for_tts(m):
521
- chunk = m.group(0)
522
- return chunk.replace(".", " ").strip()
523
- text = re.sub(r"[A-Z0-9]\.[A-Z0-9](?:\.[A-Z0-9])*\.", remove_periods_for_tts, text)
524
-
525
- # 3) Hyphens -> spaces
526
- text = re.sub(r"-", " ", text)
527
-
528
- # 4) Convert decimals (e.g. "3.14")
529
- def convert_decimal(m):
530
- number_str = m.group()
531
- parts = number_str.split('.')
532
- whole_part = _spell_digits(parts[0])
533
- decimal_part = " ".join(_spell_digits(d) for d in parts[1])
534
- return f"{whole_part} point {decimal_part}"
535
- text = re.sub(r"\b\d+\.\d+\b", convert_decimal, text)
536
-
537
- # 5) Convert pure integer => words
538
- def convert_int_to_words(m):
539
- num_str = m.group()
540
- # Remove commas before conversion
541
- num_str_clean = num_str.replace(',', '')
542
  try:
543
- return num2words(int(num_str_clean))
544
- except Exception:
545
- return num_str
546
- # Updated regex to catch multi-digit numbers with commas
547
- text = re.sub(r"\d[\d,]*", convert_int_to_words, text)
548
-
549
- # 6) Expand leftover all-caps => "NASA" => "N A S A"
550
- def expand_abbreviations(m):
551
- abbrev = m.group()
552
- if abbrev.endswith('s') and abbrev[:-1].isupper():
553
- singular = abbrev[:-1]
554
- expanded = " ".join(list(singular)) + "s"
555
- special_plurals = {
556
- "MPs": "M Peas",
557
- "TMTs": "T M Tees",
558
- "ARJs": "A R Jays",
559
- }
560
- return special_plurals.get(abbrev, expanded)
561
- else:
562
- return " ".join(list(abbrev))
563
- text = re.sub(r"\b[A-Z]{2,}s?\b", expand_abbreviations, text)
564
-
565
- # 7) Emotive placeholders
566
- text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
567
- text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
568
- text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
569
-
570
- # 8) Remove random fillers
571
- text = re.sub(r"\b(uh|um|ah)\b", "", text, flags=re.IGNORECASE)
572
-
573
- # 9) Capitalize sentence starts
574
- def capitalize_match(m):
575
- return m.group().upper()
576
- text = re.sub(r'(^\s*\w)|([.!?]\s*\w)', capitalize_match, text)
 
 
 
 
 
577
 
578
  return text.strip()
579
 
 
445
  try:
446
  print(f"[LOG] Generating audio for speaker: {speaker}")
447
  processed_text = _preprocess_text_for_tts(text, speaker)
448
+
449
+ # Debug: Print the processed text to verify number conversion
450
+ print("[DEBUG] Processed text for TTS:")
451
+ print(processed_text)
452
 
453
  deepgram_api_url = "https://api.deepgram.com/v1/speak"
454
  params = {
 
500
 
501
  def _preprocess_text_for_tts(text: str, speaker: str) -> str:
502
  """
503
+ Comprehensive preprocessing to ensure natural speech:
504
+ 1) Convert multi-digit numbers to words.
505
+ 2) Handle abbreviations.
506
+ 3) Convert decimals.
507
+ 4) Handle emotive expressions.
508
+ 5) Remove random fillers.
509
+ 6) Capitalize sentence starts.
510
+ """
511
+ # 1) Convert multi-digit numbers (including those with commas) to words
512
+ def convert_number(match):
513
+ num_str = match.group()
514
+ # Remove commas
515
+ num_clean = num_str.replace(',', '')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
516
  try:
517
+ # Convert to integer if possible
518
+ number = int(num_clean)
519
+ return num2words(number)
520
+ except ValueError:
521
+ try:
522
+ # If not integer, try float
523
+ number = float(num_clean)
524
+ return num2words(number)
525
+ except ValueError:
526
+ # If not a number, return as is
527
+ return num_str
528
+
529
+ # Regex to match numbers with optional commas and decimal points
530
+ text = re.sub(r'\b\d{1,3}(?:,\d{3})*(?:\.\d+)?\b', convert_number, text)
531
+
532
+ # 2) Handle abbreviations (e.g., NIA -> N I A)
533
+ def expand_abbreviation(match):
534
+ abbr = match.group()
535
+ # Do not add spaces if the abbreviation is part of a word
536
+ return ' '.join(abbr)
537
+
538
+ text = re.sub(r'\b[A-Z]{2,}\b', expand_abbreviation, text)
539
+
540
+ # 3) Handle emotive expressions
541
+ text = re.sub(r'\b(ha|haha|heh|lol)\b', '(* laughs *)', text, flags=re.IGNORECASE)
542
+ text = re.sub(r'\bsigh\b', '(* sighs *)', text, flags=re.IGNORECASE)
543
+ text = re.sub(r'\b(groan|moan)\b', '(* groans *)', text, flags=re.IGNORECASE)
544
+
545
+ # 4) Remove random fillers
546
+ text = re.sub(r'\b(uh|um|ah)\b', '', text, flags=re.IGNORECASE)
547
+
548
+ # 5) Capitalize sentence starts
549
+ def capitalize_sentence(match):
550
+ return match.group().upper()
551
+
552
+ text = re.sub(r'(^\s*\w)|([.!?]\s*\w)', capitalize_sentence, text)
553
+
554
+ # 6) Replace multiple spaces with single space
555
+ text = re.sub(r'\s+', ' ', text)
556
 
557
  return text.strip()
558