siddhartharyaai committed on
Commit
25ae42e
·
verified ·
1 Parent(s): 5006b54

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +6 -0
utils.py CHANGED
@@ -491,12 +491,18 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
491
  Preprocesses the input text for TTS by handling punctuation, abbreviations,
492
  and ensuring numeric sequences are passed directly.
493
  """
 
 
 
494
  # 1) "SaaS" => "sass"
495
  text = re.sub(r"\b(?i)SaaS\b", "sass", text)
496
 
497
  # 2) Insert periods in uppercase abbreviations (letters only), then remove them
 
498
  def insert_periods_for_abbrev(m):
499
  abbr = m.group(0)
 
 
500
  return ".".join(list(abbr)) + "."
501
  text = re.sub(r"\b([A-Z]{2,})\b", insert_periods_for_abbrev, text)
502
  text = re.sub(r"\.\.", ".", text)
 
491
  Preprocesses the input text for TTS by handling punctuation, abbreviations,
492
  and ensuring numeric sequences are passed directly.
493
  """
494
+ # Handle common shortform "No." for "Number"
495
+ text = re.sub(r"\bNo\.\b", "Number", text)
496
+
497
  # 1) "SaaS" => "sass"
498
  text = re.sub(r"\b(?i)SaaS\b", "sass", text)
499
 
500
  # 2) Insert periods in uppercase abbreviations (letters only), then remove them
501
+ abbreviations_as_words = {"NASA", "NATO", "UNESCO"} # Add exceptions as needed
502
  def insert_periods_for_abbrev(m):
503
  abbr = m.group(0)
504
+ if abbr in abbreviations_as_words:
505
+ return abbr
506
  return ".".join(list(abbr)) + "."
507
  text = re.sub(r"\b([A-Z]{2,})\b", insert_periods_for_abbrev, text)
508
  text = re.sub(r"\.\.", ".", text)