Spaces:
Running
Running
Update utils.py
Browse files
utils.py
CHANGED
@@ -491,12 +491,18 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
|
|
491 |
Preprocesses the input text for TTS by handling punctuation, abbreviations,
|
492 |
and ensuring numeric sequences are passed directly.
|
493 |
"""
|
|
|
|
|
|
|
494 |
# 1) "SaaS" => "sass"
|
495 |
text = re.sub(r"\b(?i)SaaS\b", "sass", text)
|
496 |
|
497 |
# 2) Insert periods in uppercase abbreviations (letters only), then remove them
|
|
|
498 |
def insert_periods_for_abbrev(m):
|
499 |
abbr = m.group(0)
|
|
|
|
|
500 |
return ".".join(list(abbr)) + "."
|
501 |
text = re.sub(r"\b([A-Z]{2,})\b", insert_periods_for_abbrev, text)
|
502 |
text = re.sub(r"\.\.", ".", text)
|
|
|
491 |
Preprocesses the input text for TTS by handling punctuation, abbreviations,
|
492 |
and ensuring numeric sequences are passed directly.
|
493 |
"""
|
494 |
+
# Expand the common abbreviation "No." to "Number"
|
495 |
+
text = re.sub(r"\bNo\.\b", "Number", text)
|
496 |
+
|
497 |
# 1) "SaaS" => "sass"
|
498 |
text = re.sub(r"\b(?i)SaaS\b", "sass", text)
|
499 |
|
500 |
# 2) Insert periods in uppercase abbreviations (letters only), then remove them
|
501 |
+
abbreviations_as_words = {"NASA", "NATO", "UNESCO"} # Add exceptions as needed
|
502 |
def insert_periods_for_abbrev(m):
|
503 |
abbr = m.group(0)
|
504 |
+
if abbr in abbreviations_as_words:
|
505 |
+
return abbr
|
506 |
return ".".join(list(abbr)) + "."
|
507 |
text = re.sub(r"\b([A-Z]{2,})\b", insert_periods_for_abbrev, text)
|
508 |
text = re.sub(r"\.\.", ".", text)
|