MyPod_10

Running

App Files Community

siddhartharyaai committed on Jan 15

Commit

1c6ae69

verified ·

1 Parent(s): be1de78

Update utils.py

Browse files

Files changed (1) hide show

utils.py +56 -77

utils.py CHANGED Viewed

@@ -445,8 +445,10 @@ def generate_audio_mp3(text: str, speaker: str) -> str:
     try:
         print(f"[LOG] Generating audio for speaker: {speaker}")
         processed_text = _preprocess_text_for_tts(text, speaker)
-        # Final safeguard: convert any remaining digit sequences to words
-        processed_text = re.sub(r"\d[\d,]*", lambda m: num2words(int(m.group().replace(',', ''))), processed_text)
         deepgram_api_url = "https://api.deepgram.com/v1/speak"
         params = {
@@ -498,82 +500,59 @@ def transcribe_youtube_video_OLD_YTDLP(video_url: str) -> str:
 def _preprocess_text_for_tts(text: str, speaker: str) -> str:
     """
-    1) "SaaS" => "sass"
-    2) Insert periods for uppercase abbreviations -> remove for TTS
-    3) Convert decimals like "3.14" -> "three point one four"
-    4) Convert pure integer numbers like "20" -> "twenty"
-    5) Expand leftover all-caps
-    6) Emotive placeholders for 'ha', 'sigh', etc.
-    7) Remove random fillers
-    8) Capitalize sentence starts
-    """
-    # 1) "SaaS" => "sass"
-    text = re.sub(r"\b(?i)SaaS\b", "sass", text)
-    # 2) Insert periods in uppercase abbreviations (>=2 chars), then remove them
-    def insert_periods_for_abbrev(m):
-        abbr = m.group(0)
-        parted = ".".join(list(abbr)) + "."
-        return parted
-    text = re.sub(r"\b([A-Z0-9]{2,})\b", insert_periods_for_abbrev, text)
-    text = re.sub(r"\.\.", ".", text)
-    def remove_periods_for_tts(m):
-        chunk = m.group(0)
-        return chunk.replace(".", " ").strip()
-    text = re.sub(r"[A-Z0-9]\.[A-Z0-9](?:\.[A-Z0-9])*\.", remove_periods_for_tts, text)
-    # 3) Hyphens -> spaces
-    text = re.sub(r"-", " ", text)
-    # 4) Convert decimals (e.g. "3.14")
-    def convert_decimal(m):
-        number_str = m.group()
-        parts = number_str.split('.')
-        whole_part = _spell_digits(parts[0])
-        decimal_part = " ".join(_spell_digits(d) for d in parts[1])
-        return f"{whole_part} point {decimal_part}"
-    text = re.sub(r"\b\d+\.\d+\b", convert_decimal, text)
-    # 5) Convert pure integer => words
-    def convert_int_to_words(m):
-        num_str = m.group()
-        # Remove commas before conversion
-        num_str_clean = num_str.replace(',', '')
         try:
-            return num2words(int(num_str_clean))
-        except Exception:
-            return num_str
-    # Updated regex to catch multi-digit numbers with commas
-    text = re.sub(r"\d[\d,]*", convert_int_to_words, text)
-    # 6) Expand leftover all-caps => "NASA" => "N A S A"
-    def expand_abbreviations(m):
-        abbrev = m.group()
-        if abbrev.endswith('s') and abbrev[:-1].isupper():
-            singular = abbrev[:-1]
-            expanded = " ".join(list(singular)) + "s"
-            special_plurals = {
-                "MPs": "M Peas",
-                "TMTs": "T M Tees",
-                "ARJs": "A R Jays",
-            }
-            return special_plurals.get(abbrev, expanded)
-        else:
-            return " ".join(list(abbrev))
-    text = re.sub(r"\b[A-Z]{2,}s?\b", expand_abbreviations, text)
-    # 7) Emotive placeholders
-    text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
-    text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
-    text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
-    # 8) Remove random fillers
-    text = re.sub(r"\b(uh|um|ah)\b", "", text, flags=re.IGNORECASE)
-    # 9) Capitalize sentence starts
-    def capitalize_match(m):
-        return m.group().upper()
-    text = re.sub(r'(^\s*\w)|([.!?]\s*\w)', capitalize_match, text)
     return text.strip()

     try:
         print(f"[LOG] Generating audio for speaker: {speaker}")
         processed_text = _preprocess_text_for_tts(text, speaker)
+        # Debug: Print the processed text to verify number conversion
+        print("[DEBUG] Processed text for TTS:")
+        print(processed_text)
         deepgram_api_url = "https://api.deepgram.com/v1/speak"
         params = {
 def _preprocess_text_for_tts(text: str, speaker: str) -> str:
     """
+    Normalize raw script text so the TTS engine reads it naturally.
+
+    Steps, in order:
+    1) Spell out numbers (plain digit runs, comma-grouped thousands, decimals).
+    2) Space out all-caps abbreviations so they are read letter by letter.
+    3) Replace laugh/sigh/groan words with emotive placeholders.
+    4) Remove filler words (uh, um, ah).
+    5) Capitalize the first word character of each sentence.
+    6) Collapse runs of whitespace into a single space.
+
+    Args:
+        text: Text to preprocess.
+        speaker: Speaker label; accepted for interface compatibility but not
+            used by any of the steps below.
+
+    Returns:
+        The preprocessed text with leading/trailing whitespace stripped.
+    """
+    # 1) Convert numbers (with or without thousands separators) to words
+    def convert_number(match):
+        num_str = match.group()
+        # Strip thousands separators so int()/float() can parse the value
+        num_clean = num_str.replace(',', '')
         try:
+            # A decimal point means float; everything else is an integer
+            number = float(num_clean) if '.' in num_clean else int(num_clean)
+            return num2words(number)
+        except (ValueError, OverflowError):
+            # Leave the token untouched when it cannot be converted
+            # (e.g. a value too large for num2words -- TODO confirm its limit)
+            return num_str
+    # Match either comma-grouped thousands ("1,234,567") or a plain digit run
+    # ("2024"), with an optional decimal part. Requiring \d{1,3} alone would
+    # skip every comma-less number of four or more digits.
+    text = re.sub(r'\b(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?\b', convert_number, text)
+    # 2) Handle abbreviations (e.g., NIA -> N I A)
+    def expand_abbreviation(match):
+        abbr = match.group()
+        # The \b anchors below restrict matches to standalone all-caps tokens
+        return ' '.join(abbr)
+    text = re.sub(r'\b[A-Z]{2,}\b', expand_abbreviation, text)
+    # 3) Handle emotive expressions
+    text = re.sub(r'\b(ha|haha|heh|lol)\b', '(* laughs *)', text, flags=re.IGNORECASE)
+    text = re.sub(r'\bsigh\b', '(* sighs *)', text, flags=re.IGNORECASE)
+    text = re.sub(r'\b(groan|moan)\b', '(* groans *)', text, flags=re.IGNORECASE)
+    # 4) Remove random fillers
+    text = re.sub(r'\b(uh|um|ah)\b', '', text, flags=re.IGNORECASE)
+    # 5) Capitalize sentence starts
+    def capitalize_sentence(match):
+        return match.group().upper()
+    text = re.sub(r'(^\s*\w)|([.!?]\s*\w)', capitalize_sentence, text)
+    # 6) Replace multiple spaces with single space
+    text = re.sub(r'\s+', ' ', text)
     return text.strip()