MyPod_10

Running

siddhartharyaai committed on Jan 15

Commit

e6cb3c2

verified ·

1 Parent(s): cabe6ff

Update utils.py

Files changed (1) hide show

utils.py CHANGED Viewed

@@ -16,7 +16,7 @@ from groq import Groq
 import numpy as np
 import torch
 import random
-from num2words import num2words  # Added for robust number-to-words conversion
 class DialogueItem(BaseModel):
     speaker: Literal["Jane", "John"]   # TTS voice
@@ -535,12 +535,14 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
     # 5) Convert pure integer => words
     def convert_int_to_words(m):
         num_str = m.group()
         try:
-            return num2words(int(num_str))
         except Exception:
             return num_str
-    # Updated regex to catch all digit sequences
-    text = re.sub(r"\d+", convert_int_to_words, text)
     # 6) Expand leftover all-caps => "NASA" => "N A S A"
     def expand_abbreviations(m):

 import numpy as np
 import torch
 import random
+from num2words import num2words  # For robust number-to-words conversion
 class DialogueItem(BaseModel):
     speaker: Literal["Jane", "John"]   # TTS voice
     # 5) Convert pure integer => words
     def convert_int_to_words(m):
         num_str = m.group()
+        # Remove commas before conversion
+        num_str_clean = num_str.replace(',', '')
         try:
+            return num2words(int(num_str_clean))
         except Exception:
             return num_str
+    # Updated regex to catch multi-digit numbers with commas
+    text = re.sub(r"\d[\d,]*", convert_int_to_words, text)
     # 6) Expand leftover all-caps => "NASA" => "N A S A"
     def expand_abbreviations(m):