siddhartharyaai committed on
Commit
e6cb3c2
·
verified ·
1 Parent(s): cabe6ff

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +6 -4
utils.py CHANGED
@@ -16,7 +16,7 @@ from groq import Groq
16
  import numpy as np
17
  import torch
18
  import random
19
- from num2words import num2words # Added for robust number-to-words conversion
20
 
21
  class DialogueItem(BaseModel):
22
  speaker: Literal["Jane", "John"] # TTS voice
@@ -535,12 +535,14 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
535
  # 5) Convert pure integer => words
536
  def convert_int_to_words(m):
537
  num_str = m.group()
 
 
538
  try:
539
- return num2words(int(num_str))
540
  except Exception:
541
  return num_str
542
- # Updated regex to catch all digit sequences
543
- text = re.sub(r"\d+", convert_int_to_words, text)
544
 
545
  # 6) Expand leftover all-caps => "NASA" => "N A S A"
546
  def expand_abbreviations(m):
 
16
  import numpy as np
17
  import torch
18
  import random
19
+ from num2words import num2words # For robust number-to-words conversion
20
 
21
  class DialogueItem(BaseModel):
22
  speaker: Literal["Jane", "John"] # TTS voice
 
535
  # 5) Convert pure integer => words
536
  def convert_int_to_words(m):
537
  num_str = m.group()
538
+ # Remove commas before conversion
539
+ num_str_clean = num_str.replace(',', '')
540
  try:
541
+ return num2words(int(num_str_clean))
542
  except Exception:
543
  return num_str
544
+ # Match digit runs with optional comma separators (e.g. "1,000"); note that a trailing comma is also matched and dropped
545
+ text = re.sub(r"\d[\d,]*", convert_int_to_words, text)
546
 
547
  # 6) Expand leftover all-caps => "NASA" => "N A S A"
548
  def expand_abbreviations(m):