Spaces:

fahadqazi
/

Sindhi-Text-to-Speech

Running

fahadqazi committed on Dec 30, 2024

Commit

dd46f7a

verified ·

1 Parent(s): 9396b96

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -33,17 +33,10 @@ speaker_embeddings = torch.tensor(speaker_embeddings).to(device)
 default_embedding = speaker_embeddings
-# replacements = [
-#     ("â", "a"),  # Long a
-#     ("ç", "ch"),  # Ch as in "chair"
-#     ("ğ", "gh"),  # Silent g or slight elongation of the preceding vowel
-#     ("ı", "i"),   # Dotless i
-#     ("î", "i"),   # Long i
-#     ("ö", "oe"),  # Similar to German ö
-#     ("ş", "sh"),  # Sh as in "shoe"
-#     ("ü", "ue"),  # Similar to German ü
-#     ("û", "u"),   # Long u
-# ]
 number_words = {
     0: "ٻڙي",
@@ -95,14 +88,14 @@ def replace_numbers_with_words(text):
 def normalize_text(text):
     # Convert to lowercase
-    # text = text.lower()
     # Replace numbers with words
     text = replace_numbers_with_words(text)
     # Apply character replacements
-    # for old, new in replacements:
-    #     text = text.replace(old, new)
     # Remove punctuation
     text = re.sub(r'[^\w\s]', '', text)

 default_embedding = speaker_embeddings
+replacements = [
+    ("۾", "مين"),  #
+    ("۽", "ائين"),  #
+]
 number_words = {
     0: "ٻڙي",
 def normalize_text(text):
     # Convert to lowercase
+    text = text.lower()
     # Replace numbers with words
     text = replace_numbers_with_words(text)
     # Apply character replacements
+    for old, new in replacements:
+        text = text.replace(old, new)
     # Remove punctuation
     text = re.sub(r'[^\w\s]', '', text)