fahadqazi committed on
Commit
dd46f7a
·
verified ·
1 Parent(s): 9396b96

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -14
app.py CHANGED
@@ -33,17 +33,10 @@ speaker_embeddings = torch.tensor(speaker_embeddings).to(device)
33
 
34
  default_embedding = speaker_embeddings
35
 
36
- # replacements = [
37
- # ("â", "a"), # Long a
38
- # ("ç", "ch"), # Ch as in "chair"
39
- # ("ğ", "gh"), # Silent g or slight elongation of the preceding vowel
40
- # ("ı", "i"), # Dotless i
41
- # ("î", "i"), # Long i
42
- # ("ö", "oe"), # Similar to German ö
43
- # ("ş", "sh"), # Sh as in "shoe"
44
- # ("ü", "ue"), # Similar to German ü
45
- # ("û", "u"), # Long u
46
- # ]
47
 
48
  number_words = {
49
  0: "ٻڙي",
@@ -95,14 +88,14 @@ def replace_numbers_with_words(text):
95
 
96
  def normalize_text(text):
97
  # Convert to lowercase
98
- # text = text.lower()
99
 
100
  # Replace numbers with words
101
  text = replace_numbers_with_words(text)
102
 
103
  # Apply character replacements
104
- # for old, new in replacements:
105
- # text = text.replace(old, new)
106
 
107
  # Remove punctuation
108
  text = re.sub(r'[^\w\s]', '', text)
 
33
 
34
  default_embedding = speaker_embeddings
35
 
36
+ replacements = [
37
+ ("۾", "مين"), #
38
+ ("۽", "ائين"), #
39
+ ]
 
 
 
 
 
 
 
40
 
41
  number_words = {
42
  0: "ٻڙي",
 
88
 
89
  def normalize_text(text):
90
  # Convert to lowercase
91
+ text = text.lower()
92
 
93
  # Replace numbers with words
94
  text = replace_numbers_with_words(text)
95
 
96
  # Apply character replacements
97
+ for old, new in replacements:
98
+ text = text.replace(old, new)
99
 
100
  # Remove punctuation
101
  text = re.sub(r'[^\w\s]', '', text)