Spaces:
Paused
Paused
File size: 2,759 Bytes
d66c48f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
"""https://github.com/bootphon/phonemizer"""
import re
from phonemizer import phonemize
from phonemizer.separator import Separator
# Punctuation normalisation table consumed by replace_punctuation(): every
# occurrence of a key in the input text is replaced by the mapped value.
# CJK/fullwidth punctuation is folded onto ASCII, and quoting/bracketing
# characters map to "" (i.e. they are deleted).
#
# Fullwidth forms are spelled as \uXXXX escapes on purpose: written literally
# they are easily folded into their ASCII look-alikes by editors or text
# normalisation, which silently turns them into duplicate keys and no-op
# entries such as "," -> ",".
rep_map = {
    ":": ",",
    ";": ",",
    "\uff1a": ",",  # fullwidth colon
    "\uff1b": ",",  # fullwidth semicolon
    "\uff0c": ",",  # fullwidth comma
    "。": ".",
    "\uff01": "!",  # fullwidth exclamation mark
    "\uff1f": "?",  # fullwidth question mark
    "\n": ".",
    "·": ",",
    "、": ",",
    "...": ".",
    "…": ".",
    "$": ".",
    "“": "",
    "”": "",
    "‘": "",
    "’": "",
    "\uff08": "",  # fullwidth left parenthesis
    "\uff09": "",  # fullwidth right parenthesis
    "(": "",
    ")": "",
    "《": "",
    "》": "",
    "【": "",
    "】": "",
    "[": "",
    "]": "",
    "—": "",
    "\uff5e": "-",  # fullwidth tilde
    "~": "-",
    "「": "",
    "」": "",
    "¿": "",
    "¡": "",
}
_special_map = [
(re.compile("%s" % x[0]), x[1])
for x in [
("ø", "ɸ"),
("ː", ":"),
("ɜ", "ʒ"),
("ɑ̃", "ɑ~"),
("j", "jˈ"), # To avoid incorrect connect
("n", "ˈn"), # To avoid incorrect connect
("t", "tˈ"), # To avoid incorrect connect
("ŋ", "ˈŋ"), # To avoid incorrect connect
("ɪ", "ˈɪ"), # To avoid incorrect connect
]
]
def collapse_whitespace(text):
    """Squeeze each run of whitespace in *text* to one space and trim both ends."""
    squeezed = re.sub(r"\s+", " ", text)
    return squeezed.strip()
def remove_punctuation_at_begin(text):
    """Strip the leading run of ``,``, ``.``, ``!`` and ``?`` characters from *text*."""
    leading_marks = re.compile(r"^[,.!?]+")
    return leading_marks.sub("", text)
def remove_aux_symbols(text):
    """Delete angle brackets, parentheses, square brackets, double quotes and guillemets."""
    aux_symbols = re.compile(r"[\<\>\(\)\[\]\"\«\»]+")
    return aux_symbols.sub("", text)
def replace_symbols(text):
    """Turn ``;`` and ``:`` into commas and ``-`` into a space."""
    # One translation pass; none of the replacement outputs is itself a key,
    # so this is equivalent to applying the substitutions one after another.
    table = str.maketrans({";": ",", "-": " ", ":": ","})
    return text.translate(table)
def replace_punctuation(text):
    """Replace every occurrence of a ``rep_map`` key in *text* with its mapped value."""
    # Keys are escaped so that entries such as "..." or "$" match literally.
    alternatives = [re.escape(symbol) for symbol in rep_map]
    matcher = re.compile("|".join(alternatives))

    def _lookup(match):
        return rep_map[match.group()]

    return matcher.sub(_lookup, text)
def text_normalize(text):
    """Run the punctuation/whitespace clean-up steps on *text* and return the result.

    The steps run in a fixed order: punctuation mapping, symbol replacement,
    auxiliary-symbol removal, leading-punctuation removal and whitespace
    collapsing. A final "." is appended when the text does not already end
    in one of ``. , ! ? - …``.
    """
    pipeline = (
        replace_punctuation,
        replace_symbols,
        remove_aux_symbols,
        remove_punctuation_at_begin,
        collapse_whitespace,
    )
    for stage in pipeline:
        text = stage(text)
    # Empty text has no final character to match, so it is returned unchanged.
    return re.sub(r"([^\.,!\?\-…])$", r"\1.", text)
# special map
def special_map(text):
    """Apply every ``_special_map`` substitution to *text*, in list order."""
    for pattern, substitute in _special_map:
        text = pattern.sub(substitute, text)
    return text
def german_to_ipa(text):
    """Normalize German *text* and convert it to an IPA string.

    The text is cleaned with ``text_normalize``, phonemized by phonemizer's
    espeak backend (language ``"de"``, words separated by spaces, syllables
    by ``|``, punctuation preserved), stripped of language-switch flags and
    finally passed through ``special_map``.

    Args:
        text: Raw German input text.

    Returns:
        The post-processed IPA transcription as returned by ``special_map``.
    """
    text = text_normalize(text)
    ipa = phonemize(
        text.strip(),
        language="de",
        backend="espeak",
        separator=Separator(phone=None, word=" ", syllable="|"),
        strip=True,
        preserve_punctuation=True,
        njobs=4,
    )
    # Remove language-switch flags such as "(en)" ... "(de)" that mark words
    # pronounced with another voice. Every flag is stripped, not only the
    # "(en)"/"(de)" pair, so that e.g. "(fr)" cannot leak into the output.
    # This must happen before special_map(), which would otherwise rewrite
    # the letters inside the flag (e.g. the "n" of "(en)").
    ipa = re.sub(r"\([a-z][a-z0-9/_-]*\)", "", ipa)
    ipa = special_map(ipa)
    return ipa
|