Spaces:
Sleeping
Sleeping
File size: 1,270 Bytes
05d3571 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# Remove non-printable characters, following the approach described at:
# https://stackoverflow.com/questions/92438/stripping-non-printable-characters-from-a-string-in-python
#
# This is intended to be a drop-in replacement for Moses' strip-non-printing-char.perl.
import sys
import unicodedata
from typing import Callable
# Unicode general categories covered by \p{C} in Perl: control, format,
# surrogate, private use and unassigned.
# See https://www.unicode.org/reports/tr44/#General_Category_Values
_NON_PRINTABLE_CATEGORIES = frozenset({"Cc", "Cf", "Cs", "Co", "Cn"})

# Filled on first use by _get_non_printable_code_points(); None until then.
_non_printable_code_points = None


def _get_non_printable_code_points():
    """Return a tuple of every code point whose category is in \\p{C}.

    The scan walks the whole range 0..sys.maxunicode, so its result is kept
    in a module-level variable and reused by later calls instead of being
    recomputed for every replacer.
    """
    global _non_printable_code_points
    if _non_printable_code_points is None:
        _non_printable_code_points = tuple(
            code_point
            for code_point in range(sys.maxunicode + 1)
            if unicodedata.category(chr(code_point)) in _NON_PRINTABLE_CATEGORIES
        )
    return _non_printable_code_points


def get_replacer(replace_by: str = " ") -> Callable[[str], str]:
    """Build a function that replaces non-printable characters in a string.

    Args:
        replace_by: text substituted for each non-printable character.
            Defaults to a single space.

    Returns:
        A function taking a string and returning a copy in which every
        character with a Unicode general category of Cc, Cf, Cs, Co or Cn
        has been replaced by ``replace_by``.
    """
    # str.translate expects a mapping keyed by code point (int).
    non_printable_map = dict.fromkeys(_get_non_printable_code_points(), replace_by)

    def replace_non_printing_char(line: str) -> str:
        return line.translate(non_printable_map)

    return replace_non_printing_char
def test_remove():
    """Check that control and format characters are replaced one for one."""
    replaceby_ = get_replacer("_")
    # The hidden characters are written as escapes (tab, zero-width space,
    # byte-order mark) so they stay visible and survive copy/paste.
    assert (
        replaceby_("See what's hidden in your string…\tor be\u200bhind\ufeff")
        == "See what's hidden in your string…_or be_hind_"
    )

    replacebyspace = get_replacer(" ")
    # \x00 and \x11 are each replaced, hence two leading spaces.
    assert replacebyspace("\x00\x11Hello\u200bWorld") == "  Hello World"
|