Fastspeech2_HS / ssn_parser_new /scripts /ortho_to_phonetic1.py
utkarsh2299's picture
Upload 55 files
76690c5 verified
import re
def cat(file):
with open(file, 'r') as f:
return f.read()
def ortho_to_phonetic(input_file, phone_list_file, output_file):
with open(input_file, 'r') as f:
words = f.read().split()
with open(phone_list_file, 'r') as f:
phone_list = set(f.read().splitlines())
word_start = 0
with open(output_file, 'w') as phn_handle:
while word_start < len(words):
word = words[word_start]
if word != "SIL":
num = len(word)
phone_start1 = 0
while phone_start1 < num:
p1 = word[phone_start1:phone_start1 + 2]
p2 = word[phone_start1:phone_start1 + 3]
p3 = word[phone_start1:phone_start1 + 4]
p4 = word[phone_start1:phone_start1 + 5]
p5 = word[phone_start1:phone_start1 + 6]
cou = len(set(re.findall(rf'\b{re.escape(p1)}\b', cat(phone_list_file))))
cou1 = len(set(re.findall(rf'\b{re.escape(p2)}\b', cat(phone_list_file))))
cou2 = len(set(re.findall(rf'\b{re.escape(p3)}\b', cat(phone_list_file))))
cou3 = len(set(re.findall(rf'\b{re.escape(p4)}\b', cat(phone_list_file))))
cou4 = len(set(re.findall(rf'\b{re.escape(p5)}\b', cat(phone_list_file))))
if cou4 == 1:
phn_handle.write(p5 + "\n")
phone_start1 += 6
elif cou3 == 1:
phn_handle.write(p4 + "\n")
phone_start1 += 5
elif cou2 == 1:
phn_handle.write(p3 + "\n")
phone_start1 += 4
elif cou1 == 1:
phn_handle.write(p2 + "\n")
phone_start1 += 3
elif cou == 1:
phn_handle.write(p1 + "\n")
phone_start1 += 2
else:
p1 = word[phone_start1]
if p1 in [",", "."]:
phone_start1 += 1
else:
phn_handle.write(p1 + "\n")
phone_start1 += 1
else:
phn_handle.write("SIL\n")
break
word_start += 1
if __name__ == "__main__":
import sys
if len(sys.argv) != 4:
print("Usage: python script.py input_file phone_list output_file")
sys.exit(0)
#print("Test -- 6")
input_file, phone_list_file, output_file = sys.argv[1], sys.argv[2], sys.argv[3]
#print("output_file", output_file)
ortho_to_phonetic(input_file, phone_list_file, output_file)