smtiitm
/

Fastspeech2_HS

Model card Files Files and versions Community

Fastspeech2_HS / Unified_parser /pypi_package /src /indic_unified_parser /uparser.py

utkarsh2299

Upload 97 files

2c8dc05 verified about 1 year ago

raw

history blame contribute delete

6.53 kB

	import sys, os
	SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
	sys.path.append(SCRIPT_DIR)

	# combined lexical analyzer and parser

	from ply.lex import Lexer
	from ply.yacc import yacc
	from globals import *
	from helpers import *
	import sys
	from sys import exit

	# tokens used
	tokens = ('kaki_c', 'conjsyll2_c', 'fullvowel_b', 'kaki_a', 'kaki_b', 'conjsyll2_b', 'conjsyll2_a',
	'conjsyll1', 'nukchan_b','nukchan_a', 'yarule', 'fullvowel_a', 'vowel')

	# parser part

	def p_sentence(p):
	'''
	sentence : words
	'''
	if p.parser.g.flags.parseLevel == 0:
	p.parser.g.words.syllabifiedWordOut = p[1]

	if p.parser.g.words.syllabifiedWordOut.find('&&') != -1:
	p.parser.g.words.syllabifiedWordOut = rec_replace(p.parser.g.words.syllabifiedWordOut,'&&','&')

	p.parser.g.flags.parseLevel += 1
	else:
	p.parser.g.words.phonifiedWord = p[1]

	def p_words_syltoken(p):
	'''
	words : syltoken
	'''
	if(p.parser.g.flags.DEBUG):
	print(f"Syll:\t{p[1]}")
	p[0] = p[1]

	def p_words_wordsandsyltoken(p):
	'''
	words : words syltoken
	'''
	if(p.parser.g.flags.DEBUG):
	print(f"Syll:\t{p[2]}")
	p[0] = p[1] + p[2]

	def p_syltoken(p):
	'''
	syltoken : fullvowel_b
	\| fullvowel_a
	\| conjsyll2_c
	\| conjsyll2_b
	\| conjsyll2_a
	\| conjsyll1
	\| nukchan_b
	\| nukchan_a
	\| yarule
	\| vowel
	'''
	p[0] = p[1]

	def p_syltoken1(p):
	'''
	syltoken :
	\| kaki_c
	\| kaki_a
	\| kaki_b
	'''
	if (p.parser.g.flags.DEBUG):
	print(f'kaki : {p[1]}')
	p[0] = p[1]

	def p_error(p):
	print('parse error')
	exit(1)

	# print the help of syntax
	def printHelp():

	print("UnifiedParser - Usage Instructions")
	print("Run python3 parser.py wd lsflag wfflag clearflag")
	print("wd - word to parse in unicode.")
	print("lsflag - always 0. we are not using this.")
	print("wfflag - 0 for Monophone parsing, 1 for syllable parsing, 2 for Akshara Parsing")
	print("clearflag - 1 for removing the lisp like format of output and to just produce space separated output. Otherwise, 0.")


	def wordparse(wd : str, lsflag : int, wfflag : int, clearflag : int):
	g = GLOBALS()
	lexer = Lexer()
	parser = yacc()
	parser.g = g
	g.flags.DEBUG = False
	wd = wd.strip(' ') # hidden characters

	if lsflag not in [0,1] or wfflag not in [0,1,2]:
	print("Invalid input")
	exit(1)

	g.flags.LangSpecificCorrectionFlag = lsflag

	g.flags.writeFormat = wfflag
	if wfflag == 4:
	g.flags.writeFormat = 1
	g.flags.syllTagFlag = 1

	word = wd
	if g.flags.DEBUG:
	print(f'Word : {word}')

	word = RemoveUnwanted(word)
	if g.flags.DEBUG:
	print(f'Cleared Word : {word}')

	if SetlanguageFeat(g, word) == 0:
	return 0

	if CheckDictionary(g, word) != 0:
	return 0

	if g.flags.DEBUG:
	print(f'langId : {g.langId}')

	word = ConvertToSymbols(g, word)

	if g.flags.DEBUG:
	print(f"Symbols code : {g.words.unicodeWord}")
	print(f"Symbols syllables : {g.words.syllabifiedWord}")

	parser.parse(g.words.syllabifiedWord, lexer=lexer)
	if(g.flags.DEBUG):
	print(f"Syllabified Word : {g.words.syllabifiedWordOut}")
	g.words.syllabifiedWordOut = rec_replace(g.words.syllabifiedWordOut, '&#&','&') + '&'
	if(g.flags.DEBUG):
	print(f"Syllabified Word out : {g.words.syllabifiedWordOut}")
	g.words.syllabifiedWordOut = LangSpecificCorrection(g, g.words.syllabifiedWordOut, g.flags.LangSpecificCorrectionFlag)
	if(g.flags.DEBUG):
	print(f"Syllabified Word langCorr : {g.words.syllabifiedWordOut}")
	if(g.flags.DEBUG):
	print(f"Syllabified Word gemCorr : {g.words.syllabifiedWordOut}")
	g.words.syllabifiedWordOut = CleanseWord(g.words.syllabifiedWordOut)
	if(g.flags.DEBUG):
	print(f"Syllabified Word memCorr : {g.words.syllabifiedWordOut}")

	if not g.isSouth:
	if g.flags.DEBUG:
	print('NOT SOUTH')
	count = 0
	for i in range(len(g.words.syllabifiedWordOut)):
	if g.words.syllabifiedWordOut[i] == '&':
	count += 1
	splitPosition = 2
	if GetPhoneType(g, g.words.syllabifiedWordOut, 1) == 1:
	if count > 2:
	tpe = GetPhoneType(g, g.words.syllabifiedWordOut, 2)
	if tpe == 2:
	splitPosition = 1
	elif tpe == 3:
	splitPosition = 3
	else:
	splitPosition = 1
	count = 0
	for i in range(len(g.words.syllabifiedWordOut)):
	if g.words.syllabifiedWordOut[i] == '&':
	count += 1
	if count > splitPosition:
	count = i
	break
	start, end = g.words.syllabifiedWordOut, g.words.syllabifiedWordOut
	end = end[count:]
	start = start[:count]
	if(g.flags.DEBUG):
	print(f"posi {count} {start} {end}")
	end = SchwaSpecificCorrection(g, end)
	if(g.flags.DEBUG):
	print(f"prefinal : {g.words.syllabifiedWordOut}")
	g.words.syllabifiedWordOut = start + end
	if(g.flags.DEBUG):
	print(f"prefinal1 : {g.words.syllabifiedWordOut}")
	g.words.syllabifiedWordOut = CleanseWord(g.words.syllabifiedWordOut)
	if(g.flags.DEBUG):
	print(f"final : {g.words.syllabifiedWordOut}")
	g.words.syllabifiedWordOut = SchwaDoubleConsonent(g.words.syllabifiedWordOut)
	if(g.flags.DEBUG):
	print(f"final0 : {g.words.syllabifiedWordOut}")

	g.words.syllabifiedWordOut = GeminateCorrection(g.words.syllabifiedWordOut, 0)
	g.words.syllabifiedWordOut = MiddleVowel(g, g.words.syllabifiedWordOut)
	g.words.syllabifiedWordOut = Syllabilfy(g.words.syllabifiedWordOut)

	SplitSyllables(g,g.words.syllabifiedWordOut)

	WritetoFiles(g)
	if clearflag == 1:
	t = g.words.outputText
	t = t.split('"')
	ln = len(t)
	i = 1
	g.answer = ''
	while i < ln:
	g.answer += t[i] + ' '
	i += 2
	g.answer.strip()
	return g.answer

	if __name__ == '__main__':

	if (len(sys.argv) != 5):
	printHelp()
	exit(-1)

	ans = wordparse(sys.argv[1], int(sys.argv[2]), int(sys.argv[3]), int(sys.argv[4]))
	print(ans)