smtiitm
/

Fastspeech2_HS

Model card Files Files and versions Community

File size: 8,461 Bytes

2c8dc05

# import sys, os
# SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# sys.path.append(SCRIPT_DIR)

# combined lexical analyzer and parser

from Unified_parser.ply.lex import Lexer
from Unified_parser.ply.yacc import yacc
from Unified_parser.globals import *
from Unified_parser.helpers import *
import sys
from sys import exit
from Unified_parser.get_phone_mapped_python import *

# tokens used
tokens = ('kaki_c', 'conjsyll2_c', 'fullvowel_b', 'kaki_a', 'kaki_b',  'conjsyll2_b', 'conjsyll2_a',
        'conjsyll1', 'nukchan_b','nukchan_a', 'yarule', 'fullvowel_a', 'vowel')

# parser part

def p_sentence(p):
    '''
    sentence : words
    '''
    if p.parser.g.flags.parseLevel == 0:
        p.parser.g.words.syllabifiedWordOut = p[1]

        if p.parser.g.words.syllabifiedWordOut.find('&&') != -1:
            p.parser.g.words.syllabifiedWordOut = rec_replace(p.parser.g.words.syllabifiedWordOut,'&&','&')
        
        p.parser.g.flags.parseLevel += 1
    else:
        p.parser.g.words.phonifiedWord = p[1]

def p_words_syltoken(p):
    '''
    words : syltoken
    '''
    if(p.parser.g.flags.DEBUG):
        print(f"Syll:\t{p[1]}")
    p[0] = p[1]

def p_words_wordsandsyltoken(p):
    '''
    words : words syltoken
    '''
    if(p.parser.g.flags.DEBUG):
        print(f"Syll:\t{p[2]}")
    p[0] = p[1] + p[2]

def p_syltoken(p):
    '''
    syltoken : fullvowel_b
             | fullvowel_a
             | conjsyll2_c
             | conjsyll2_b
             | conjsyll2_a
             | conjsyll1 
             | nukchan_b
             | nukchan_a
             | yarule
             | vowel
    '''
    p[0] = p[1]

def p_syltoken1(p):
    '''
    syltoken :
             | kaki_c
             | kaki_a
             | kaki_b
    '''
    if (p.parser.g.flags.DEBUG):
        print(f'kaki : {p[1]}')
    p[0] = p[1]

def p_error(p):
    print('parse error')
    exit(1)

# print the help of syntax
def printHelp():

    print("UnifiedParser - Usage Instructions")
    print("Run python3 parser.py wd lsflag wfflag clearflag")
    print("wd - word to parse in unicode.")
    print("lsflag - always 0. we are not using this.")
    print("wfflag - 0 for Monophone parsing, 1 for syllable parsing, 2 for Akshara Parsing")
    print("clearflag -0. for normal syntx, 1 for removing the lisp like format of output and to just produce space separated output.  2 for cls labled output")
    print("language - name of language in quationes")

def wordparse(wd : str, lsflag : int, wfflag : int, clearflag : int,language_main :str):
    g = GLOBALS()
    lexer = Lexer()
    parser = yacc()
    parser.g = g
    g.flags.DEBUG = False
    wd = wd.strip('  ') # hidden characters

    if lsflag not in [0,1] or wfflag not in [0,1,2]:
        print("Invalid input")
        exit(1)
    
    g.flags.LangSpecificCorrectionFlag = lsflag
    
    g.flags.writeFormat = wfflag
    if wfflag == 4:
        g.flags.writeFormat = 1
        g.flags.syllTagFlag = 1
    
    word = wd
    if g.flags.DEBUG:
        print(f'Word : {word}')

    word = RemoveUnwanted(word)
    if g.flags.DEBUG:
        print(f'Cleared Word : {word}')

    if SetlanguageFeat(g, word) == 0:
        return 0
    
    if CheckDictionary(g, word) != 0:
        return 0

    if g.flags.DEBUG:
        print(f'langId : {g.langId}')
    
    word = ConvertToSymbols(g, word)

    if g.flags.DEBUG:
        print(f"Symbols code : {g.words.unicodeWord}")
        print(f"Symbols syllables : {g.words.syllabifiedWord}")
        
    language_main=language_main.lower()
    # if language_main == "telugu":
    #     g.currLang = g.TELUGU
    #     g.langId = 3
    #     if(g.langId < 5):
    #         g.isSouth = 1
    #     print("language:",language_main)

    # if language_main == "marathi":
    #     print("inside main")
    #     g.currLang = g.KANNADA
    #     g.langId = 4
    #     if(g.langId < 5):
    #         g.isSouth = 1
    #     print("language:",language_main)
        
    parser.parse(g.words.syllabifiedWord, lexer=lexer)
    if(g.flags.DEBUG):
        print(f"Syllabified Word : {g.words.syllabifiedWordOut}")
    g.words.syllabifiedWordOut = rec_replace(g.words.syllabifiedWordOut, '&#&','&') + '&'
    if(g.flags.DEBUG):
        print(f"Syllabified Word out : {g.words.syllabifiedWordOut}")
    g.words.syllabifiedWordOut = LangSpecificCorrection(g, g.words.syllabifiedWordOut, g.flags.LangSpecificCorrectionFlag)
    if(g.flags.DEBUG):
        print(f"Syllabified Word langCorr : {g.words.syllabifiedWordOut}")
    # if(g.flags.DEBUG):
    #     print(f"Syllabified Word gemCorr : {g.words.syllabifiedWordOut}")
    g.words.syllabifiedWordOut = CleanseWord(g.words.syllabifiedWordOut)
    if(g.flags.DEBUG):
        print(f"Syllabified Word memCorr : {g.words.syllabifiedWordOut}")

    if not g.isSouth:
        if g.flags.DEBUG:
            print('NOT SOUTH')
        count = 0
        for i in range(len(g.words.syllabifiedWordOut)):
            if g.words.syllabifiedWordOut[i] == '&':
                count += 1
        splitPosition = 2
        if GetPhoneType(g, g.words.syllabifiedWordOut, 1) == 1:
            if count > 2:
                tpe = GetPhoneType(g, g.words.syllabifiedWordOut, 2)
                if tpe == 2:
                    splitPosition = 1
                elif tpe == 3:
                    splitPosition = 3
            else:
                splitPosition = 1
        count = 0
        for i in range(len(g.words.syllabifiedWordOut)):
            if g.words.syllabifiedWordOut[i] == '&':
                count += 1
            if count > splitPosition:
                count = i
                break
        start, end = g.words.syllabifiedWordOut, g.words.syllabifiedWordOut
        end = end[count:]
        start = start[:count]
        if(g.flags.DEBUG):
            print(f"posi {count} {start} {end}")
        end = SchwaSpecificCorrection(g, end)
        if(g.flags.DEBUG):
            print(f"prefinal : {g.words.syllabifiedWordOut}")
        g.words.syllabifiedWordOut = start + end
        if(g.flags.DEBUG):
            print(f"prefinal1 : {g.words.syllabifiedWordOut}")
        g.words.syllabifiedWordOut = CleanseWord(g.words.syllabifiedWordOut)
        if(g.flags.DEBUG):
            print(f"final : {g.words.syllabifiedWordOut}")
        g.words.syllabifiedWordOut = SchwaDoubleConsonent(g.words.syllabifiedWordOut)
        if(g.flags.DEBUG):
            print(f"final0 : {g.words.syllabifiedWordOut}")
    
    g.words.syllabifiedWordOut = GeminateCorrection(g.words.syllabifiedWordOut, 0)
    g.words.syllabifiedWordOut = MiddleVowel(g, g.words.syllabifiedWordOut)
    g.words.syllabifiedWordOut = Syllabilfy(g.words.syllabifiedWordOut)
    
    SplitSyllables(g,g.words.syllabifiedWordOut)
    temp_list=g.syllableList
    if g.flags.DEBUG:
        print("temp_list_1",temp_list)
    # print("temp_list_1",temp_list)
    # for i in range(len(temp_list)):
    #     print("output_a", temp_list[i] )
    #     temp_list[i]=convert_to_main_lang (g,temp_list[i], language_main)
    #     print("output_b", temp_list[i] )


    WritetoFiles(g)
    if clearflag == 0:
        return g.answer
    if clearflag == 1:
        t = g.words.outputText
        t = t.split('"')
        ln = len(t)
        i = 1
        g.answer = ''
        while i < ln:
            g.answer += t[i] + ' '
            i += 2
        g.answer.strip()
        return g.answer
    if clearflag == 2:
        t = g.words.outputText
        t = t.split('"')
        # print(t)
        ln = len(t)
        i = 1
        g.answer = ''
        text_replacer=TextReplacer()
        while i < ln:
            if len(t[i])>1 and ord(t[i][0])<128 and ord(t[i][0])>=65:
                temp = text_replacer.apply_replacements_by_phonems(t[i])
                g.answer += temp
            elif len(t[i])==1 and ord(t[i][0])<128 and ord(t[i][0])>=65:
                g.answer += t[i]
            i += 1
        g.answer.strip()
        return g.answer


def safe_word_parse(wd : str, lsflag : int, wfflag : int, clearflag : int,language : str):
    try:
        ans = wordparse(wd, lsflag, wfflag, clearflag,language)
        # print(ans) 
    except:
        print(f'failed on {wd}')
        ans = 'FAILED'
    return wd, ans,


if __name__ == '__main__':

    if (len(sys.argv) != 6):
        printHelp()
        exit(-1)
    
    ans = wordparse(sys.argv[1], int(sys.argv[2]), int(sys.argv[3]), int(sys.argv[4]),str(sys.argv[5]))
    print(ans)