Update app.py
Browse files
app.py
CHANGED
@@ -17,12 +17,16 @@ MODELS = {
|
|
17 |
"epitran": epitran.Epitran("ara-Arab")
|
18 |
},
|
19 |
"English": {
|
20 |
-
"processor": Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-
|
21 |
-
"model": Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-
|
22 |
"epitran": epitran.Epitran("eng-Latn")
|
23 |
}
|
24 |
}
|
25 |
|
|
|
|
|
|
|
|
|
26 |
def clean_phonemes(ipa):
|
27 |
"""Remove diacritics and length markers from phonemes"""
|
28 |
return re.sub(r'[\u064B-\u0652\u02D0]', '', ipa)
|
@@ -42,7 +46,7 @@ def analyze_phonemes(language, reference_text, audio_file):
|
|
42 |
ref_phonemes.append(list(ipa_clean))
|
43 |
|
44 |
# Process audio file
|
45 |
-
audio, sr = librosa.load(audio_file
|
46 |
input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
|
47 |
|
48 |
# Get transcription
|
@@ -147,7 +151,7 @@ with gr.Blocks() as demo:
|
|
147 |
value=get_default_text("Arabic")
|
148 |
)
|
149 |
|
150 |
-
audio_input = gr.
|
151 |
submit_btn = gr.Button("Analyze")
|
152 |
output = gr.JSON(label="Phoneme Alignment Results")
|
153 |
|
|
|
17 |
"epitran": epitran.Epitran("ara-Arab")
|
18 |
},
|
19 |
"English": {
|
20 |
+
"processor": Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h"),
|
21 |
+
"model": Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h"),
|
22 |
"epitran": epitran.Epitran("eng-Latn")
|
23 |
}
|
24 |
}
|
25 |
|
26 |
+
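# Set the CTC loss reduction to "mean" on every loaded model (only matters when a training loss is computed).
# NOTE: this does not silence the "newly initialized weights" warning, which from_pretrained() has already logged above.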
|
27 |
+
for lang in MODELS.values():
|
28 |
+
lang["model"].config.ctc_loss_reduction = "mean"
|
29 |
+
|
30 |
def clean_phonemes(ipa):
|
31 |
"""Remove diacritics and length markers from phonemes"""
|
32 |
return re.sub(r'[\u064B-\u0652\u02D0]', '', ipa)
|
|
|
46 |
ref_phonemes.append(list(ipa_clean))
|
47 |
|
48 |
# Process audio file
|
49 |
+
audio, sr = librosa.load(audio_file, sr=16000)
|
50 |
input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
|
51 |
|
52 |
# Get transcription
|
|
|
151 |
value=get_default_text("Arabic")
|
152 |
)
|
153 |
|
154 |
+
audio_input = gr.Audio(label="Upload Audio File", type="filepath")
|
155 |
submit_btn = gr.Button("Analyze")
|
156 |
output = gr.JSON(label="Phoneme Alignment Results")
|
157 |
|