101Frost committed on
Commit d949c72 · verified · 1 Parent(s): dd8edb5

Update app.py

Files changed (1)
  1. app.py +58 -17
app.py CHANGED
@@ -9,17 +9,31 @@ import editdistance
 from jiwer import wer
 import json
 
-# Load model once at startup
-model_name = "jonatasgrosman/wav2vec2-large-xlsr-53-arabic"
-processor = Wav2Vec2Processor.from_pretrained(model_name)
-model = Wav2Vec2ForCTC.from_pretrained(model_name)
-epi = epitran.Epitran('ara-Arab')
+# Load both models at startup
+MODELS = {
+    "Arabic": {
+        "processor": Wav2Vec2Processor.from_pretrained("jonatasgrosman/wav2vec2-large-xlsr-53-arabic"),
+        "model": Wav2Vec2ForCTC.from_pretrained("jonatasgrosman/wav2vec2-large-xlsr-53-arabic"),
+        "epitran": epitran.Epitran("ara-Arab")
+    },
+    "English": {
+        "processor": Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h-lv60-self"),
+        "model": Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h-lv60-self"),
+        "epitran": epitran.Epitran("eng-Latn")
+    }
+}
 
 def clean_phonemes(ipa):
     """Remove diacritics and length markers from phonemes"""
     return re.sub(r'[\u064B-\u0652\u02D0]', '', ipa)
 
 def analyze_phonemes(language, reference_text, audio_file):
+    # Get the appropriate model components
+    lang_models = MODELS[language]
+    processor = lang_models["processor"]
+    model = lang_models["model"]
+    epi = lang_models["epitran"]
+
     # Convert reference text to phonemes
     ref_phonemes = []
     for word in reference_text.split():
@@ -46,6 +60,7 @@ def analyze_phonemes(language, reference_text, audio_file):
 
     # Prepare results in JSON format
     results = {
+        "language": language,
         "reference_text": reference_text,
         "transcription": transcription,
         "word_alignment": [],
@@ -110,17 +125,43 @@ def analyze_phonemes(language, reference_text, audio_file):
 
     return json.dumps(results, indent=2, ensure_ascii=False)
 
-# Create Gradio interface
-demo = gr.Interface(
-    fn=analyze_phonemes,
-    inputs=[
-        gr.Dropdown(["Arabic"], label="Language", value="Arabic"),
-        gr.Textbox(label="Reference Text", value="فَبِأَيِّ آلَاءِ رَبِّكُمَا تُكَذِّبَانِ"),
-        gr.File(label="Upload Audio File", type="file")
-    ],
-    outputs=gr.JSON(label="Phoneme Alignment Results"),
-    title="Arabic Phoneme Alignment Analysis",
-    description="Compare audio pronunciation with reference text at phoneme level"
-)
+# Create Gradio interface with language-specific default text
+def get_default_text(language):
+    return {
+        "Arabic": "فَبِأَيِّ آلَاءِ رَبِّكُمَا تُكَذِّبَانِ",
+        "English": "The quick brown fox jumps over the lazy dog"
+    }.get(language, "")
+
+with gr.Blocks() as demo:
+    gr.Markdown("# Multilingual Phoneme Alignment Analysis")
+    gr.Markdown("Compare audio pronunciation with reference text at phoneme level")
+
+    with gr.Row():
+        language = gr.Dropdown(
+            ["Arabic", "English"],
+            label="Language",
+            value="Arabic"
+        )
+        reference_text = gr.Textbox(
+            label="Reference Text",
+            value=get_default_text("Arabic")
+        )
+
+    audio_input = gr.File(label="Upload Audio File", type="file")
+    submit_btn = gr.Button("Analyze")
+    output = gr.JSON(label="Phoneme Alignment Results")
+
+    # Update default text when language changes
+    language.change(
+        fn=get_default_text,
+        inputs=language,
+        outputs=reference_text
+    )
+
+    submit_btn.click(
+        fn=analyze_phonemes,
+        inputs=[language, reference_text, audio_input],
+        outputs=output
+    )
 
 demo.launch()
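The hunks above skip the middle of analyze_phonemes, so the step that actually consumes the selected processor and model (loading the audio and decoding a transcription) is not visible in this diff. A minimal sketch of what that step usually looks like for the wav2vec2 checkpoints registered in MODELS, assuming librosa for loading, 16 kHz resampling, and greedy CTC decoding (the transcribe helper is illustrative, not the committed code):

import librosa
import torch

def transcribe(processor, model, audio_path):
    # The wav2vec2 checkpoints above expect 16 kHz mono input.
    speech, _ = librosa.load(audio_path, sr=16000)
    inputs = processor(speech, sampling_rate=16000, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits            # (batch, frames, vocab)
    predicted_ids = torch.argmax(logits, dim=-1)   # greedy CTC decoding
    return processor.batch_decode(predicted_ids)[0]

On the reference side, the loop over reference_text.split() shown in the first hunk presumably feeds each word through epi.transliterate() and clean_phonemes() to build ref_phonemes, which is what makes the per-language epitran entry necessary.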
 
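Since analyze_phonemes returns a JSON string (json.dumps) rather than a dict, anything calling it outside the Blocks UI has to parse the result; only the keys visible in this diff are used below. Passing a bare file path is an assumption: with the type="file" setting used here (Gradio 3.x), gr.File actually hands the function a temporary-file wrapper whose .name attribute holds the path.

import json

# "sample.wav" is a placeholder path, not a file from this repo.
report = json.loads(analyze_phonemes("English", "The quick brown fox jumps over the lazy dog", "sample.wav"))
print(report["language"], report["transcription"])
for entry in report["word_alignment"]:
    print(entry)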