101Frost committed on
Commit
2222b3b
·
verified ·
1 Parent(s): 1040e0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -17,12 +17,16 @@ MODELS = {
17
  "epitran": epitran.Epitran("ara-Arab")
18
  },
19
  "English": {
20
- "processor": Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h-lv60-self"),
21
- "model": Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h-lv60-self"),
22
  "epitran": epitran.Epitran("eng-Latn")
23
  }
24
  }
25
 
 
 
 
 
26
  def clean_phonemes(ipa):
27
  """Remove diacritics and length markers from phonemes"""
28
  return re.sub(r'[\u064B-\u0652\u02D0]', '', ipa)
@@ -42,7 +46,7 @@ def analyze_phonemes(language, reference_text, audio_file):
42
  ref_phonemes.append(list(ipa_clean))
43
 
44
  # Process audio file
45
- audio, sr = librosa.load(audio_file.name, sr=16000)
46
  input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
47
 
48
  # Get transcription
@@ -147,7 +151,7 @@ with gr.Blocks() as demo:
147
  value=get_default_text("Arabic")
148
  )
149
 
150
- audio_input = gr.File(label="Upload Audio File", type="file")
151
  submit_btn = gr.Button("Analyze")
152
  output = gr.JSON(label="Phoneme Alignment Results")
153
 
 
17
  "epitran": epitran.Epitran("ara-Arab")
18
  },
19
  "English": {
20
+ "processor": Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h"),
21
+ "model": Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h"),
22
  "epitran": epitran.Epitran("eng-Latn")
23
  }
24
  }
25
 
26
+ # Suppress the warning about newly initialized weights
27
+ for lang in MODELS.values():
28
+ lang["model"].config.ctc_loss_reduction = "mean"
29
+
30
  def clean_phonemes(ipa):
31
  """Remove diacritics and length markers from phonemes"""
32
  return re.sub(r'[\u064B-\u0652\u02D0]', '', ipa)
 
46
  ref_phonemes.append(list(ipa_clean))
47
 
48
  # Process audio file
49
+ audio, sr = librosa.load(audio_file, sr=16000)
50
  input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
51
 
52
  # Get transcription
 
151
  value=get_default_text("Arabic")
152
  )
153
 
154
+ audio_input = gr.Audio(label="Upload Audio File", type="filepath")
155
  submit_btn = gr.Button("Analyze")
156
  output = gr.JSON(label="Phoneme Alignment Results")
157