hriteshMaikap committed
Commit f37bdc4 · verified · 1 Parent(s): af85ae1

Update app.py

Files changed (1): app.py (+52, -7)
app.py CHANGED
@@ -14,9 +14,9 @@ warnings.filterwarnings('ignore', category=UserWarning, module='librosa')
 
 # Model repository information
 REPO_ID = "hriteshMaikap/languageClassifier"
-MODEL_FILENAME = "indic_language_classifier_mtm.keras"
-SCALER_FILENAME = "audio_feature_scaler_mtm.pkl"
-CONFIG_FILENAME = "config_mtm.json"
+MODEL_FILENAME = "indic_language_classifier_mtm.keras"  # Updated filename
+SCALER_FILENAME = "audio_feature_scaler_mtm.pkl"  # Updated filename
+CONFIG_FILENAME = "config_mtm.json"  # Updated filename
 
 # Initialize global variables to store loaded artifacts
 model = None
@@ -28,35 +28,50 @@ def load_artifacts():
     global model, scaler, config
 
     try:
+        print(f"Loading artifacts from {REPO_ID}...")
         # Download files from Hugging Face Hub
         model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
         scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
         config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)
 
+        print(f"Model path: {model_path}")
+        print(f"Scaler path: {scaler_path}")
+        print(f"Config path: {config_path}")
+
         # Load model
         model = tf.keras.models.load_model(model_path, compile=False)
+        print("Model loaded successfully")
 
         # Load scaler
         scaler = joblib.load(scaler_path)
+        print("Scaler loaded successfully")
 
         # Load configuration
         with open(config_path, 'r') as f:
             config = json.load(f)
+        print(f"Config loaded successfully: {config.keys()}")
 
         return True
     except Exception as e:
         print(f"Error loading artifacts: {e}")
+        import traceback
+        traceback.print_exc()
         return False
 
 def extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type):
     """Extracts audio features directly from audio array."""
     try:
+        print(f"Extracting features: n_mfcc={n_mfcc}, max_pad_len={max_pad_len}, feature_type={feature_type}")
+        print(f"Audio shape: {audio.shape if hasattr(audio, 'shape') else 'unknown'}, Sample rate: {sample_rate}")
+
         mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
+        print(f"MFCC shape: {mfccs.shape}")
 
         if feature_type == 'mfcc_delta':
             delta_mfccs = librosa.feature.delta(mfccs)
             delta2_mfccs = librosa.feature.delta(mfccs, order=2)
             features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
+            print(f"Combined features shape: {features.shape}")
         elif feature_type == 'mfcc':
             features = mfccs
         else:
@@ -65,19 +80,25 @@ def extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type):
         current_len = features.shape[1]
         if current_len > max_pad_len:
             features = features[:, :max_pad_len]
+            print(f"Features truncated to {features.shape}")
         elif current_len < max_pad_len:
             pad_width = max_pad_len - current_len
             features = np.pad(features, pad_width=((0, 0), (0, pad_width)), mode='constant')
+            print(f"Features padded to {features.shape}")
 
         return features.T  # Transpose to (time_steps, features)
     except Exception as e:
         print(f"Error extracting features: {e}")
+        import traceback
+        traceback.print_exc()
         return None
 
 def classify_language(audio_path):
     """Process audio file and classify language."""
     global model, scaler, config
 
+    print(f"Processing audio file: {audio_path}")
+
     # Load artifacts if not loaded
     if model is None or scaler is None or config is None:
         if not load_artifacts():
@@ -91,8 +112,12 @@ def classify_language(audio_path):
         class_labels = config.get('class_labels', [])
         n_features_expected = config.get('n_features_input', 39)
 
+        print(f"Config parameters: n_mfcc={n_mfcc}, max_pad_len={max_pad_len}, feature_type={feature_type}")
+        print(f"Expected features: {n_features_expected}, Classes: {class_labels}")
+
         # Load and process audio
         audio, sample_rate = librosa.load(audio_path, sr=None, res_type='kaiser_fast')
+        print(f"Loaded audio: duration={len(audio)/sample_rate:.2f}s, sample_rate={sample_rate}Hz")
 
         # Extract features
         features = extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type)
@@ -100,22 +125,29 @@ def classify_language(audio_path):
             return "Error: Failed to extract audio features"
 
         # Verify feature dimensions
+        print(f"Features shape: {features.shape}")
         if features.shape[1] != n_features_expected:
             return f"Error: Extracted feature dimension ({features.shape[1]}) doesn't match expected ({n_features_expected})"
 
         # Scale features
         features_reshaped = features.reshape(-1, n_features_expected)
+        print(f"Reshaped for scaling: {features_reshaped.shape}")
         features_scaled_reshaped = scaler.transform(features_reshaped)
         features_final = features_scaled_reshaped.reshape(1, max_pad_len, n_features_expected)
+        print(f"Final features shape for prediction: {features_final.shape}")
 
         # Predict
+        print("Running prediction...")
         prediction_probabilities = model.predict(features_final, verbose=0)
         predicted_index = np.argmax(prediction_probabilities, axis=1)[0]
+        print(f"Prediction complete. Raw output shape: {prediction_probabilities.shape}")
+        print(f"Predicted index: {predicted_index}")
 
         # Map to language label
         if 0 <= predicted_index < len(class_labels):
             predicted_language = class_labels[predicted_index]
             confidence = prediction_probabilities[0][predicted_index]
+            print(f"Predicted language: {predicted_language}, Confidence: {confidence:.2%}")
 
             # Prepare results to display all probabilities
             results = []
@@ -128,7 +160,7 @@ def classify_language(audio_path):
 
             return result_text
         else:
-            return f"Error: Predicted index {predicted_index} out of bounds for labels"
+            return f"Error: Predicted index {predicted_index} out of bounds for labels (0-{len(class_labels)-1})"
 
     except Exception as e:
         import traceback
@@ -136,19 +168,32 @@ def classify_language(audio_path):
         print(error_msg)
         return f"Error: {str(e)}"
 
-# Create Gradio interface
+# Create Gradio interface with additional information
 demo = gr.Interface(
     fn=classify_language,
     inputs=gr.Audio(type="filepath", label="Upload or Record Audio"),
     outputs="text",
     title="Indian Language Classifier",
-    description="Upload or record audio in an Indian language, and the model will identify which language it is. Supported languages are defined in the configuration file.",
+    description="Upload or record audio in an Indian language, and the model will identify which language it is. "
+                "The model supports multiple Indian languages as defined in the configuration file.",
+    article="""
+    ### Tips for Best Results
+    - Speak clearly in one of the supported Indian languages
+    - Try to record in a quiet environment
+    - Recordings should be at least 2-3 seconds long for best results
+
+    ### How it Works
+    This model extracts MFCC features from your audio and uses a neural network
+    trained on multiple Indian languages to predict which language you're speaking.
+    """,
     examples=[],  # You can add example audio files here if available
     cache_examples=False
 )
 
-# Load artifacts on startup
+# Load artifacts on startup to prevent cold start
+print("Initializing application...")
 load_artifacts()
+print("Application initialized successfully!")
 
 # Launch the app
 if __name__ == "__main__":
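
The new debug prints make visible how much classify_language() depends on the downloaded config: it reads n_mfcc, max_pad_len, feature_type, class_labels, and n_features_input from it. As a reading aid, here is a minimal Python sketch of what json.load(config_path) is expected to return. The key names come from the config.get(...) calls in the diff; every value and label below is an illustrative assumption, not the repo's actual config:

# Hypothetical shape of config_mtm.json (values are assumptions, not the repo's real contents)
config = {
    "n_mfcc": 13,                  # MFCC coefficients per frame (assumed)
    "max_pad_len": 100,            # time steps after padding/truncation (assumed)
    "feature_type": "mfcc_delta",  # 'mfcc' or 'mfcc_delta', per extract_features()
    "class_labels": ["hindi", "marathi", "tamil"],  # hypothetical labels; order must match the model's output units
    "n_features_input": 39,        # 13 MFCCs x 3 (base + delta + delta2) when feature_type is 'mfcc_delta'
}

The consistency constraint shows up in the dimension check above: with feature_type 'mfcc_delta', n_features_input must equal 3 * n_mfcc; with plain 'mfcc' it must equal n_mfcc. Otherwise classify_language() returns the dimension-mismatch error.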
 
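Because load_artifacts() now runs at import time, the classifier can also be exercised without the Gradio UI. A minimal smoke-test sketch, assuming it is saved next to app.py; "sample.wav" is a placeholder for any local recording, and the script itself is hypothetical, not part of the commit:

# smoke_test.py — hypothetical helper for trying the pipeline from the command line
from app import load_artifacts, classify_language

if load_artifacts():                            # pulls model, scaler, and config from the Hub
    print(classify_language("sample.wav"))      # prints per-language probabilities or an error string
else:
    print("Artifact download failed; check network access and REPO_ID.")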