hriteshMaikap committed on
Commit c475932 · verified · 1 Parent(s): f37bdc4

Update app.py

Files changed (1): app.py (+55 -182)
app.py CHANGED
@@ -1,200 +1,73 @@
 import gradio as gr
 import numpy as np
 import librosa
-import tensorflow as tf
-import json
-import os
 import joblib
+import json
+import tensorflow as tf
 from huggingface_hub import hf_hub_download
-import warnings
-
-# Suppress specific warnings
-warnings.filterwarnings('ignore', category=FutureWarning, module='librosa')
-warnings.filterwarnings('ignore', category=UserWarning, module='librosa')
 
-# Model repository information
+# Download artifacts from Hugging Face Hub
 REPO_ID = "hriteshMaikap/languageClassifier"
-MODEL_FILENAME = "indic_language_classifier_mtm.keras" # Updated filename
-SCALER_FILENAME = "audio_feature_scaler_mtm.pkl" # Updated filename
-CONFIG_FILENAME = "config_mtm.json" # Updated filename
+MODEL_FILENAME = "indic_language_classifier_mtm.keras"
+SCALER_FILENAME = "audio_feature_scaler_mtm.pkl"
+CONFIG_FILENAME = "config_mtm.json"
 
-# Initialize global variables to store loaded artifacts
-model = None
-scaler = None
-config = None
+model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
+scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
+config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)
 
-def load_artifacts():
-    """Load model, scaler and configuration from Hugging Face Hub."""
-    global model, scaler, config
-
-    try:
-        print(f"Loading artifacts from {REPO_ID}...")
-        # Download files from Hugging Face Hub
-        model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
-        scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
-        config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)
-
-        print(f"Model path: {model_path}")
-        print(f"Scaler path: {scaler_path}")
-        print(f"Config path: {config_path}")
-
-        # Load model
-        model = tf.keras.models.load_model(model_path, compile=False)
-        print("Model loaded successfully")
-
-        # Load scaler
-        scaler = joblib.load(scaler_path)
-        print("Scaler loaded successfully")
-
-        # Load configuration
-        with open(config_path, 'r') as f:
-            config = json.load(f)
-        print(f"Config loaded successfully: {config.keys()}")
-
-        return True
-    except Exception as e:
-        print(f"Error loading artifacts: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
+with open(config_path, "r") as f:
+    config = json.load(f)
+n_mfcc = config["n_mfcc"]
+max_pad_len = config["max_pad_len"]
+feature_type = config["feature_type"]
+class_labels = config["class_labels"]
+sr = 22050 # Use the same sample rate as in your training
 
-def extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type):
-    """Extracts audio features directly from audio array."""
-    try:
-        print(f"Extracting features: n_mfcc={n_mfcc}, max_pad_len={max_pad_len}, feature_type={feature_type}")
-        print(f"Audio shape: {audio.shape if hasattr(audio, 'shape') else 'unknown'}, Sample rate: {sample_rate}")
-
-        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
-        print(f"MFCC shape: {mfccs.shape}")
-
-        if feature_type == 'mfcc_delta':
-            delta_mfccs = librosa.feature.delta(mfccs)
-            delta2_mfccs = librosa.feature.delta(mfccs, order=2)
-            features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
-            print(f"Combined features shape: {features.shape}")
-        elif feature_type == 'mfcc':
-            features = mfccs
-        else:
-            features = mfccs # Fallback
-
-        current_len = features.shape[1]
-        if current_len > max_pad_len:
-            features = features[:, :max_pad_len]
-            print(f"Features truncated to {features.shape}")
-        elif current_len < max_pad_len:
-            pad_width = max_pad_len - current_len
-            features = np.pad(features, pad_width=((0, 0), (0, pad_width)), mode='constant')
-            print(f"Features padded to {features.shape}")
-
-        return features.T # Transpose to (time_steps, features)
-    except Exception as e:
-        print(f"Error extracting features: {e}")
-        import traceback
-        traceback.print_exc()
-        return None
+scaler = joblib.load(scaler_path)
+model = tf.keras.models.load_model(model_path)
 
-def classify_language(audio_path):
-    """Process audio file and classify language."""
-    global model, scaler, config
-
-    print(f"Processing audio file: {audio_path}")
-
-    # Load artifacts if not loaded
-    if model is None or scaler is None or config is None:
-        if not load_artifacts():
-            return "Error: Failed to load model artifacts"
-
+def extract_features(file_path, n_mfcc, max_pad_len, feature_type, sr):
+    audio, _ = librosa.load(file_path, sr=sr, mono=True, res_type='kaiser_fast')
+    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
+    if feature_type == 'mfcc_delta':
+        delta_mfccs = librosa.feature.delta(mfccs)
+        delta2_mfccs = librosa.feature.delta(mfccs, order=2)
+        features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
+    elif feature_type == 'mfcc':
+        features = mfccs
+    else:
+        features = mfccs
+    current_len = features.shape[1]
+    if current_len > max_pad_len:
+        features = features[:, :max_pad_len]
+    elif current_len < max_pad_len:
+        pad_width = max_pad_len - current_len
+        features = np.pad(features, pad_width=((0, 0), (0, pad_width)), mode='constant')
+    return features.T
+
+def predict_language(audio_filepath):
     try:
-        # Get configuration parameters
-        n_mfcc = config.get('n_mfcc', 13)
-        max_pad_len = config.get('max_pad_len', 100)
-        feature_type = config.get('feature_type', 'mfcc_delta')
-        class_labels = config.get('class_labels', [])
-        n_features_expected = config.get('n_features_input', 39)
-
-        print(f"Config parameters: n_mfcc={n_mfcc}, max_pad_len={max_pad_len}, feature_type={feature_type}")
-        print(f"Expected features: {n_features_expected}, Classes: {class_labels}")
-
-        # Load and process audio
-        audio, sample_rate = librosa.load(audio_path, sr=None, res_type='kaiser_fast')
-        print(f"Loaded audio: duration={len(audio)/sample_rate:.2f}s, sample_rate={sample_rate}Hz")
-
-        # Extract features
-        features = extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type)
-        if features is None:
-            return "Error: Failed to extract audio features"
-
-        # Verify feature dimensions
-        print(f"Features shape: {features.shape}")
-        if features.shape[1] != n_features_expected:
-            return f"Error: Extracted feature dimension ({features.shape[1]}) doesn't match expected ({n_features_expected})"
-
-        # Scale features
-        features_reshaped = features.reshape(-1, n_features_expected)
-        print(f"Reshaped for scaling: {features_reshaped.shape}")
-        features_scaled_reshaped = scaler.transform(features_reshaped)
-        features_final = features_scaled_reshaped.reshape(1, max_pad_len, n_features_expected)
-        print(f"Final features shape for prediction: {features_final.shape}")
-
-        # Predict
-        print("Running prediction...")
-        prediction_probabilities = model.predict(features_final, verbose=0)
-        predicted_index = np.argmax(prediction_probabilities, axis=1)[0]
-        print(f"Prediction complete. Raw output shape: {prediction_probabilities.shape}")
-        print(f"Predicted index: {predicted_index}")
-
-        # Map to language label
-        if 0 <= predicted_index < len(class_labels):
-            predicted_language = class_labels[predicted_index]
-            confidence = prediction_probabilities[0][predicted_index]
-            print(f"Predicted language: {predicted_language}, Confidence: {confidence:.2%}")
-
-            # Prepare results to display all probabilities
-            results = []
-            for i, lang in enumerate(class_labels):
-                prob = prediction_probabilities[0][i]
-                results.append(f"{lang}: {prob:.2%}")
-
-            result_text = f"Predicted Language: {predicted_language} (Confidence: {confidence:.2%})\n\n"
-            result_text += "All Predictions:\n" + "\n".join(results)
-
-            return result_text
-        else:
-            return f"Error: Predicted index {predicted_index} out of bounds for labels (0-{len(class_labels)-1})"
-
+        features = extract_features(audio_filepath, n_mfcc, max_pad_len, feature_type, sr)
+        features_scaled = scaler.transform(features)
+        features_scaled = features_scaled[np.newaxis, :, :]
+        pred_probs = model.predict(features_scaled)
+        pred_idx = np.argmax(pred_probs, axis=1)[0]
+        pred_lang = class_labels[pred_idx]
+        confidence = float(pred_probs[0, pred_idx])
+        all_probs = {l: float(p) for l, p in zip(class_labels, pred_probs[0])}
+        prob_str = "\n".join([f"{l}: {p:.3f}" for l, p in all_probs.items()])
+        return f"**Prediction:** {pred_lang}\n**Confidence:** {confidence:.2%}\n\n**Class Probabilities:**\n{prob_str}"
     except Exception as e:
-        import traceback
-        error_msg = f"Error during classification: {e}\n{traceback.format_exc()}"
-        print(error_msg)
-        return f"Error: {str(e)}"
+        return f"Error processing audio: {str(e)}"
 
-# Create Gradio interface with additional information
 demo = gr.Interface(
-    fn=classify_language,
-    inputs=gr.Audio(type="filepath", label="Upload or Record Audio"),
-    outputs="text",
-    title="Indian Language Classifier",
-    description="Upload or record audio in an Indian language, and the model will identify which language it is. "
-                "The model supports multiple Indian languages as defined in the configuration file.",
-    article="""
-    ### Tips for Best Results
-    - Speak clearly in one of the supported Indian languages
-    - Try to record in a quiet environment
-    - Recordings should be at least 2-3 seconds long for best results
-
-    ### How it Works
-    This model extracts MFCC features from your audio and uses a neural network
-    trained on multiple Indian languages to predict which language you're speaking.
-    """,
-    examples=[], # You can add example audio files here if available
-    cache_examples=False
+    fn=predict_language,
+    inputs=gr.Audio(type="filepath", label="Upload or record audio (.wav or .mp3)"),
+    outputs=gr.Markdown(),
+    title="Indic Language Classifier (Marathi, Telugu, Malayalam)",
+    description="Record or upload an audio sample. The model predicts the language (Marathi, Telugu, or Malayalam)."
 )
 
-# Load artifacts on startup to prevent cold start
-print("Initializing application...")
-load_artifacts()
-print("Application initialized successfully!")
-
-# Launch the app
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(show_error=True)
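
For reviewers who want to sanity-check the rewritten pipeline outside the Space, a minimal smoke-test sketch follows. The `smoke_test.py` name and the `sample.wav` path are placeholders, not part of this commit; importing `app` runs the new module-level setup, so the Hub artifacts must be reachable.

# smoke_test.py -- hypothetical helper, not part of this commit.
import app  # import triggers hf_hub_download, config parsing, and scaler/model loading

# Shape check: extract_features returns a time-major (max_pad_len, n_features)
# matrix; for feature_type == 'mfcc_delta' the width is 3 * n_mfcc, which must
# match what the scaler and model were fitted on.
feats = app.extract_features("sample.wav", app.n_mfcc, app.max_pad_len,
                             app.feature_type, app.sr)
expected = 3 * app.n_mfcc if app.feature_type == "mfcc_delta" else app.n_mfcc
assert feats.shape == (app.max_pad_len, expected)

# End-to-end prediction through the same entry point Gradio calls.
print(app.predict_language("sample.wav"))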