Spaces: Build error
Update app.py
app.py CHANGED
```diff
@@ -14,9 +14,9 @@ warnings.filterwarnings('ignore', category=UserWarning, module='librosa')
 
 # Model repository information
 REPO_ID = "hriteshMaikap/languageClassifier"
-MODEL_FILENAME = "indic_language_classifier_mtm.keras"
-SCALER_FILENAME = "audio_feature_scaler_mtm.pkl"
-CONFIG_FILENAME = "config_mtm.json"
+MODEL_FILENAME = "indic_language_classifier_mtm.keras"  # Updated filename
+SCALER_FILENAME = "audio_feature_scaler_mtm.pkl"  # Updated filename
+CONFIG_FILENAME = "config_mtm.json"  # Updated filename
 
 # Initialize global variables to store loaded artifacts
 model = None
```
```diff
@@ -28,35 +28,50 @@ def load_artifacts():
     global model, scaler, config
 
     try:
+        print(f"Loading artifacts from {REPO_ID}...")
         # Download files from Hugging Face Hub
         model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
         scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
         config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)
 
+        print(f"Model path: {model_path}")
+        print(f"Scaler path: {scaler_path}")
+        print(f"Config path: {config_path}")
+
         # Load model
         model = tf.keras.models.load_model(model_path, compile=False)
+        print("Model loaded successfully")
 
         # Load scaler
         scaler = joblib.load(scaler_path)
+        print("Scaler loaded successfully")
 
         # Load configuration
         with open(config_path, 'r') as f:
             config = json.load(f)
+        print(f"Config loaded successfully: {config.keys()}")
 
         return True
     except Exception as e:
         print(f"Error loading artifacts: {e}")
+        import traceback
+        traceback.print_exc()
         return False
 
 def extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type):
     """Extracts audio features directly from audio array."""
     try:
+        print(f"Extracting features: n_mfcc={n_mfcc}, max_pad_len={max_pad_len}, feature_type={feature_type}")
+        print(f"Audio shape: {audio.shape if hasattr(audio, 'shape') else 'unknown'}, Sample rate: {sample_rate}")
+
         mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
+        print(f"MFCC shape: {mfccs.shape}")
 
         if feature_type == 'mfcc_delta':
             delta_mfccs = librosa.feature.delta(mfccs)
             delta2_mfccs = librosa.feature.delta(mfccs, order=2)
             features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
+            print(f"Combined features shape: {features.shape}")
         elif feature_type == 'mfcc':
             features = mfccs
         else:
```
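Since the Space is currently failing with a build error, this hunk's main change is print-based tracing around each loading step. The download step can also be exercised outside the Space; the sketch below is a hypothetical smoke test (not part of this commit) that reuses the repo id and filenames from the diff and only calls `hf_hub_download` and `json.load`, so it runs without TensorFlow installed.

```python
# Hypothetical smoke test, not part of this commit: confirm the three
# artifacts download from the Hub and the config parses as JSON.
import json
from huggingface_hub import hf_hub_download

REPO_ID = "hriteshMaikap/languageClassifier"
FILENAMES = [
    "indic_language_classifier_mtm.keras",
    "audio_feature_scaler_mtm.pkl",
    "config_mtm.json",
]

# Download each artifact and print its local cache path
paths = {name: hf_hub_download(repo_id=REPO_ID, filename=name) for name in FILENAMES}
for name, path in paths.items():
    print(f"{name} -> {path}")

# Parse the config and list its keys
with open(paths["config_mtm.json"]) as f:
    config = json.load(f)
print(sorted(config))  # expect keys like 'class_labels' and 'n_features_input'
```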
```diff
@@ -65,19 +80,25 @@ def extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type):
         current_len = features.shape[1]
         if current_len > max_pad_len:
             features = features[:, :max_pad_len]
+            print(f"Features truncated to {features.shape}")
         elif current_len < max_pad_len:
             pad_width = max_pad_len - current_len
             features = np.pad(features, pad_width=((0, 0), (0, pad_width)), mode='constant')
+            print(f"Features padded to {features.shape}")
 
         return features.T  # Transpose to (time_steps, features)
     except Exception as e:
         print(f"Error extracting features: {e}")
+        import traceback
+        traceback.print_exc()
         return None
 
 def classify_language(audio_path):
     """Process audio file and classify language."""
     global model, scaler, config
 
+    print(f"Processing audio file: {audio_path}")
+
     # Load artifacts if not loaded
     if model is None or scaler is None or config is None:
         if not load_artifacts():
```
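A shape walk-through connects `extract_features` to the `n_features_input` default of 39 used later in `classify_language`: with `feature_type='mfcc_delta'`, the MFCC, delta, and delta-delta blocks each contribute `n_mfcc` rows, so `n_mfcc=13` yields 39 feature rows before the transpose. The sketch below assumes `n_mfcc=13` and `max_pad_len=100` for illustration; the real values come from `config_mtm.json`.

```python
# Hypothetical shape walk-through; the concrete 13 and 100 are assumptions.
import numpy as np
import librosa

sample_rate = 16000
audio = np.random.randn(sample_rate * 2).astype(np.float32)  # 2 s of noise as a stand-in

n_mfcc, max_pad_len = 13, 100
mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)   # (13, T)
delta = librosa.feature.delta(mfccs)                                   # (13, T)
delta2 = librosa.feature.delta(mfccs, order=2)                         # (13, T)
features = np.concatenate((mfccs, delta, delta2), axis=0)              # (39, T)

# Pad or truncate the time axis to max_pad_len, as extract_features does
T = features.shape[1]
if T > max_pad_len:
    features = features[:, :max_pad_len]
else:
    features = np.pad(features, ((0, 0), (0, max_pad_len - T)), mode='constant')

print(features.T.shape)  # (100, 39): (time_steps, features), matching n_features_input=39
```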
```diff
@@ -91,8 +112,12 @@ def classify_language(audio_path):
         class_labels = config.get('class_labels', [])
         n_features_expected = config.get('n_features_input', 39)
 
+        print(f"Config parameters: n_mfcc={n_mfcc}, max_pad_len={max_pad_len}, feature_type={feature_type}")
+        print(f"Expected features: {n_features_expected}, Classes: {class_labels}")
+
         # Load and process audio
         audio, sample_rate = librosa.load(audio_path, sr=None, res_type='kaiser_fast')
+        print(f"Loaded audio: duration={len(audio)/sample_rate:.2f}s, sample_rate={sample_rate}Hz")
 
         # Extract features
         features = extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type)
```
```diff
@@ -100,22 +125,29 @@ def classify_language(audio_path):
             return "Error: Failed to extract audio features"
 
         # Verify feature dimensions
+        print(f"Features shape: {features.shape}")
         if features.shape[1] != n_features_expected:
             return f"Error: Extracted feature dimension ({features.shape[1]}) doesn't match expected ({n_features_expected})"
 
         # Scale features
         features_reshaped = features.reshape(-1, n_features_expected)
+        print(f"Reshaped for scaling: {features_reshaped.shape}")
         features_scaled_reshaped = scaler.transform(features_reshaped)
         features_final = features_scaled_reshaped.reshape(1, max_pad_len, n_features_expected)
+        print(f"Final features shape for prediction: {features_final.shape}")
 
         # Predict
+        print("Running prediction...")
         prediction_probabilities = model.predict(features_final, verbose=0)
         predicted_index = np.argmax(prediction_probabilities, axis=1)[0]
+        print(f"Prediction complete. Raw output shape: {prediction_probabilities.shape}")
+        print(f"Predicted index: {predicted_index}")
 
         # Map to language label
         if 0 <= predicted_index < len(class_labels):
             predicted_language = class_labels[predicted_index]
             confidence = prediction_probabilities[0][predicted_index]
+            print(f"Predicted language: {predicted_language}, Confidence: {confidence:.2%}")
 
             # Prepare results to display all probabilities
             results = []
```
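The reshape before scaling matters because the scaler was evidently fit on per-frame feature vectors: the `(max_pad_len, 39)` matrix is flattened to 2-D for `transform`, then reshaped into the `(1, time_steps, features)` batch that `model.predict` expects. Below is a minimal sketch of that round trip, assuming the pickled scaler is a scikit-learn `StandardScaler` (an assumption; the diff only shows that the loaded object has a `transform` method).

```python
# Hypothetical illustration of the scale-and-reshape step.
import numpy as np
from sklearn.preprocessing import StandardScaler

max_pad_len, n_features = 100, 39
features = np.random.randn(max_pad_len, n_features)  # stand-in for extract_features output

# Stand-in for the pickled scaler, fit on per-frame feature vectors
scaler = StandardScaler().fit(np.random.randn(500, n_features))

flat = features.reshape(-1, n_features)             # (100, 39): one row per frame
scaled = scaler.transform(flat)                     # per-feature standardization
batch = scaled.reshape(1, max_pad_len, n_features)  # (1, 100, 39) for model.predict
print(batch.shape)
```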
```diff
@@ -128,7 +160,7 @@ def classify_language(audio_path):
 
             return result_text
         else:
-            return f"Error: Predicted index {predicted_index} out of bounds for labels"
+            return f"Error: Predicted index {predicted_index} out of bounds for labels (0-{len(class_labels)-1})"
 
     except Exception as e:
         import traceback
```
```diff
@@ -136,19 +168,32 @@ def classify_language(audio_path):
         print(error_msg)
         return f"Error: {str(e)}"
 
-# Create Gradio interface
+# Create Gradio interface with additional information
 demo = gr.Interface(
     fn=classify_language,
     inputs=gr.Audio(type="filepath", label="Upload or Record Audio"),
     outputs="text",
     title="Indian Language Classifier",
-    description="Upload or record audio in an Indian language, and the model will identify which language it is.",
+    description="Upload or record audio in an Indian language, and the model will identify which language it is. "
+                "The model supports multiple Indian languages as defined in the configuration file.",
+    article="""
+    ### Tips for Best Results
+    - Speak clearly in one of the supported Indian languages
+    - Try to record in a quiet environment
+    - Recordings should be at least 2-3 seconds long for best results
+
+    ### How it Works
+    This model extracts MFCC features from your audio and uses a neural network
+    trained on multiple Indian languages to predict which language you're speaking.
+    """,
     examples=[],  # You can add example audio files here if available
     cache_examples=False
 )
 
-# Load artifacts on startup
+# Load artifacts on startup to prevent cold start
+print("Initializing application...")
 load_artifacts()
+print("Application initialized successfully!")
 
 # Launch the app
 if __name__ == "__main__":
```
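The last hunk's context window ends at the `__main__` guard, so the launch call itself is outside this diff. For a Gradio Space the conventional guard body is a bare `demo.launch()`; the sketch below is that convention, not something taken from this commit.

```python
# Conventional Gradio entry point; the actual guard body is not shown in this diff.
if __name__ == "__main__":
    demo.launch()
```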