Spaces: Build error
Update app.py
app.py CHANGED
```diff
@@ -14,9 +14,9 @@ warnings.filterwarnings('ignore', category=UserWarning, module='librosa')
 
 # Model repository information
 REPO_ID = "hriteshMaikap/languageClassifier"
-MODEL_FILENAME = "indic_language_classifier_mtm.keras"
-SCALER_FILENAME = "audio_feature_scaler_mtm.pkl"
-CONFIG_FILENAME = "config_mtm.json"
+MODEL_FILENAME = "indic_language_classifier_mtm.keras"  # Updated filename
+SCALER_FILENAME = "audio_feature_scaler_mtm.pkl"  # Updated filename
+CONFIG_FILENAME = "config_mtm.json"  # Updated filename
 
 # Initialize global variables to store loaded artifacts
 model = None
```
```diff
@@ -28,35 +28,50 @@ def load_artifacts():
     global model, scaler, config
 
     try:
+        print(f"Loading artifacts from {REPO_ID}...")
         # Download files from Hugging Face Hub
         model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
         scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
         config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)
 
+        print(f"Model path: {model_path}")
+        print(f"Scaler path: {scaler_path}")
+        print(f"Config path: {config_path}")
+
         # Load model
         model = tf.keras.models.load_model(model_path, compile=False)
+        print("Model loaded successfully")
 
         # Load scaler
         scaler = joblib.load(scaler_path)
+        print("Scaler loaded successfully")
 
         # Load configuration
         with open(config_path, 'r') as f:
             config = json.load(f)
+        print(f"Config loaded successfully: {config.keys()}")
 
         return True
     except Exception as e:
         print(f"Error loading artifacts: {e}")
+        import traceback
+        traceback.print_exc()
         return False
 
 def extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type):
     """Extracts audio features directly from audio array."""
     try:
+        print(f"Extracting features: n_mfcc={n_mfcc}, max_pad_len={max_pad_len}, feature_type={feature_type}")
+        print(f"Audio shape: {audio.shape if hasattr(audio, 'shape') else 'unknown'}, Sample rate: {sample_rate}")
+
         mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
+        print(f"MFCC shape: {mfccs.shape}")
 
         if feature_type == 'mfcc_delta':
             delta_mfccs = librosa.feature.delta(mfccs)
             delta2_mfccs = librosa.feature.delta(mfccs, order=2)
             features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
+            print(f"Combined features shape: {features.shape}")
         elif feature_type == 'mfcc':
             features = mfccs
         else:
```
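Since the Space is currently failing with a build error, this hunk's main change is print-based tracing around each loading step. The download step can also be exercised outside the Space; the sketch below is a hypothetical smoke test (not part of this commit) that reuses the repo id and filenames from the diff and only calls `hf_hub_download` and `json.load`, so it runs without TensorFlow installed.

```python
# Hypothetical smoke test, not part of this commit: confirm the three
# artifacts download from the Hub and the config parses as JSON.
import json
from huggingface_hub import hf_hub_download

REPO_ID = "hriteshMaikap/languageClassifier"
FILENAMES = [
    "indic_language_classifier_mtm.keras",
    "audio_feature_scaler_mtm.pkl",
    "config_mtm.json",
]

# Download each artifact and print its local cache path
paths = {name: hf_hub_download(repo_id=REPO_ID, filename=name) for name in FILENAMES}
for name, path in paths.items():
    print(f"{name} -> {path}")

# Parse the config and list its keys
with open(paths["config_mtm.json"]) as f:
    config = json.load(f)
print(sorted(config))  # expect keys like 'class_labels' and 'n_features_input'
```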
```diff
@@ -65,19 +80,25 @@ def extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type):
         current_len = features.shape[1]
         if current_len > max_pad_len:
             features = features[:, :max_pad_len]
+            print(f"Features truncated to {features.shape}")
         elif current_len < max_pad_len:
             pad_width = max_pad_len - current_len
             features = np.pad(features, pad_width=((0, 0), (0, pad_width)), mode='constant')
+            print(f"Features padded to {features.shape}")
 
         return features.T  # Transpose to (time_steps, features)
     except Exception as e:
         print(f"Error extracting features: {e}")
+        import traceback
+        traceback.print_exc()
         return None
 
 def classify_language(audio_path):
     """Process audio file and classify language."""
     global model, scaler, config
 
+    print(f"Processing audio file: {audio_path}")
+
     # Load artifacts if not loaded
     if model is None or scaler is None or config is None:
         if not load_artifacts():
```
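A shape walk-through connects `extract_features` to the `n_features_input` default of 39 used later in `classify_language`: with `feature_type='mfcc_delta'`, the MFCC, delta, and delta-delta blocks each contribute `n_mfcc` rows, so `n_mfcc=13` yields 39 feature rows before the transpose. The sketch below assumes `n_mfcc=13` and `max_pad_len=100` for illustration; the real values come from `config_mtm.json`.

```python
# Hypothetical shape walk-through; the concrete 13 and 100 are assumptions.
import numpy as np
import librosa

sample_rate = 16000
audio = np.random.randn(sample_rate * 2).astype(np.float32)  # 2 s of noise as a stand-in

n_mfcc, max_pad_len = 13, 100
mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)   # (13, T)
delta = librosa.feature.delta(mfccs)                                   # (13, T)
delta2 = librosa.feature.delta(mfccs, order=2)                         # (13, T)
features = np.concatenate((mfccs, delta, delta2), axis=0)              # (39, T)

# Pad or truncate the time axis to max_pad_len, as extract_features does
T = features.shape[1]
if T > max_pad_len:
    features = features[:, :max_pad_len]
else:
    features = np.pad(features, ((0, 0), (0, max_pad_len - T)), mode='constant')

print(features.T.shape)  # (100, 39): (time_steps, features), matching n_features_input=39
```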
```diff
@@ -91,8 +112,12 @@ def classify_language(audio_path):
         class_labels = config.get('class_labels', [])
         n_features_expected = config.get('n_features_input', 39)
 
+        print(f"Config parameters: n_mfcc={n_mfcc}, max_pad_len={max_pad_len}, feature_type={feature_type}")
+        print(f"Expected features: {n_features_expected}, Classes: {class_labels}")
+
         # Load and process audio
         audio, sample_rate = librosa.load(audio_path, sr=None, res_type='kaiser_fast')
+        print(f"Loaded audio: duration={len(audio)/sample_rate:.2f}s, sample_rate={sample_rate}Hz")
 
         # Extract features
         features = extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type)
```
```diff
@@ -100,22 +125,29 @@ def classify_language(audio_path):
             return "Error: Failed to extract audio features"
 
         # Verify feature dimensions
+        print(f"Features shape: {features.shape}")
         if features.shape[1] != n_features_expected:
             return f"Error: Extracted feature dimension ({features.shape[1]}) doesn't match expected ({n_features_expected})"
 
         # Scale features
         features_reshaped = features.reshape(-1, n_features_expected)
+        print(f"Reshaped for scaling: {features_reshaped.shape}")
         features_scaled_reshaped = scaler.transform(features_reshaped)
         features_final = features_scaled_reshaped.reshape(1, max_pad_len, n_features_expected)
+        print(f"Final features shape for prediction: {features_final.shape}")
 
         # Predict
+        print("Running prediction...")
         prediction_probabilities = model.predict(features_final, verbose=0)
         predicted_index = np.argmax(prediction_probabilities, axis=1)[0]
+        print(f"Prediction complete. Raw output shape: {prediction_probabilities.shape}")
+        print(f"Predicted index: {predicted_index}")
 
         # Map to language label
         if 0 <= predicted_index < len(class_labels):
             predicted_language = class_labels[predicted_index]
             confidence = prediction_probabilities[0][predicted_index]
+            print(f"Predicted language: {predicted_language}, Confidence: {confidence:.2%}")
 
             # Prepare results to display all probabilities
             results = []
```
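The reshape before scaling matters because the scaler was evidently fit on per-frame feature vectors: the `(max_pad_len, 39)` matrix is flattened to 2-D for `transform`, then reshaped into the `(1, time_steps, features)` batch that `model.predict` expects. Below is a minimal sketch of that round trip, assuming the pickled scaler is a scikit-learn `StandardScaler` (an assumption; the diff only shows that the loaded object has a `transform` method).

```python
# Hypothetical illustration of the scale-and-reshape step.
import numpy as np
from sklearn.preprocessing import StandardScaler

max_pad_len, n_features = 100, 39
features = np.random.randn(max_pad_len, n_features)  # stand-in for extract_features output

# Stand-in for the pickled scaler, fit on per-frame feature vectors
scaler = StandardScaler().fit(np.random.randn(500, n_features))

flat = features.reshape(-1, n_features)             # (100, 39): one row per frame
scaled = scaler.transform(flat)                     # per-feature standardization
batch = scaled.reshape(1, max_pad_len, n_features)  # (1, 100, 39) for model.predict
print(batch.shape)
```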
```diff
@@ -128,7 +160,7 @@ def classify_language(audio_path):
 
             return result_text
         else:
-            return f"Error: Predicted index {predicted_index} out of bounds for labels"
+            return f"Error: Predicted index {predicted_index} out of bounds for labels (0-{len(class_labels)-1})"
 
     except Exception as e:
         import traceback
```
```diff
@@ -136,19 +168,32 @@ def classify_language(audio_path):
         print(error_msg)
         return f"Error: {str(e)}"
 
-# Create Gradio interface
+# Create Gradio interface with additional information
 demo = gr.Interface(
     fn=classify_language,
     inputs=gr.Audio(type="filepath", label="Upload or Record Audio"),
     outputs="text",
     title="Indian Language Classifier",
-    description="Upload or record audio in an Indian language, and the model will identify which language it is.",
+    description="Upload or record audio in an Indian language, and the model will identify which language it is. "
+                "The model supports multiple Indian languages as defined in the configuration file.",
+    article="""
+    ### Tips for Best Results
+    - Speak clearly in one of the supported Indian languages
+    - Try to record in a quiet environment
+    - Recordings should be at least 2-3 seconds long for best results
+
+    ### How it Works
+    This model extracts MFCC features from your audio and uses a neural network
+    trained on multiple Indian languages to predict which language you're speaking.
+    """,
     examples=[],  # You can add example audio files here if available
     cache_examples=False
 )
 
-# Load artifacts on startup
+# Load artifacts on startup to prevent cold start
+print("Initializing application...")
 load_artifacts()
+print("Application initialized successfully!")
 
 # Launch the app
 if __name__ == "__main__":
```
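The last hunk's context window ends at the `__main__` guard, so the launch call itself is outside this diff. For a Gradio Space the conventional guard body is a bare `demo.launch()`; the sketch below is that convention, not something taken from this commit.

```python
# Conventional Gradio entry point; the actual guard body is not shown in this diff.
if __name__ == "__main__":
    demo.launch()
```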