Spaces:

hriteshMaikap
/

indic-languages-classifier

Build error

App Files Files Community

hriteshMaikap committed 16 days ago

Commit

c475932

verified ·

1 Parent(s): f37bdc4

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -182

app.py CHANGED Viewed

@@ -1,200 +1,73 @@
 import gradio as gr
 import numpy as np
 import librosa
-import tensorflow as tf
-import json
-import os
 import joblib
 from huggingface_hub import hf_hub_download
-import warnings
-# Suppress specific warnings
-warnings.filterwarnings('ignore', category=FutureWarning, module='librosa')
-warnings.filterwarnings('ignore', category=UserWarning, module='librosa')
-# Model repository information
 REPO_ID = "hriteshMaikap/languageClassifier"
-MODEL_FILENAME = "indic_language_classifier_mtm.keras"  # Updated filename
-SCALER_FILENAME = "audio_feature_scaler_mtm.pkl"  # Updated filename
-CONFIG_FILENAME = "config_mtm.json"  # Updated filename
-# Initialize global variables to store loaded artifacts
-model = None
-scaler = None
-config = None
-def load_artifacts():
-    """Load model, scaler and configuration from Hugging Face Hub."""
-    global model, scaler, config
-    try:
-        print(f"Loading artifacts from {REPO_ID}...")
-        # Download files from Hugging Face Hub
-        model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
-        scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
-        config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)
-        print(f"Model path: {model_path}")
-        print(f"Scaler path: {scaler_path}")
-        print(f"Config path: {config_path}")
-        # Load model
-        model = tf.keras.models.load_model(model_path, compile=False)
-        print("Model loaded successfully")
-        # Load scaler
-        scaler = joblib.load(scaler_path)
-        print("Scaler loaded successfully")
-        # Load configuration
-        with open(config_path, 'r') as f:
-            config = json.load(f)
-        print(f"Config loaded successfully: {config.keys()}")
-        return True
-    except Exception as e:
-        print(f"Error loading artifacts: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-def extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type):
-    """Extracts audio features directly from audio array."""
-    try:
-        print(f"Extracting features: n_mfcc={n_mfcc}, max_pad_len={max_pad_len}, feature_type={feature_type}")
-        print(f"Audio shape: {audio.shape if hasattr(audio, 'shape') else 'unknown'}, Sample rate: {sample_rate}")
-        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
-        print(f"MFCC shape: {mfccs.shape}")
-        if feature_type == 'mfcc_delta':
-            delta_mfccs = librosa.feature.delta(mfccs)
-            delta2_mfccs = librosa.feature.delta(mfccs, order=2)
-            features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
-            print(f"Combined features shape: {features.shape}")
-        elif feature_type == 'mfcc':
-            features = mfccs
-        else:
-            features = mfccs  # Fallback
-        current_len = features.shape[1]
-        if current_len > max_pad_len:
-            features = features[:, :max_pad_len]
-            print(f"Features truncated to {features.shape}")
-        elif current_len < max_pad_len:
-            pad_width = max_pad_len - current_len
-            features = np.pad(features, pad_width=((0, 0), (0, pad_width)), mode='constant')
-            print(f"Features padded to {features.shape}")
-        return features.T  # Transpose to (time_steps, features)
-    except Exception as e:
-        print(f"Error extracting features: {e}")
-        import traceback
-        traceback.print_exc()
-        return None
-def classify_language(audio_path):
-    """Process audio file and classify language."""
-    global model, scaler, config
-    print(f"Processing audio file: {audio_path}")
-    # Load artifacts if not loaded
-    if model is None or scaler is None or config is None:
-        if not load_artifacts():
-            return "Error: Failed to load model artifacts"
     try:
-        # Get configuration parameters
-        n_mfcc = config.get('n_mfcc', 13)
-        max_pad_len = config.get('max_pad_len', 100)
-        feature_type = config.get('feature_type', 'mfcc_delta')
-        class_labels = config.get('class_labels', [])
-        n_features_expected = config.get('n_features_input', 39)
-        print(f"Config parameters: n_mfcc={n_mfcc}, max_pad_len={max_pad_len}, feature_type={feature_type}")
-        print(f"Expected features: {n_features_expected}, Classes: {class_labels}")
-        # Load and process audio
-        audio, sample_rate = librosa.load(audio_path, sr=None, res_type='kaiser_fast')
-        print(f"Loaded audio: duration={len(audio)/sample_rate:.2f}s, sample_rate={sample_rate}Hz")
-        # Extract features
-        features = extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type)
-        if features is None:
-            return "Error: Failed to extract audio features"
-        # Verify feature dimensions
-        print(f"Features shape: {features.shape}")
-        if features.shape[1] != n_features_expected:
-            return f"Error: Extracted feature dimension ({features.shape[1]}) doesn't match expected ({n_features_expected})"
-        # Scale features
-        features_reshaped = features.reshape(-1, n_features_expected)
-        print(f"Reshaped for scaling: {features_reshaped.shape}")
-        features_scaled_reshaped = scaler.transform(features_reshaped)
-        features_final = features_scaled_reshaped.reshape(1, max_pad_len, n_features_expected)
-        print(f"Final features shape for prediction: {features_final.shape}")
-        # Predict
-        print("Running prediction...")
-        prediction_probabilities = model.predict(features_final, verbose=0)
-        predicted_index = np.argmax(prediction_probabilities, axis=1)[0]
-        print(f"Prediction complete. Raw output shape: {prediction_probabilities.shape}")
-        print(f"Predicted index: {predicted_index}")
-        # Map to language label
-        if 0 <= predicted_index < len(class_labels):
-            predicted_language = class_labels[predicted_index]
-            confidence = prediction_probabilities[0][predicted_index]
-            print(f"Predicted language: {predicted_language}, Confidence: {confidence:.2%}")
-            # Prepare results to display all probabilities
-            results = []
-            for i, lang in enumerate(class_labels):
-                prob = prediction_probabilities[0][i]
-                results.append(f"{lang}: {prob:.2%}")
-            result_text = f"Predicted Language: {predicted_language} (Confidence: {confidence:.2%})\n\n"
-            result_text += "All Predictions:\n" + "\n".join(results)
-            return result_text
-        else:
-            return f"Error: Predicted index {predicted_index} out of bounds for labels (0-{len(class_labels)-1})"
     except Exception as e:
-        import traceback
-        error_msg = f"Error during classification: {e}\n{traceback.format_exc()}"
-        print(error_msg)
-        return f"Error: {str(e)}"
-# Create Gradio interface with additional information
 demo = gr.Interface(
-    fn=classify_language,
-    inputs=gr.Audio(type="filepath", label="Upload or Record Audio"),
-    outputs="text",
-    title="Indian Language Classifier",
-    description="Upload or record audio in an Indian language, and the model will identify which language it is. "
-                "The model supports multiple Indian languages as defined in the configuration file.",
-    article="""
-    ### Tips for Best Results
-    - Speak clearly in one of the supported Indian languages
-    - Try to record in a quiet environment
-    - Recordings should be at least 2-3 seconds long for best results
-    ### How it Works
-    This model extracts MFCC features from your audio and uses a neural network
-    trained on multiple Indian languages to predict which language you're speaking.
-    """,
-    examples=[],  # You can add example audio files here if available
-    cache_examples=False
 )
-# Load artifacts on startup to prevent cold start
-print("Initializing application...")
-load_artifacts()
-print("Application initialized successfully!")
-# Launch the app
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import numpy as np
 import librosa
 import joblib
+import json
+import tensorflow as tf
 from huggingface_hub import hf_hub_download
+# Fetch the model, feature scaler and config from the Hugging Face Hub repo below (runs once, at import time)
 REPO_ID = "hriteshMaikap/languageClassifier"
+MODEL_FILENAME = "indic_language_classifier_mtm.keras"
+SCALER_FILENAME = "audio_feature_scaler_mtm.pkl"
+CONFIG_FILENAME = "config_mtm.json"
+model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)  # local path of the downloaded file
+scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
+config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)
+with open(config_path, "r") as f:
+    config = json.load(f)
+n_mfcc = config["n_mfcc"]  # forwarded to librosa.feature.mfcc; a missing key raises KeyError at startup
+max_pad_len = config["max_pad_len"]  # number of frames every feature matrix is truncated/padded to
+feature_type = config["feature_type"]  # 'mfcc_delta' stacks delta and delta-delta rows; anything else uses plain MFCCs
+class_labels = config["class_labels"]  # indexed by the argmax of the model output
+sr = 22050  # Rate librosa.load resamples to. NOTE(review): must equal the training sample rate - confirm
+scaler = joblib.load(scaler_path)  # NOTE: joblib.load unpickles the file; only load from a trusted repo
+model = tf.keras.models.load_model(model_path)
+def extract_features(file_path, n_mfcc, max_pad_len, feature_type, sr):
+    audio, _ = librosa.load(file_path, sr=sr, mono=True, res_type='kaiser_fast')
+    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
+    if feature_type == 'mfcc_delta':
+        delta_mfccs = librosa.feature.delta(mfccs)
+        delta2_mfccs = librosa.feature.delta(mfccs, order=2)
+        features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
+    elif feature_type == 'mfcc':
+        features = mfccs
+    else:
+        features = mfccs
+    current_len = features.shape[1]
+    if current_len > max_pad_len:
+        features = features[:, :max_pad_len]
+    elif current_len < max_pad_len:
+        pad_width = max_pad_len - current_len
+        features = np.pad(features, pad_width=((0, 0), (0, pad_width)), mode='constant')
+    return features.T
+def predict_language(audio_filepath):
     try:
+        features = extract_features(audio_filepath, n_mfcc, max_pad_len, feature_type, sr)
+        features_scaled = scaler.transform(features)
+        features_scaled = features_scaled[np.newaxis, :, :]
+        pred_probs = model.predict(features_scaled)
+        pred_idx = np.argmax(pred_probs, axis=1)[0]
+        pred_lang = class_labels[pred_idx]
+        confidence = float(pred_probs[0, pred_idx])
+        all_probs = {l: float(p) for l, p in zip(class_labels, pred_probs[0])}
+        prob_str = "\n".join([f"{l}: {p:.3f}" for l, p in all_probs.items()])
+        return f"**Prediction:** {pred_lang}\n**Confidence:** {confidence:.2%}\n\n**Class Probabilities:**\n{prob_str}"
     except Exception as e:
+        return f"Error processing audio: {str(e)}"
 demo = gr.Interface(
+    fn=predict_language,  # called with the audio input; its return value fills the output
+    inputs=gr.Audio(type="filepath", label="Upload or record audio (.wav or .mp3)"),  # type="filepath": fn receives a path, not raw samples
+    outputs=gr.Markdown(),  # the string returned by fn is rendered as Markdown
+    title="Indic Language Classifier (Marathi, Telugu, Malayalam)",
+    description="Record or upload an audio sample. The model predicts the language (Marathi, Telugu, or Malayalam)."
 )
 if __name__ == "__main__":
+    demo.launch(show_error=True)  # start the app only when run as a script; show_error is passed through to Gradio