Spaces:

hriteshMaikap
/

indic-languages-classifier

Build error

App Files Community

hriteshMaikap committed 20 days ago

Commit

fa237f3

verified ·

1 Parent(s): 1fe94ae

Update app.py

Browse files

Files changed (1) hide show

app.py +107 -169

app.py CHANGED Viewed

@@ -1,217 +1,155 @@
 import gradio as gr
-import tensorflow as tf
 import numpy as np
-import joblib
-import json
 import librosa
-import logging
 import os
-from datetime import datetime
 from huggingface_hub import hf_hub_download
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s'
-)
-logger = logging.getLogger(__name__)
-# Print initialization info
-current_time = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
-print(f"--- Script Start Time (UTC): {current_time} ---")
-print(f"User: hriteshMaikap")
-print(f"TensorFlow Version: {tf.__version__}")
-print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")
-# Define repository and file information
 REPO_ID = "hriteshMaikap/languageClassifier"
-MODEL_FILENAME = "indic_language_classifier_mtm.keras"
-SCALER_FILENAME = "audio_feature_scaler_mtm.pkl"
-CONFIG_FILENAME = "config_mtm.json"
-def build_model(input_shape, num_classes):
-    """Recreate the model architecture"""
-    model = tf.keras.Sequential(name="Audio_CNN_1D_MTM")
-    # Input layer
-    model.add(tf.keras.layers.Input(shape=input_shape))
-    # Conv Block 1
-    model.add(tf.keras.layers.Conv1D(64, 5, strides=1, padding='same', name='Conv1D_1'))
-    model.add(tf.keras.layers.BatchNormalization(name='BatchNorm_1'))
-    model.add(tf.keras.layers.Activation('relu', name='ReLU_1'))
-    model.add(tf.keras.layers.MaxPooling1D(2, strides=2, padding='same', name='MaxPool_1'))
-    model.add(tf.keras.layers.Dropout(0.3, name='Dropout_1'))
-    # Conv Block 2
-    model.add(tf.keras.layers.Conv1D(128, 5, strides=1, padding='same', name='Conv1D_2'))
-    model.add(tf.keras.layers.BatchNormalization(name='BatchNorm_2'))
-    model.add(tf.keras.layers.Activation('relu', name='ReLU_2'))
-    model.add(tf.keras.layers.MaxPooling1D(2, strides=2, padding='same', name='MaxPool_2'))
-    model.add(tf.keras.layers.Dropout(0.3, name='Dropout_2'))
-    # Conv Block 3
-    model.add(tf.keras.layers.Conv1D(256, 5, strides=1, padding='same', name='Conv1D_3'))
-    model.add(tf.keras.layers.BatchNormalization(name='BatchNorm_3'))
-    model.add(tf.keras.layers.Activation('relu', name='ReLU_3'))
-    model.add(tf.keras.layers.MaxPooling1D(2, strides=2, padding='same', name='MaxPool_3'))
-    model.add(tf.keras.layers.Dropout(0.3, name='Dropout_3'))
-    # Flatten & Dense
-    model.add(tf.keras.layers.Flatten(name='Flatten'))
-    model.add(tf.keras.layers.Dense(256, name='Dense_1'))
-    model.add(tf.keras.layers.BatchNormalization(name='BatchNorm_Dense1'))
-    model.add(tf.keras.layers.Activation('relu', name='ReLU_Dense1'))
-    model.add(tf.keras.layers.Dropout(0.5, name='Dropout_Dense1'))
-    # Output
-    model.add(tf.keras.layers.Dense(num_classes, activation='softmax', name='Output_Softmax'))
-    return model
-# Load resources
-try:
-    logger.info(f"Downloading resources from {REPO_ID}")
-    # Download files
-    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
-    scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
-    config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)
-    logger.info("Files downloaded successfully")
-    # Load config first
-    with open(config_path, 'r') as f:
-        config = json.load(f)
-    logger.info("Config loaded successfully")
-    # Load scaler
-    scaler = joblib.load(scaler_path)
-    logger.info("Scaler loaded successfully")
-    # Build and load model
-    input_shape = (config['max_pad_len'], config['n_features_input'])
-    num_classes = len(config['class_labels'])
-    # Create model with same architecture
-    model = build_model(input_shape, num_classes)
-    # Load weights from saved model
-    saved_model = tf.keras.models.load_model(model_path, compile=False)
-    model.set_weights(saved_model.get_weights())
-    logger.info("Model rebuilt and weights loaded successfully")
-except Exception as e:
-    logger.error(f"Error loading resources: {e}")
-    raise e
-def extract_features(file_path, n_mfcc, max_pad_len, feature_type):
-    """Extracts specified features, pads/truncates."""
     try:
-        audio, sample_rate = librosa.load(file_path, sr=None, res_type='kaiser_fast')
         mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
         if feature_type == 'mfcc_delta':
             delta_mfccs = librosa.feature.delta(mfccs)
             delta2_mfccs = librosa.feature.delta(mfccs, order=2)
             features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
-        else:
             features = mfccs
         current_len = features.shape[1]
         if current_len > max_pad_len:
             features = features[:, :max_pad_len]
-        else:
             pad_width = max_pad_len - current_len
             features = np.pad(features, pad_width=((0, 0), (0, pad_width)), mode='constant')
-        return features.T
     except Exception as e:
-        logger.error(f"Error extracting features: {e}")
         return None
-def classify_language(audio_file):
-    """Main function to classify the language of an audio file"""
     try:
-        logger.info(f"Processing audio file of type: {type(audio_file)}")
-        # Extract features
-        features = extract_features(
-            audio_file,
-            config['n_mfcc'],
-            config['max_pad_len'],
-            config['feature_type']
-        )
         if features is None:
-            return "Error: Could not process audio file"
-        # Scale features
-        features_reshaped = features.reshape(-1, features.shape[1])
-        features_scaled = scaler.transform(features_reshaped)
-        features_scaled = features_scaled.reshape(1, config['max_pad_len'], -1)
-        # Make prediction
-        predictions = model.predict(features_scaled, verbose=0)[0]
-        # Format results
-        results = {
-            lang: float(prob)
-            for lang, prob in zip(config['class_labels'], predictions)
-        }
-        # Sort by probability
-        sorted_results = dict(sorted(
-            results.items(),
-            key=lambda x: x[1],
-            reverse=True
-        ))
-        return sorted_results
     except Exception as e:
-        logger.error(f"Error in classification: {e}")
         return f"Error: {str(e)}"
 # Create Gradio interface
 demo = gr.Interface(
     fn=classify_language,
-    inputs=gr.Audio(
-        type="filepath",
-        label="Audio Input",
-    ),
-    outputs=gr.Label(
-        num_top_classes=3,
-        label="Language Prediction"
-    ),
-    title="Indian Languages Audio Classifier",
-    description="""
-    ## Classify Audio in Indian Languages
-    This model identifies the language being spoken in an audio clip, choosing between:
-    - Marathi (मराठी)
-    - Telugu (తెలుగు)
-    - Malayalam (മലയാളം)
-    ### Instructions:
-    1. Click the microphone icon to record or upload an audio file
-    2. Submit to get the language classification
-    3. Results show confidence scores for each language
-    For best results:
-    - Use clear speech with minimal background noise
-    - Speak in one of the three supported languages
-    - Ensure good audio quality
-    """,
-    theme="huggingface",
-    allow_flagging="never"
 )
 # Launch the app
 if __name__ == "__main__":
-    demo.launch(
-        show_error=True,
-        share=False,
-        debug=True
-    )

 import gradio as gr
 import numpy as np
 import librosa
+import tensorflow as tf
+import json
 import os
+import joblib
 from huggingface_hub import hf_hub_download
+import warnings
+# Suppress specific warnings
+warnings.filterwarnings('ignore', category=FutureWarning, module='librosa')
+warnings.filterwarnings('ignore', category=UserWarning, module='librosa')
+# Model repository information
 REPO_ID = "hriteshMaikap/languageClassifier"
+MODEL_FILENAME = "indic_language_classifier.keras"
+SCALER_FILENAME = "audio_feature_scaler.pkl"
+CONFIG_FILENAME = "config.json"
+# Initialize global variables to store loaded artifacts
+model = None
+scaler = None
+config = None
+def load_artifacts():
+    """Load model, scaler and configuration from Hugging Face Hub."""
+    global model, scaler, config
+    try:
+        # Download files from Hugging Face Hub
+        model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
+        scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
+        config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)
+        # Load model
+        model = tf.keras.models.load_model(model_path, compile=False)
+        # Load scaler
+        scaler = joblib.load(scaler_path)
+        # Load configuration
+        with open(config_path, 'r') as f:
+            config = json.load(f)
+        return True
+    except Exception as e:
+        print(f"Error loading artifacts: {e}")
+        return False
+def extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type):
+    """Extracts audio features directly from audio array."""
     try:
         mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
         if feature_type == 'mfcc_delta':
             delta_mfccs = librosa.feature.delta(mfccs)
             delta2_mfccs = librosa.feature.delta(mfccs, order=2)
             features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
+        elif feature_type == 'mfcc':
             features = mfccs
+        else:
+            features = mfccs  # Fallback
         current_len = features.shape[1]
         if current_len > max_pad_len:
             features = features[:, :max_pad_len]
+        elif current_len < max_pad_len:
             pad_width = max_pad_len - current_len
             features = np.pad(features, pad_width=((0, 0), (0, pad_width)), mode='constant')
+        return features.T  # Transpose to (time_steps, features)
     except Exception as e:
+        print(f"Error extracting features: {e}")
         return None
+def classify_language(audio_path):
+    """Process audio file and classify language."""
+    global model, scaler, config
+    # Load artifacts if not loaded
+    if model is None or scaler is None or config is None:
+        if not load_artifacts():
+            return "Error: Failed to load model artifacts"
     try:
+        # Get configuration parameters
+        n_mfcc = config.get('n_mfcc', 13)
+        max_pad_len = config.get('max_pad_len', 100)
+        feature_type = config.get('feature_type', 'mfcc_delta')
+        class_labels = config.get('class_labels', [])
+        n_features_expected = config.get('n_features_input', 39)
+        # Load and process audio
+        audio, sample_rate = librosa.load(audio_path, sr=None, res_type='kaiser_fast')
+        # Extract features
+        features = extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type)
         if features is None:
+            return "Error: Failed to extract audio features"
+        # Verify feature dimensions
+        if features.shape[1] != n_features_expected:
+            return f"Error: Extracted feature dimension ({features.shape[1]}) doesn't match expected ({n_features_expected})"
+        # Scale features
+        features_reshaped = features.reshape(-1, n_features_expected)
+        features_scaled_reshaped = scaler.transform(features_reshaped)
+        features_final = features_scaled_reshaped.reshape(1, max_pad_len, n_features_expected)
+        # Predict
+        prediction_probabilities = model.predict(features_final, verbose=0)
+        predicted_index = np.argmax(prediction_probabilities, axis=1)[0]
+        # Map to language label
+        if 0 <= predicted_index < len(class_labels):
+            predicted_language = class_labels[predicted_index]
+            confidence = prediction_probabilities[0][predicted_index]
+            # Prepare results to display all probabilities
+            results = []
+            for i, lang in enumerate(class_labels):
+                prob = prediction_probabilities[0][i]
+                results.append(f"{lang}: {prob:.2%}")
+            result_text = f"Predicted Language: {predicted_language} (Confidence: {confidence:.2%})\n\n"
+            result_text += "All Predictions:\n" + "\n".join(results)
+            return result_text
+        else:
+            return f"Error: Predicted index {predicted_index} out of bounds for labels"
     except Exception as e:
+        import traceback
+        error_msg = f"Error during classification: {e}\n{traceback.format_exc()}"
+        print(error_msg)
         return f"Error: {str(e)}"
 # Create Gradio interface
 demo = gr.Interface(
     fn=classify_language,
+    inputs=gr.Audio(type="filepath", label="Upload or Record Audio"),
+    outputs="text",
+    title="Indian Language Classifier",
+    description="Upload or record audio in an Indian language, and the model will identify which language it is. Supported languages are defined in the configuration file.",
+    examples=[],  # You can add example audio files here if available
+    cache_examples=False
 )
+# Load artifacts on startup
+load_artifacts()
 # Launch the app
 if __name__ == "__main__":
+    demo.launch()