import gradio as gr
import numpy as np
import librosa
import tensorflow as tf
import json
import os
import joblib
from huggingface_hub import hf_hub_download
import warnings

# Suppress specific warnings
warnings.filterwarnings('ignore', category=FutureWarning, module='librosa')
warnings.filterwarnings('ignore', category=UserWarning, module='librosa')

# Model repository information
REPO_ID = "hriteshMaikap/languageClassifier"
MODEL_FILENAME = "indic_language_classifier.keras"
SCALER_FILENAME = "audio_feature_scaler.pkl"
CONFIG_FILENAME = "config.json"

# Initialize global variables to store loaded artifacts
model = None
scaler = None
config = None

def load_artifacts():
    """Load model, scaler and configuration from Hugging Face Hub."""
    global model, scaler, config
    
    try:
        # Download files from Hugging Face Hub
        model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
        scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
        config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)
        
        # Load model
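        # (compile=False: the model is only used for inference here, so the saved
        # training configuration does not need to be restored)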
        model = tf.keras.models.load_model(model_path, compile=False)
        
        # Load scaler
        scaler = joblib.load(scaler_path)
        
        # Load configuration
        with open(config_path, 'r') as f:
            config = json.load(f)
        
        return True
    except Exception as e:
        print(f"Error loading artifacts: {e}")
        return False

def extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type):
    """Extracts audio features directly from audio array."""
    try:
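        # librosa returns an (n_mfcc, frames) matrix; the 'mfcc_delta' option below
        # stacks delta and delta-delta coefficients, tripling the feature dimension
        # (e.g. 13 MFCCs -> 39 features, matching n_features_input in the config)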
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
        
        if feature_type == 'mfcc_delta':
            delta_mfccs = librosa.feature.delta(mfccs)
            delta2_mfccs = librosa.feature.delta(mfccs, order=2)
            features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
        elif feature_type == 'mfcc':
            features = mfccs
        else:
            features = mfccs  # Fallback for an unrecognized feature_type

        # Pad or truncate along the time axis so every clip yields exactly max_pad_len frames
        current_len = features.shape[1]
        if current_len > max_pad_len:
            features = features[:, :max_pad_len]
        elif current_len < max_pad_len:
            pad_width = max_pad_len - current_len
            features = np.pad(features, pad_width=((0, 0), (0, pad_width)), mode='constant')
            
        return features.T  # Transpose to (time_steps, features)
    except Exception as e:
        print(f"Error extracting features: {e}")
        return None

def classify_language(audio_path):
    """Process audio file and classify language."""
    global model, scaler, config

    # Gradio passes None when no audio was uploaded or recorded
    if audio_path is None:
        return "Error: No audio provided. Please upload or record a clip."

    # Load artifacts if not yet loaded
    if model is None or scaler is None or config is None:
        if not load_artifacts():
            return "Error: Failed to load model artifacts"
    
    try:
        # Get configuration parameters
        n_mfcc = config.get('n_mfcc', 13)
        max_pad_len = config.get('max_pad_len', 100)
        feature_type = config.get('feature_type', 'mfcc_delta')
        class_labels = config.get('class_labels', [])
        n_features_expected = config.get('n_features_input', 39)
        
        # Load and process audio
        audio, sample_rate = librosa.load(audio_path, sr=None, res_type='kaiser_fast')
        
        # Extract features
        features = extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type)
        if features is None:
            return "Error: Failed to extract audio features"
        
        # Verify feature dimensions
        if features.shape[1] != n_features_expected:
            return f"Error: Extracted feature dimension ({features.shape[1]}) doesn't match expected ({n_features_expected})"
        
        # Scale features
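        # scaler.transform works on 2-D arrays, so flatten the time axis first,
        # then restore the (1, max_pad_len, n_features) shape the model expects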
        features_reshaped = features.reshape(-1, n_features_expected)
        features_scaled_reshaped = scaler.transform(features_reshaped)
        features_final = features_scaled_reshaped.reshape(1, max_pad_len, n_features_expected)
        
        # Predict
        prediction_probabilities = model.predict(features_final, verbose=0)
        predicted_index = np.argmax(prediction_probabilities, axis=1)[0]
        
        # Map to language label
        if 0 <= predicted_index < len(class_labels):
            predicted_language = class_labels[predicted_index]
            confidence = prediction_probabilities[0][predicted_index]
            
            # Prepare results to display all probabilities
            results = []
            for i, lang in enumerate(class_labels):
                prob = prediction_probabilities[0][i]
                results.append(f"{lang}: {prob:.2%}")
            
            result_text = f"Predicted Language: {predicted_language} (Confidence: {confidence:.2%})\n\n"
            result_text += "All Predictions:\n" + "\n".join(results)
            
            return result_text
        else:
            return f"Error: Predicted index {predicted_index} out of bounds for labels"
        
    except Exception as e:
        import traceback
        error_msg = f"Error during classification: {e}\n{traceback.format_exc()}"
        print(error_msg)
        return f"Error: {str(e)}"

# Create Gradio interface
demo = gr.Interface(
    fn=classify_language,
    inputs=gr.Audio(type="filepath", label="Upload or Record Audio"),
    outputs="text",
    title="Indian Language Classifier",
    description="Upload or record audio in an Indian language and the model will identify which language it is. The supported languages are taken from the model's configuration (class_labels).",
    examples=[],  # You can add example audio files here if available
    cache_examples=False
)

# Load artifacts on startup
load_artifacts()

# Launch the app
if __name__ == "__main__":
    demo.launch()