"""Gradio demo that predicts the spoken language of an audio clip.

At import time the Keras model, the feature scaler and the JSON config are
downloaded from the Hugging Face Hub repository ``REPO_ID`` and loaded into
module-level globals that ``predict_language`` reads on every request.
"""

import json
import logging

import gradio as gr
import joblib
import librosa
import numpy as np
import tensorflow as tf
from huggingface_hub import hf_hub_download

logger = logging.getLogger(__name__)

# Download artifacts from Hugging Face Hub
REPO_ID = "hriteshMaikap/languageClassifier"
MODEL_FILENAME = "indic_language_classifier_mtm.keras"
SCALER_FILENAME = "audio_feature_scaler_mtm.pkl"
CONFIG_FILENAME = "config_mtm.json"

model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)

# JSON is UTF-8 by specification; do not rely on the platform's default
# encoding, which may mis-decode non-ASCII class labels.
with open(config_path, "r", encoding="utf-8") as f:
    config = json.load(f)

n_mfcc = config["n_mfcc"]
max_pad_len = config["max_pad_len"]
feature_type = config["feature_type"]
class_labels = config["class_labels"]
sr = 22050  # Use the same sample rate as in your training

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only point REPO_ID at a repository you trust.
scaler = joblib.load(scaler_path)
model = tf.keras.models.load_model(model_path)


def extract_features(file_path, n_mfcc, max_pad_len, feature_type, sr):
    """Load an audio file and return a fixed-length MFCC feature matrix.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients to compute.
        max_pad_len: Length of the second axis after truncation or
            zero-padding (before the final transpose).
        feature_type: ``'mfcc_delta'`` stacks the MFCCs with their first- and
            second-order deltas; any other value (including ``'mfcc'``) uses
            the plain MFCCs.
        sr: Sample rate the audio is resampled to while loading.

    Returns:
        The transposed feature array; its first axis has length
        ``max_pad_len``.
    """
    audio, _ = librosa.load(file_path, sr=sr, mono=True, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)

    if feature_type == 'mfcc_delta':
        delta_mfccs = librosa.feature.delta(mfccs)
        delta2_mfccs = librosa.feature.delta(mfccs, order=2)
        features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
    else:
        # 'mfcc' and any unrecognised feature_type both fall back to plain MFCCs.
        features = mfccs

    # Force a fixed length along axis 1: truncate long clips, zero-pad short ones.
    current_len = features.shape[1]
    if current_len > max_pad_len:
        features = features[:, :max_pad_len]
    elif current_len < max_pad_len:
        pad_width = max_pad_len - current_len
        features = np.pad(features, pad_width=((0, 0), (0, pad_width)), mode='constant')

    return features.T


def predict_language(audio_filepath):
    """Classify the language of an audio file and format the result as Markdown.

    Args:
        audio_filepath: Path to the audio file supplied by the Gradio audio
            input, or ``None``/empty when nothing was submitted.

    Returns:
        A Markdown string with the predicted label, its confidence and the
        probability of every entry in ``class_labels``. On failure, a string
        starting with ``"Error processing audio:"`` is returned instead of
        raising, so the message is shown in the UI.
    """
    if not audio_filepath:
        # Nothing was uploaded or recorded; report that directly instead of
        # surfacing an opaque loader error.
        return "Error processing audio: no audio was provided."

    try:
        features = extract_features(audio_filepath, n_mfcc, max_pad_len, feature_type, sr)
        features_scaled = scaler.transform(features)
        # Add a leading batch axis of size 1 for model.predict.
        features_scaled = features_scaled[np.newaxis, :, :]
        # verbose=0 keeps the per-request progress bar out of the server log.
        pred_probs = model.predict(features_scaled, verbose=0)
        pred_idx = int(np.argmax(pred_probs, axis=1)[0])
        pred_lang = class_labels[pred_idx]
        confidence = float(pred_probs[0, pred_idx])

        # Markdown collapses single newlines into spaces, so separate the
        # sections with blank lines and render the probabilities as a list.
        prob_str = "\n".join(
            f"- {label}: {float(prob):.3f}"
            for label, prob in zip(class_labels, pred_probs[0])
        )
        return (
            f"**Prediction:** {pred_lang}\n\n"
            f"**Confidence:** {confidence:.2%}\n\n"
            f"**Class Probabilities:**\n\n{prob_str}"
        )
    except Exception as e:
        # UI boundary: keep the traceback in the log, show a short message to the user.
        logger.exception("Failed to classify audio file %r", audio_filepath)
        return f"Error processing audio: {str(e)}"


demo = gr.Interface(
    fn=predict_language,
    inputs=gr.Audio(type="filepath", label="Upload or record audio (.wav or .mp3)"),
    outputs=gr.Markdown(),
    title="Indic Language Classifier (Marathi, Telugu, Malayalam)",
    description="Record or upload an audio sample. The model predicts the language (Marathi, Telugu, or Malayalam).",
)

if __name__ == "__main__":
    demo.launch(show_error=True)