Spaces:
Build error
Build error
import gradio as gr | |
import numpy as np | |
import librosa | |
import joblib | |
import json | |
import tensorflow as tf | |
from huggingface_hub import hf_hub_download | |
# Download the model artifacts from the Hugging Face Hub.
REPO_ID = "hriteshMaikap/languageClassifier"
MODEL_FILENAME = "indic_language_classifier_mtm.keras"
SCALER_FILENAME = "audio_feature_scaler_mtm.pkl"
CONFIG_FILENAME = "config_mtm.json"

# Each call yields a path that is opened/loaded below.
model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)

# JSON is UTF-8 by specification; do not depend on the platform's default
# text encoding when reading it.
with open(config_path, "r", encoding="utf-8") as f:
    config = json.load(f)

# Feature-extraction settings and output labels stored alongside the model.
n_mfcc = config["n_mfcc"]
max_pad_len = config["max_pad_len"]
feature_type = config["feature_type"]
class_labels = config["class_labels"]
sr = 22050  # Use the same sample rate as in your training

# SECURITY: joblib.load unpickles the file, which can execute arbitrary code.
# This is only safe for as long as the contents of REPO_ID are trusted.
scaler = joblib.load(scaler_path)
model = tf.keras.models.load_model(model_path)
def extract_features(file_path, n_mfcc, max_pad_len, feature_type, sr):
    """Load an audio file and convert it to a fixed-length MFCC feature matrix.

    Args:
        file_path: Path of the audio file, handed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients to compute.
        max_pad_len: Number of frames in the result. Longer inputs are
            truncated; shorter ones are zero-padded at the end.
        feature_type: ``'mfcc_delta'`` stacks the MFCCs with their first- and
            second-order deltas. Any other value (including ``'mfcc'``)
            yields the plain MFCCs.
        sr: Sample rate the audio is resampled to when loaded.

    Returns:
        A 2-D array with ``max_pad_len`` rows (frames) and one column per
        coefficient.
    """
    audio, _ = librosa.load(file_path, sr=sr, mono=True, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)

    if feature_type == 'mfcc_delta':
        # librosa's default delta window is 9 frames, and its default
        # 'interp' mode rejects inputs shorter than the window. Very short
        # clips therefore fall back to 'nearest' edge padding instead of
        # raising; clips of normal length are computed exactly as before.
        delta_width = 9
        delta_mode = 'interp' if mfccs.shape[1] >= delta_width else 'nearest'
        delta_mfccs = librosa.feature.delta(
            mfccs, width=delta_width, mode=delta_mode
        )
        delta2_mfccs = librosa.feature.delta(
            mfccs, width=delta_width, order=2, mode=delta_mode
        )
        features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
    else:
        # 'mfcc' and any unrecognised feature type both use the raw MFCCs.
        features = mfccs

    # Force exactly max_pad_len frames: cut the excess, then zero-pad the tail.
    features = features[:, :max_pad_len]
    missing_frames = max_pad_len - features.shape[1]
    if missing_frames > 0:
        features = np.pad(
            features, pad_width=((0, 0), (0, missing_frames)), mode='constant'
        )

    # Transpose so that frames are rows and coefficients are columns.
    return features.T
def predict_language(audio_filepath):
    """Classify the language of an audio file and describe the result.

    Args:
        audio_filepath: Path to the audio file to classify. May be ``None``
            or empty when the caller has no audio to pass.

    Returns:
        A Markdown string with the predicted label, its confidence and the
        probability of every class, or a message starting with
        ``"Error processing audio:"`` if the clip is missing or cannot be
        processed.
    """
    # Nothing to analyse: answer with a clear message instead of letting the
    # audio loader fail on a missing path.
    if not audio_filepath:
        return (
            "Error processing audio: no audio was provided. "
            "Please upload or record a clip."
        )

    try:
        features = extract_features(
            audio_filepath, n_mfcc, max_pad_len, feature_type, sr
        )
        features_scaled = scaler.transform(features)
        # Add a leading batch axis: the model is fed a batch of one sample.
        batch = features_scaled[np.newaxis, :, :]
        probs = model.predict(batch)[0]

        pred_idx = int(np.argmax(probs))
        pred_lang = class_labels[pred_idx]
        confidence = float(probs[pred_idx])

        # Markdown collapses single newlines, so use a hard line break
        # (two trailing spaces) and a bullet list to keep one item per line.
        prob_lines = "\n".join(
            f"- {label}: {float(p):.3f}" for label, p in zip(class_labels, probs)
        )
        return (
            f"**Prediction:** {pred_lang}  \n"
            f"**Confidence:** {confidence:.2%}\n\n"
            f"**Class Probabilities:**\n\n{prob_lines}"
        )
    except Exception as e:
        # UI boundary: report the failure as text rather than raising.
        return f"Error processing audio: {e}"
# Gradio UI: one audio input (delivered to the callback as a file path) and a
# Markdown panel that displays whatever predict_language returns.
audio_input = gr.Audio(
    type="filepath",
    label="Upload or record audio (.wav or .mp3)",
)
result_panel = gr.Markdown()

demo = gr.Interface(
    fn=predict_language,
    inputs=audio_input,
    outputs=result_panel,
    title="Indic Language Classifier (Marathi, Telugu, Malayalam)",
    description="Record or upload an audio sample. The model predicts the language (Marathi, Telugu, or Malayalam).",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(show_error=True)