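# Gradio Space for Indic language identification (Marathi, Telugu, Malayalam).
# Downloads a trained Keras classifier, feature scaler, and config from the
# Hugging Face Hub, extracts MFCC-based features from the input audio, and
# reports the predicted language with per-class probabilities.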
import gradio as gr
import numpy as np
import librosa
import joblib
import json
import tensorflow as tf
from huggingface_hub import hf_hub_download

# Download artifacts from Hugging Face Hub
REPO_ID = "hriteshMaikap/languageClassifier"
MODEL_FILENAME = "indic_language_classifier_mtm.keras"
SCALER_FILENAME = "audio_feature_scaler_mtm.pkl"
CONFIG_FILENAME = "config_mtm.json"

model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)

with open(config_path, "r") as f:
    config = json.load(f)
n_mfcc = config["n_mfcc"]
max_pad_len = config["max_pad_len"]
feature_type = config["feature_type"]
class_labels = config["class_labels"]
sr = 22050  # sample rate used when training the model

scaler = joblib.load(scaler_path)
model = tf.keras.models.load_model(model_path)

def extract_features(file_path, n_mfcc, max_pad_len, feature_type, sr):
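    """Load an audio file and return a (max_pad_len, n_features) matrix of
    MFCC features (optionally stacked with delta and delta-delta coefficients),
    truncated or zero-padded along the time axis."""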
    audio, _ = librosa.load(file_path, sr=sr, mono=True, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
    if feature_type == 'mfcc_delta':
        delta_mfccs = librosa.feature.delta(mfccs)
        delta2_mfccs = librosa.feature.delta(mfccs, order=2)
        features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
    else:
        # plain MFCCs for 'mfcc' or any unrecognized feature_type
        features = mfccs
    current_len = features.shape[1]
    if current_len > max_pad_len:
        features = features[:, :max_pad_len]
    elif current_len < max_pad_len:
        pad_width = max_pad_len - current_len
        features = np.pad(features, pad_width=((0, 0), (0, pad_width)), mode='constant')
    return features.T

def predict_language(audio_filepath):
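    """Classify one audio file and return a Markdown summary with the
    predicted language, its confidence, and all class probabilities."""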
    try:
        features = extract_features(audio_filepath, n_mfcc, max_pad_len, feature_type, sr)
        features_scaled = scaler.transform(features)  # apply the fitted feature scaler frame-wise
        features_scaled = features_scaled[np.newaxis, :, :]  # add batch dimension: (1, frames, features)
        pred_probs = model.predict(features_scaled)
        pred_idx = np.argmax(pred_probs, axis=1)[0]
        pred_lang = class_labels[pred_idx]
        confidence = float(pred_probs[0, pred_idx])
        all_probs = {l: float(p) for l, p in zip(class_labels, pred_probs[0])}
        prob_str = "\n".join([f"{l}: {p:.3f}" for l, p in all_probs.items()])
        return f"**Prediction:** {pred_lang}\n**Confidence:** {confidence:.2%}\n\n**Class Probabilities:**\n{prob_str}"
    except Exception as e:
        return f"Error processing audio: {str(e)}"

demo = gr.Interface(
    fn=predict_language,
    inputs=gr.Audio(type="filepath", label="Upload or record audio (.wav or .mp3)"),
    outputs=gr.Markdown(),
    title="Indic Language Classifier (Marathi, Telugu, Malayalam)",
    description="Record or upload an audio sample. The model predicts the language (Marathi, Telugu, or Malayalam)."
)

if __name__ == "__main__":
    demo.launch(show_error=True)