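"""Gradio app for Indic language identification (Marathi, Telugu, Malayalam).

Downloads a trained Keras classifier, a feature scaler, and its config from the
Hugging Face Hub, extracts MFCC-based features from uploaded or recorded audio,
and reports the predicted language with per-class probabilities.
"""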
import gradio as gr
import numpy as np
import librosa
import joblib
import json
import tensorflow as tf
from huggingface_hub import hf_hub_download
# Download artifacts from Hugging Face Hub
REPO_ID = "hriteshMaikap/languageClassifier"
MODEL_FILENAME = "indic_language_classifier_mtm.keras"
SCALER_FILENAME = "audio_feature_scaler_mtm.pkl"
CONFIG_FILENAME = "config_mtm.json"
model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)
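# hf_hub_download caches each artifact locally and returns its local file path.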
with open(config_path, "r") as f:
config = json.load(f)
n_mfcc = config["n_mfcc"]
max_pad_len = config["max_pad_len"]
feature_type = config["feature_type"]
class_labels = config["class_labels"]
sr = 22050  # Sample rate used during training
scaler = joblib.load(scaler_path)
model = tf.keras.models.load_model(model_path)
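# The classifier consumes sequences shaped (batch, max_pad_len, n_features);
# extract_features() below produces a single (max_pad_len, n_features) matrix.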
def extract_features(file_path, n_mfcc, max_pad_len, feature_type, sr):
    """Extract an MFCC-based feature matrix of shape (max_pad_len, n_features)."""
    audio, _ = librosa.load(file_path, sr=sr, mono=True, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
    if feature_type == 'mfcc_delta':
        # Stack MFCCs with their first- and second-order deltas.
        delta_mfccs = librosa.feature.delta(mfccs)
        delta2_mfccs = librosa.feature.delta(mfccs, order=2)
        features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
    else:
        # 'mfcc' (or any unrecognized feature type) falls back to plain MFCCs.
        features = mfccs
    # Truncate or zero-pad along the time axis to a fixed length.
    current_len = features.shape[1]
    if current_len > max_pad_len:
        features = features[:, :max_pad_len]
    elif current_len < max_pad_len:
        pad_width = max_pad_len - current_len
        features = np.pad(features, pad_width=((0, 0), (0, pad_width)), mode='constant')
    # Transpose so time steps come first: (max_pad_len, n_features).
    return features.T
def predict_language(audio_filepath):
    """Predict the language of an uploaded/recorded audio file; returns Markdown."""
    try:
        features = extract_features(audio_filepath, n_mfcc, max_pad_len, feature_type, sr)
        # Apply the scaler fitted during training, then add a batch dimension:
        # (1, max_pad_len, n_features).
        features_scaled = scaler.transform(features)
        features_scaled = features_scaled[np.newaxis, :, :]
        pred_probs = model.predict(features_scaled)
        pred_idx = np.argmax(pred_probs, axis=1)[0]
        pred_lang = class_labels[pred_idx]
        confidence = float(pred_probs[0, pred_idx])
        # Map every class label to its predicted probability for display.
        all_probs = {l: float(p) for l, p in zip(class_labels, pred_probs[0])}
        prob_str = "\n".join([f"{l}: {p:.3f}" for l, p in all_probs.items()])
        return f"**Prediction:** {pred_lang}\n**Confidence:** {confidence:.2%}\n\n**Class Probabilities:**\n{prob_str}"
    except Exception as e:
        return f"Error processing audio: {e}"
demo = gr.Interface(
fn=predict_language,
inputs=gr.Audio(type="filepath", label="Upload or record audio (.wav or .mp3)"),
outputs=gr.Markdown(),
title="Indic Language Classifier (Marathi, Telugu, Malayalam)",
description="Record or upload an audio sample. The model predicts the language (Marathi, Telugu, or Malayalam)."
)
if __name__ == "__main__":
demo.launch(show_error=True)