import gradio as gr
import numpy as np
import librosa
import joblib
import json
import tensorflow as tf
from huggingface_hub import hf_hub_download
# Download artifacts from Hugging Face Hub
REPO_ID = "hriteshMaikap/languageClassifier"
MODEL_FILENAME = "indic_language_classifier_mtm.keras"
SCALER_FILENAME = "audio_feature_scaler_mtm.pkl"
CONFIG_FILENAME = "config_mtm.json"
model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)
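# Load the feature-extraction settings saved alongside the model at training time.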
with open(config_path, "r") as f:
    config = json.load(f)
n_mfcc = config["n_mfcc"]
max_pad_len = config["max_pad_len"]
feature_type = config["feature_type"]
class_labels = config["class_labels"]
sr = 22050 # Use the same sample rate as in your training
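# Restore the fitted feature scaler and the trained Keras classifier.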
scaler = joblib.load(scaler_path)
model = tf.keras.models.load_model(model_path)
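
# Turn a raw audio file into the fixed-size (time_steps, n_features) matrix the model expects.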
def extract_features(file_path, n_mfcc, max_pad_len, feature_type, sr):
    # Load the audio as mono at the training sample rate.
    audio, _ = librosa.load(file_path, sr=sr, mono=True, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
    if feature_type == 'mfcc_delta':
        # Stack MFCCs with their first- and second-order deltas.
        delta_mfccs = librosa.feature.delta(mfccs)
        delta2_mfccs = librosa.feature.delta(mfccs, order=2)
        features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
    else:
        # 'mfcc' (or any unrecognized type) falls back to plain MFCCs.
        features = mfccs
    # Truncate or zero-pad along the time axis to a fixed number of frames.
    current_len = features.shape[1]
    if current_len > max_pad_len:
        features = features[:, :max_pad_len]
    elif current_len < max_pad_len:
        pad_width = max_pad_len - current_len
        features = np.pad(features, pad_width=((0, 0), (0, pad_width)), mode='constant')
    # Transpose to (time_steps, n_features) as expected by the model.
    return features.T
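
# Run a single prediction and format the result as Markdown for the Gradio output.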
def predict_language(audio_filepath):
    try:
        features = extract_features(audio_filepath, n_mfcc, max_pad_len, feature_type, sr)
        # Scale with the scaler fitted during training, then add a batch dimension.
        features_scaled = scaler.transform(features)
        features_scaled = features_scaled[np.newaxis, :, :]
        pred_probs = model.predict(features_scaled)
        pred_idx = np.argmax(pred_probs, axis=1)[0]
        pred_lang = class_labels[pred_idx]
        confidence = float(pred_probs[0, pred_idx])
        # Report per-class probabilities alongside the top prediction.
        all_probs = {l: float(p) for l, p in zip(class_labels, pred_probs[0])}
        prob_str = "\n".join([f"{l}: {p:.3f}" for l, p in all_probs.items()])
        return f"**Prediction:** {pred_lang}\n**Confidence:** {confidence:.2%}\n\n**Class Probabilities:**\n{prob_str}"
    except Exception as e:
        return f"Error processing audio: {str(e)}"
demo = gr.Interface(
    fn=predict_language,
    inputs=gr.Audio(type="filepath", label="Upload or record audio (.wav or .mp3)"),
    outputs=gr.Markdown(),
    title="Indic Language Classifier (Marathi, Telugu, Malayalam)",
    description="Record or upload an audio sample. The model predicts the language (Marathi, Telugu, or Malayalam)."
)
if __name__ == "__main__":
    demo.launch(show_error=True)