Spaces:
Build error
Build error
import gradio as gr | |
import numpy as np | |
import librosa | |
import tensorflow as tf | |
import json | |
import os | |
import joblib | |
from huggingface_hub import hf_hub_download | |
import warnings | |
# Silence librosa's FutureWarning/UserWarning chatter; warnings raised by
# other modules are still shown.
for _category in (FutureWarning, UserWarning):
    warnings.filterwarnings('ignore', category=_category, module='librosa')

# Hugging Face Hub repository and the artifact files fetched from it.
REPO_ID = "hriteshMaikap/languageClassifier"
MODEL_FILENAME = "indic_language_classifier.keras"
SCALER_FILENAME = "audio_feature_scaler.pkl"
CONFIG_FILENAME = "config.json"

# Loaded artifacts, filled in by load_artifacts(); None means "not loaded".
model = scaler = config = None
def load_artifacts():
    """Download and load the model, scaler and configuration from the Hub.

    The module-level ``model``, ``scaler`` and ``config`` globals are only
    replaced once all three artifacts have loaded, so a failure part-way
    through never leaves a half-initialised set behind.

    Returns:
        True if every artifact was loaded, False otherwise. Errors are
        printed rather than raised.
    """
    global model, scaler, config
    try:
        # Fetch each artifact from the Hub; the returned value is a local path.
        model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
        scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
        config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)

        # The model is only used for prediction here, so it is loaded
        # without compiling.
        loaded_model = tf.keras.models.load_model(model_path, compile=False)

        # NOTE(security): joblib.load unpickles the file and can execute
        # arbitrary code. This is only acceptable while REPO_ID is trusted.
        loaded_scaler = joblib.load(scaler_path)

        # Decode the JSON as UTF-8 explicitly instead of relying on the
        # platform's default encoding.
        with open(config_path, 'r', encoding='utf-8') as f:
            loaded_config = json.load(f)
    except Exception as e:
        print(f"Error loading artifacts: {e}")
        return False

    # Publish everything together now that nothing can fail any more.
    model, scaler, config = loaded_model, loaded_scaler, loaded_config
    return True
def extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type):
    """Compute a fixed-length MFCC feature matrix from an audio array.

    Args:
        audio: Audio samples, passed to librosa as ``y``.
        sample_rate: Sampling rate of ``audio``, passed to librosa as ``sr``.
        n_mfcc: Number of MFCC coefficients to compute.
        max_pad_len: Number of time frames in the result. Longer inputs are
            truncated; shorter ones are zero-padded at the end.
        feature_type: ``'mfcc_delta'`` stacks the MFCCs with their first- and
            second-order deltas; any other value yields the plain MFCCs.

    Returns:
        The feature matrix transposed so that time is the first axis, or
        None if extraction failed (the error is printed).
    """
    try:
        base = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)

        if feature_type == 'mfcc_delta':
            first_order = librosa.feature.delta(base)
            second_order = librosa.feature.delta(base, order=2)
            stacked = np.concatenate((base, first_order, second_order), axis=0)
        else:
            # 'mfcc' and any unrecognised feature type both use plain MFCCs.
            stacked = base

        # Positive shortfall: too few frames, pad. Negative: too many, cut.
        shortfall = max_pad_len - stacked.shape[1]
        if shortfall < 0:
            stacked = stacked[:, :max_pad_len]
        elif shortfall > 0:
            stacked = np.pad(
                stacked,
                pad_width=((0, 0), (0, shortfall)),
                mode='constant',
            )

        return stacked.T  # (time_steps, features)
    except Exception as e:
        print(f"Error extracting features: {e}")
        return None
def classify_language(audio_path):
    """Classify the spoken language of an audio file.

    Args:
        audio_path: Path to the audio file to classify. A missing value
            (None or an empty string) is rejected with an error message.

    Returns:
        A human-readable string. On success it contains the predicted
        language with its confidence, followed by the probability of every
        configured label. On any failure it is a message starting with
        "Error:".
    """
    # Reject missing input before doing any model work, so the user gets a
    # clear message instead of a low-level audio-loading exception.
    if not audio_path:
        return "Error: No audio provided. Please upload or record audio first."

    # Load artifacts on demand if the startup load did not succeed.
    if model is None or scaler is None or config is None:
        if not load_artifacts():
            return "Error: Failed to load model artifacts"

    try:
        # Feature-extraction parameters, with defaults for missing keys.
        n_mfcc = config.get('n_mfcc', 13)
        max_pad_len = config.get('max_pad_len', 100)
        feature_type = config.get('feature_type', 'mfcc_delta')
        class_labels = config.get('class_labels', [])
        n_features_expected = config.get('n_features_input', 39)

        # sr=None keeps the file's native sampling rate.
        audio, sample_rate = librosa.load(audio_path, sr=None, res_type='kaiser_fast')
        if audio.size == 0:
            return "Error: Audio file contains no samples"

        features = extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type)
        if features is None:
            return "Error: Failed to extract audio features"

        if features.shape[1] != n_features_expected:
            return f"Error: Extracted feature dimension ({features.shape[1]}) doesn't match expected ({n_features_expected})"

        # features is already (time_steps, n_features), which is the 2-D
        # layout the scaler is applied to; afterwards add the batch axis.
        features_scaled = scaler.transform(features)
        features_final = features_scaled.reshape(1, max_pad_len, n_features_expected)

        prediction_probabilities = model.predict(features_final, verbose=0)
        predicted_index = np.argmax(prediction_probabilities, axis=1)[0]

        # The label list comes from the config and may not match the number
        # of model outputs, so validate the index before using it.
        if not 0 <= predicted_index < len(class_labels):
            return f"Error: Predicted index {predicted_index} out of bounds for labels"

        probabilities = prediction_probabilities[0]
        predicted_language = class_labels[predicted_index]
        confidence = probabilities[predicted_index]

        results = [
            f"{lang}: {probabilities[i]:.2%}"
            for i, lang in enumerate(class_labels)
        ]
        result_text = f"Predicted Language: {predicted_language} (Confidence: {confidence:.2%})\n\n"
        result_text += "All Predictions:\n" + "\n".join(results)
        return result_text
    except Exception as e:
        import traceback
        # Full traceback goes to the log; the user only sees the short message.
        error_msg = f"Error during classification: {e}\n{traceback.format_exc()}"
        print(error_msg)
        return f"Error: {e}"
# Gradio UI: one audio input, delivered to the callback as a file path, and
# a plain-text output showing the classification result.
audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")

demo = gr.Interface(
    fn=classify_language,
    inputs=audio_input,
    outputs="text",
    title="Indian Language Classifier",
    description=(
        "Upload or record audio in an Indian language, and the model will "
        "identify which language it is. Supported languages are defined in "
        "the configuration file."
    ),
    examples=[],  # Example audio files can be listed here if available
    cache_examples=False,
)

# Try to load the artifacts at startup; classify_language() retries the
# load on demand if this attempt fails.
load_artifacts()

# Start the web app only when run as a script.
if __name__ == "__main__":
    demo.launch()