Spaces:

hriteshMaikap
/

indic-languages-classifier

Build error

App Files Files Community

indic-languages-classifier / app.py

hriteshMaikap

Update app.py

fa237f3 verified about 1 month ago

raw

history blame

5.85 kB

	import gradio as gr
	import numpy as np
	import librosa
	import tensorflow as tf
	import json
	import os
	import joblib
	from huggingface_hub import hf_hub_download
	import warnings

	# Ignore FutureWarning and UserWarning messages attributed to librosa modules.
	warnings.filterwarnings("ignore", category=FutureWarning, module="librosa")
	warnings.filterwarnings("ignore", category=UserWarning, module="librosa")

	# Hugging Face Hub repository and the artifact file names stored in it.
	REPO_ID = "hriteshMaikap/languageClassifier"
	MODEL_FILENAME = "indic_language_classifier.keras"
	SCALER_FILENAME = "audio_feature_scaler.pkl"
	CONFIG_FILENAME = "config.json"

	# Module-level slots for the loaded artifacts; they stay None until
	# load_artifacts() fills them in.
	model = scaler = config = None

	def load_artifacts():
	    """Download and load the model, scaler and configuration from the Hugging Face Hub.

	    The module-level ``model``, ``scaler`` and ``config`` globals are only
	    assigned once all three artifacts have loaded, so a failure part-way
	    through never leaves them half-populated.

	    Returns:
	        bool: True when every artifact was loaded; False otherwise (the
	        error is printed and the globals are left untouched).
	    """
	    global model, scaler, config

	    try:
	        # Fetch the three files from the Hub repository.
	        model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
	        scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
	        config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)

	        # compile=False: the model is loaded without compiling it.
	        loaded_model = tf.keras.models.load_model(model_path, compile=False)

	        # NOTE(security): joblib.load unpickles the file, which can execute
	        # arbitrary code. Only acceptable while REPO_ID is a trusted repository.
	        loaded_scaler = joblib.load(scaler_path)

	        # Decode explicitly as UTF-8 so non-ASCII values (e.g. class labels)
	        # are read the same way regardless of the platform's default locale.
	        with open(config_path, "r", encoding="utf-8") as f:
	            loaded_config = json.load(f)
	    except Exception as e:
	        # Top-level boundary: report the problem and signal failure to the caller.
	        print(f"Error loading artifacts: {e}")
	        return False

	    # Commit all artifacts together now that every load succeeded.
	    model, scaler, config = loaded_model, loaded_scaler, loaded_config
	    return True

	def extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type):
	    """Compute a fixed-length MFCC feature matrix from an in-memory audio array.

	    For ``feature_type == 'mfcc_delta'`` the MFCCs are stacked with their
	    first- and second-order deltas; any other value uses the plain MFCCs.
	    The time axis is then truncated or zero-padded to ``max_pad_len`` frames.

	    Returns:
	        The features transposed to (time_steps, features), or None if any
	        step raised (the error is printed).
	    """
	    try:
	        base = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)

	        if feature_type == 'mfcc_delta':
	            first_delta = librosa.feature.delta(base)
	            second_delta = librosa.feature.delta(base, order=2)
	            stacked = np.concatenate((base, first_delta, second_delta), axis=0)
	        else:
	            # 'mfcc' and any unrecognised type both use the plain MFCCs.
	            stacked = base

	        # Positive: frames to append; negative: frames to drop.
	        shortfall = max_pad_len - stacked.shape[1]
	        if shortfall < 0:
	            stacked = stacked[:, :max_pad_len]
	        elif shortfall > 0:
	            stacked = np.pad(
	                stacked,
	                pad_width=((0, 0), (0, shortfall)),
	                mode='constant',
	            )

	        return stacked.T
	    except Exception as exc:
	        print(f"Error extracting features: {exc}")
	        return None

	def classify_language(audio_path):
	    """Classify the language spoken in an audio file.

	    Args:
	        audio_path: Path to the audio file. An empty or None value is
	            rejected up front with an error message.

	    Returns:
	        str: A report with the predicted language, its confidence and the
	        probability of every label from the configuration, or a string
	        starting with "Error:" describing what went wrong.
	    """
	    global model, scaler, config

	    # Nothing to classify (e.g. the form was submitted without audio):
	    # fail fast with a clear message instead of an opaque loader error.
	    if not audio_path:
	        return "Error: No audio provided"

	    # Load artifacts on demand if the startup load did not succeed.
	    if model is None or scaler is None or config is None:
	        if not load_artifacts():
	            return "Error: Failed to load model artifacts"

	    try:
	        # Feature-extraction parameters, with defaults when a key is absent.
	        n_mfcc = config.get('n_mfcc', 13)
	        max_pad_len = config.get('max_pad_len', 100)
	        feature_type = config.get('feature_type', 'mfcc_delta')
	        class_labels = config.get('class_labels', [])
	        n_features_expected = config.get('n_features_input', 39)

	        # sr=None keeps the file's native sample rate.
	        # NOTE(review): confirm this matches the rate used during training.
	        audio, sample_rate = librosa.load(audio_path, sr=None, res_type='kaiser_fast')

	        features = extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type)
	        if features is None:
	            return "Error: Failed to extract audio features"

	        # The scaler and model both expect this per-frame feature width.
	        if features.shape[1] != n_features_expected:
	            return f"Error: Extracted feature dimension ({features.shape[1]}) doesn't match expected ({n_features_expected})"

	        # Scale frame by frame, then add the batch axis for the model.
	        features_reshaped = features.reshape(-1, n_features_expected)
	        features_scaled = scaler.transform(features_reshaped)
	        features_final = features_scaled.reshape(1, max_pad_len, n_features_expected)

	        prediction_probabilities = model.predict(features_final, verbose=0)
	        predicted_index = np.argmax(prediction_probabilities, axis=1)[0]

	        # Guard against a config whose label list is shorter than the model output.
	        if not 0 <= predicted_index < len(class_labels):
	            return f"Error: Predicted index {predicted_index} out of bounds for labels"

	        probabilities = prediction_probabilities[0]
	        predicted_language = class_labels[predicted_index]
	        confidence = probabilities[predicted_index]

	        # One "label: probability" line per configured label.
	        results = [
	            f"{lang}: {probabilities[i]:.2%}"
	            for i, lang in enumerate(class_labels)
	        ]

	        result_text = f"Predicted Language: {predicted_language} (Confidence: {confidence:.2%})\n\n"
	        result_text += "All Predictions:\n" + "\n".join(results)
	        return result_text

	    except Exception as e:
	        # Top-level boundary for the UI: log the full traceback to the
	        # console but show the user only the short message.
	        import traceback
	        error_msg = f"Error during classification: {e}\n{traceback.format_exc()}"
	        print(error_msg)
	        return f"Error: {str(e)}"

	# Build the Gradio UI: one audio input (delivered to the callback as a
	# file path) and a plain-text output produced by classify_language.
	audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")
	interface_description = "Upload or record audio in an Indian language, and the model will identify which language it is. Supported languages are defined in the configuration file."

	demo = gr.Interface(
	    title="Indian Language Classifier",
	    description=interface_description,
	    fn=classify_language,
	    inputs=audio_input,
	    outputs="text",
	    examples=[],  # Example audio files can be listed here if available
	    cache_examples=False,
	)

	# Load the artifacts once at import time; classify_language retries the
	# load itself if this attempt fails.
	load_artifacts()

	# Start the web server only when run as a script.
	if __name__ == "__main__":
	    demo.launch()