Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -1,217 +1,155 @@
|
|
1 |
import gradio as gr
|
2 |
-
import tensorflow as tf
|
3 |
import numpy as np
|
4 |
-
import joblib
|
5 |
-
import json
|
6 |
import librosa
|
7 |
-
import
|
|
|
8 |
import os
|
9 |
-
|
10 |
from huggingface_hub import hf_hub_download
|
|
|
11 |
|
12 |
-
#
|
13 |
-
|
14 |
-
|
15 |
-
format='%(asctime)s - %(levelname)s - %(message)s'
|
16 |
-
)
|
17 |
-
logger = logging.getLogger(__name__)
|
18 |
|
19 |
-
#
|
20 |
-
current_time = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
|
21 |
-
print(f"--- Script Start Time (UTC): {current_time} ---")
|
22 |
-
print(f"User: hriteshMaikap")
|
23 |
-
print(f"TensorFlow Version: {tf.__version__}")
|
24 |
-
print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")
|
25 |
-
|
26 |
-
# Define repository and file information
|
27 |
REPO_ID = "hriteshMaikap/languageClassifier"
|
28 |
-
MODEL_FILENAME = "
|
29 |
-
SCALER_FILENAME = "
|
30 |
-
CONFIG_FILENAME = "
|
31 |
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
# Input layer
|
37 |
-
model.add(tf.keras.layers.Input(shape=input_shape))
|
38 |
-
|
39 |
-
# Conv Block 1
|
40 |
-
model.add(tf.keras.layers.Conv1D(64, 5, strides=1, padding='same', name='Conv1D_1'))
|
41 |
-
model.add(tf.keras.layers.BatchNormalization(name='BatchNorm_1'))
|
42 |
-
model.add(tf.keras.layers.Activation('relu', name='ReLU_1'))
|
43 |
-
model.add(tf.keras.layers.MaxPooling1D(2, strides=2, padding='same', name='MaxPool_1'))
|
44 |
-
model.add(tf.keras.layers.Dropout(0.3, name='Dropout_1'))
|
45 |
-
|
46 |
-
# Conv Block 2
|
47 |
-
model.add(tf.keras.layers.Conv1D(128, 5, strides=1, padding='same', name='Conv1D_2'))
|
48 |
-
model.add(tf.keras.layers.BatchNormalization(name='BatchNorm_2'))
|
49 |
-
model.add(tf.keras.layers.Activation('relu', name='ReLU_2'))
|
50 |
-
model.add(tf.keras.layers.MaxPooling1D(2, strides=2, padding='same', name='MaxPool_2'))
|
51 |
-
model.add(tf.keras.layers.Dropout(0.3, name='Dropout_2'))
|
52 |
-
|
53 |
-
# Conv Block 3
|
54 |
-
model.add(tf.keras.layers.Conv1D(256, 5, strides=1, padding='same', name='Conv1D_3'))
|
55 |
-
model.add(tf.keras.layers.BatchNormalization(name='BatchNorm_3'))
|
56 |
-
model.add(tf.keras.layers.Activation('relu', name='ReLU_3'))
|
57 |
-
model.add(tf.keras.layers.MaxPooling1D(2, strides=2, padding='same', name='MaxPool_3'))
|
58 |
-
model.add(tf.keras.layers.Dropout(0.3, name='Dropout_3'))
|
59 |
-
|
60 |
-
# Flatten & Dense
|
61 |
-
model.add(tf.keras.layers.Flatten(name='Flatten'))
|
62 |
-
model.add(tf.keras.layers.Dense(256, name='Dense_1'))
|
63 |
-
model.add(tf.keras.layers.BatchNormalization(name='BatchNorm_Dense1'))
|
64 |
-
model.add(tf.keras.layers.Activation('relu', name='ReLU_Dense1'))
|
65 |
-
model.add(tf.keras.layers.Dropout(0.5, name='Dropout_Dense1'))
|
66 |
-
|
67 |
-
# Output
|
68 |
-
model.add(tf.keras.layers.Dense(num_classes, activation='softmax', name='Output_Softmax'))
|
69 |
-
|
70 |
-
return model
|
71 |
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
# Download files
|
77 |
-
model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
|
78 |
-
scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
|
79 |
-
config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)
|
80 |
-
|
81 |
-
logger.info("Files downloaded successfully")
|
82 |
-
|
83 |
-
# Load config first
|
84 |
-
with open(config_path, 'r') as f:
|
85 |
-
config = json.load(f)
|
86 |
-
logger.info("Config loaded successfully")
|
87 |
-
|
88 |
-
# Load scaler
|
89 |
-
scaler = joblib.load(scaler_path)
|
90 |
-
logger.info("Scaler loaded successfully")
|
91 |
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
|
|
|
|
|
|
|
|
108 |
|
109 |
-
def extract_features(
|
110 |
-
"""Extracts
|
111 |
try:
|
112 |
-
audio, sample_rate = librosa.load(file_path, sr=None, res_type='kaiser_fast')
|
113 |
mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
|
114 |
|
115 |
if feature_type == 'mfcc_delta':
|
116 |
delta_mfccs = librosa.feature.delta(mfccs)
|
117 |
delta2_mfccs = librosa.feature.delta(mfccs, order=2)
|
118 |
features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
|
119 |
-
|
120 |
features = mfccs
|
121 |
-
|
|
|
|
|
122 |
current_len = features.shape[1]
|
123 |
if current_len > max_pad_len:
|
124 |
features = features[:, :max_pad_len]
|
125 |
-
|
126 |
pad_width = max_pad_len - current_len
|
127 |
features = np.pad(features, pad_width=((0, 0), (0, pad_width)), mode='constant')
|
128 |
|
129 |
-
return features.T
|
130 |
except Exception as e:
|
131 |
-
|
132 |
return None
|
133 |
|
134 |
-
def classify_language(
|
135 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
try:
|
137 |
-
|
|
|
|
|
|
|
|
|
|
|
138 |
|
139 |
-
#
|
140 |
-
|
141 |
-
audio_file,
|
142 |
-
config['n_mfcc'],
|
143 |
-
config['max_pad_len'],
|
144 |
-
config['feature_type']
|
145 |
-
)
|
146 |
|
|
|
|
|
147 |
if features is None:
|
148 |
-
return "Error:
|
149 |
-
|
150 |
-
# Scale features
|
151 |
-
features_reshaped = features.reshape(-1, features.shape[1])
|
152 |
-
features_scaled = scaler.transform(features_reshaped)
|
153 |
-
features_scaled = features_scaled.reshape(1, config['max_pad_len'], -1)
|
154 |
|
155 |
-
#
|
156 |
-
|
|
|
157 |
|
158 |
-
#
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
}
|
163 |
|
164 |
-
#
|
165 |
-
|
166 |
-
|
167 |
-
key=lambda x: x[1],
|
168 |
-
reverse=True
|
169 |
-
))
|
170 |
|
171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
|
173 |
except Exception as e:
|
174 |
-
|
|
|
|
|
175 |
return f"Error: {str(e)}"
|
176 |
|
177 |
# Create Gradio interface
|
178 |
demo = gr.Interface(
|
179 |
fn=classify_language,
|
180 |
-
inputs=gr.Audio(
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
label="Language Prediction"
|
187 |
-
),
|
188 |
-
title="Indian Languages Audio Classifier",
|
189 |
-
description="""
|
190 |
-
## Classify Audio in Indian Languages
|
191 |
-
|
192 |
-
This model identifies the language being spoken in an audio clip, choosing between:
|
193 |
-
- Marathi (मराठी)
|
194 |
-
- Telugu (తెలుగు)
|
195 |
-
- Malayalam (മലയാളം)
|
196 |
-
|
197 |
-
### Instructions:
|
198 |
-
1. Click the microphone icon to record or upload an audio file
|
199 |
-
2. Submit to get the language classification
|
200 |
-
3. Results show confidence scores for each language
|
201 |
-
|
202 |
-
For best results:
|
203 |
-
- Use clear speech with minimal background noise
|
204 |
-
- Speak in one of the three supported languages
|
205 |
-
- Ensure good audio quality
|
206 |
-
""",
|
207 |
-
theme="huggingface",
|
208 |
-
allow_flagging="never"
|
209 |
)
|
210 |
|
|
|
|
|
|
|
211 |
# Launch the app
|
212 |
if __name__ == "__main__":
|
213 |
-
demo.launch(
|
214 |
-
show_error=True,
|
215 |
-
share=False,
|
216 |
-
debug=True
|
217 |
-
)
|
|
|
1 |
import gradio as gr
|
|
|
2 |
import numpy as np
|
|
|
|
|
3 |
import librosa
|
4 |
+
import tensorflow as tf
|
5 |
+
import json
|
6 |
import os
|
7 |
+
import joblib
|
8 |
from huggingface_hub import hf_hub_download
|
9 |
+
import warnings
|
10 |
|
11 |
+
# Suppress FutureWarning / UserWarning noise raised from librosa modules so
# the application log stays readable.
warnings.filterwarnings('ignore', category=FutureWarning, module='librosa')
warnings.filterwarnings('ignore', category=UserWarning, module='librosa')

# Model repository information: the Hugging Face Hub repo and the names of
# the three files that load_artifacts() downloads from it.
REPO_ID = "hriteshMaikap/languageClassifier"
MODEL_FILENAME = "indic_language_classifier.keras"
SCALER_FILENAME = "audio_feature_scaler.pkl"
CONFIG_FILENAME = "config.json"

# Loaded artifacts, shared module-wide. They stay None until
# load_artifacts() succeeds.
model = None
scaler = None
config = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
+
def load_artifacts():
    """Load model, scaler and configuration from Hugging Face Hub.

    Downloads MODEL_FILENAME, SCALER_FILENAME and CONFIG_FILENAME from
    REPO_ID, loads each of them, and stores the results in the module-level
    ``model``, ``scaler`` and ``config`` globals.

    Returns:
        bool: True when all three artifacts were loaded. False when any
        step raised; the error is printed and the globals keep whatever
        values they had before the call.
    """
    global model, scaler, config

    try:
        # Download files from Hugging Face Hub
        model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
        scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
        config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)

        # Load into locals first so a failure part-way through never leaves
        # the globals half-initialised.
        loaded_model = tf.keras.models.load_model(model_path, compile=False)

        # NOTE(review): joblib.load unpickles the file, which can execute
        # arbitrary code. This is only safe while REPO_ID is a trusted
        # repository.
        loaded_scaler = joblib.load(scaler_path)

        # Explicit encoding: do not depend on the platform default when
        # reading the JSON configuration.
        with open(config_path, 'r', encoding='utf-8') as f:
            loaded_config = json.load(f)
    except Exception as e:
        print(f"Error loading artifacts: {e}")
        return False

    model, scaler, config = loaded_model, loaded_scaler, loaded_config
    return True
|
50 |
|
51 |
+
def extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type):
    """Extract a fixed-length MFCC feature matrix from an audio array.

    Args:
        audio: Audio samples, passed to ``librosa.feature.mfcc`` as ``y``.
        sample_rate: Sampling rate of ``audio``, passed as ``sr``.
        n_mfcc: Number of MFCC coefficients to compute.
        max_pad_len: Number of frames in the result. Longer inputs are
            truncated; shorter ones are padded at the end with the
            ``np.pad`` 'constant' mode.
        feature_type: ``'mfcc_delta'`` stacks the MFCCs with their first-
            and second-order deltas. Any other value (including ``'mfcc'``)
            uses the plain MFCCs.

    Returns:
        The feature matrix transposed to (time_steps, features), or None
        if anything raised during extraction (the error is printed).
    """
    try:
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)

        if feature_type == 'mfcc_delta':
            delta_mfccs = librosa.feature.delta(mfccs)
            delta2_mfccs = librosa.feature.delta(mfccs, order=2)
            features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
        else:
            # 'mfcc' and every unrecognised feature_type fall back to the
            # plain MFCCs.
            features = mfccs

        # Force exactly max_pad_len frames along the time axis (axis 1).
        current_len = features.shape[1]
        if current_len > max_pad_len:
            features = features[:, :max_pad_len]
        elif current_len < max_pad_len:
            pad_width = max_pad_len - current_len
            features = np.pad(features, pad_width=((0, 0), (0, pad_width)), mode='constant')

        return features.T  # Transpose to (time_steps, features)
    except Exception as e:
        print(f"Error extracting features: {e}")
        return None
|
76 |
|
77 |
+
def classify_language(audio_path):
    """Process an audio file and classify its language.

    Args:
        audio_path: Path of the audio file to analyse, passed to
            ``librosa.load``. A missing value (``None`` or an empty
            string) is rejected before any other work is done.

    Returns:
        str: On success, a report with the predicted language, its
        confidence and the probability of every class label. Every failure
        is reported as a string starting with "Error:"; ordinary exceptions
        are caught and never propagate to the caller.
    """
    # NOTE(review): an empty submission is expected to arrive as None from
    # the Gradio Audio input. Reject it here instead of letting
    # librosa.load fail with an unrelated-looking message.
    if not audio_path:
        return "Error: No audio provided"

    # Load artifacts if not loaded (lazy retry when the startup load failed).
    if model is None or scaler is None or config is None:
        if not load_artifacts():
            return "Error: Failed to load model artifacts"

    try:
        # Get configuration parameters
        n_mfcc = config.get('n_mfcc', 13)
        max_pad_len = config.get('max_pad_len', 100)
        feature_type = config.get('feature_type', 'mfcc_delta')
        class_labels = config.get('class_labels', [])
        n_features_expected = config.get('n_features_input', 39)

        # Load audio at its native sampling rate (sr=None).
        audio, sample_rate = librosa.load(audio_path, sr=None, res_type='kaiser_fast')

        # Extract features
        features = extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type)
        if features is None:
            return "Error: Failed to extract audio features"

        # Verify feature dimensions before handing them to the scaler.
        if features.shape[1] != n_features_expected:
            return f"Error: Extracted feature dimension ({features.shape[1]}) doesn't match expected ({n_features_expected})"

        # Scale features as 2-D rows, then restore a single-sample batch of
        # shape (1, max_pad_len, n_features_expected) for the model.
        features_reshaped = features.reshape(-1, n_features_expected)
        features_scaled_reshaped = scaler.transform(features_reshaped)
        features_final = features_scaled_reshaped.reshape(1, max_pad_len, n_features_expected)

        # Predict
        prediction_probabilities = model.predict(features_final, verbose=0)
        predicted_index = np.argmax(prediction_probabilities, axis=1)[0]

        # argmax is never negative, so only the upper bound can be violated
        # (for example when the config has fewer labels than model outputs).
        if predicted_index >= len(class_labels):
            return f"Error: Predicted index {predicted_index} out of bounds for labels"

        # Map to language label
        probabilities = prediction_probabilities[0]
        predicted_language = class_labels[predicted_index]
        confidence = probabilities[predicted_index]

        # Prepare results to display all probabilities
        results = [
            f"{lang}: {probabilities[i]:.2%}"
            for i, lang in enumerate(class_labels)
        ]

        result_text = f"Predicted Language: {predicted_language} (Confidence: {confidence:.2%})\n\n"
        result_text += "All Predictions:\n" + "\n".join(results)
        return result_text

    except Exception as e:
        import traceback
        # Full traceback goes to the log; the caller only gets the short message.
        error_msg = f"Error during classification: {e}\n{traceback.format_exc()}"
        print(error_msg)
        return f"Error: {str(e)}"
|
138 |
|
139 |
# Create Gradio interface: one audio input (handed to classify_language as a
# file path) and a plain-text result.
demo = gr.Interface(
    fn=classify_language,
    inputs=gr.Audio(type="filepath", label="Upload or Record Audio"),
    outputs="text",
    title="Indian Language Classifier",
    description="Upload or record audio in an Indian language, and the model will identify which language it is. Supported languages are defined in the configuration file.",
    examples=[],  # You can add example audio files here if available
    cache_examples=False
)

# Load artifacts on startup. A failure here is not fatal because
# classify_language() retries the load when it finds the artifacts missing.
if not load_artifacts():
    print("Artifacts were not loaded at startup; loading will be retried on the first request.")

# Launch the app
if __name__ == "__main__":
    demo.launch()
|
|
|
|
|
|
|
|