hriteshMaikap committed on
Commit
fa237f3
·
verified ·
1 Parent(s): 1fe94ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -169
app.py CHANGED
@@ -1,217 +1,155 @@
1
  import gradio as gr
2
- import tensorflow as tf
3
  import numpy as np
4
- import joblib
5
- import json
6
  import librosa
7
- import logging
 
8
  import os
9
- from datetime import datetime
10
  from huggingface_hub import hf_hub_download
 
11
 
12
# Local import: the file-level import only brings in `datetime`, and the
# timezone-aware clock below also needs `timezone`.
from datetime import timezone

# Configure logging: INFO and above, with a timestamp and level prefix.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# Print initialization info.
# datetime.utcnow() is deprecated since Python 3.12; an aware UTC timestamp
# formats to exactly the same string with this strftime pattern.
current_time = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
print(f"--- Script Start Time (UTC): {current_time} ---")
print("User: hriteshMaikap")
print(f"TensorFlow Version: {tf.__version__}")
print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")

# Define repository and file information (passed to hf_hub_download below).
REPO_ID = "hriteshMaikap/languageClassifier"
MODEL_FILENAME = "indic_language_classifier_mtm.keras"
SCALER_FILENAME = "audio_feature_scaler_mtm.pkl"
CONFIG_FILENAME = "config_mtm.json"
31
 
32
def build_model(input_shape, num_classes):
    """Recreate the model architecture.

    Assembles a Sequential 1D CNN named ``Audio_CNN_1D_MTM``: three
    Conv1D -> BatchNorm -> ReLU -> MaxPool -> Dropout(0.3) blocks with
    64, 128 and 256 filters, followed by Flatten, a 256-unit dense block
    with Dropout(0.5), and a softmax output layer.

    Args:
        input_shape: Shape handed to the Keras ``Input`` layer.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The assembled, uncompiled ``tf.keras.Sequential`` model.
    """
    kl = tf.keras.layers
    net = tf.keras.Sequential(name="Audio_CNN_1D_MTM")
    net.add(kl.Input(shape=input_shape))

    # The three convolutional blocks differ only in filter count and in the
    # numeric suffix of their layer names.
    for idx, n_filters in enumerate((64, 128, 256), start=1):
        net.add(kl.Conv1D(n_filters, 5, strides=1, padding='same', name=f'Conv1D_{idx}'))
        net.add(kl.BatchNormalization(name=f'BatchNorm_{idx}'))
        net.add(kl.Activation('relu', name=f'ReLU_{idx}'))
        net.add(kl.MaxPooling1D(2, strides=2, padding='same', name=f'MaxPool_{idx}'))
        net.add(kl.Dropout(0.3, name=f'Dropout_{idx}'))

    # Classification head: flatten, one dense block, softmax output.
    head = (
        kl.Flatten(name='Flatten'),
        kl.Dense(256, name='Dense_1'),
        kl.BatchNormalization(name='BatchNorm_Dense1'),
        kl.Activation('relu', name='ReLU_Dense1'),
        kl.Dropout(0.5, name='Dropout_Dense1'),
        kl.Dense(num_classes, activation='softmax', name='Output_Softmax'),
    )
    for layer in head:
        net.add(layer)

    return net
71
 
72
# Load resources: download the three artifacts, read the config and scaler,
# then rebuild the network and copy the trained weights into it. Any failure
# is logged with its traceback and re-raised so the app does not start in a
# half-initialised state.
try:
    logger.info(f"Downloading resources from {REPO_ID}")

    # Download files
    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
    scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
    config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)

    logger.info("Files downloaded successfully")

    # Load config first: the model's input shape and class count come from it.
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)
    logger.info("Config loaded successfully")

    # Load scaler
    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code; only safe while REPO_ID is a trusted repository.
    scaler = joblib.load(scaler_path)
    logger.info("Scaler loaded successfully")

    # Build and load model
    input_shape = (config['max_pad_len'], config['n_features_input'])
    num_classes = len(config['class_labels'])

    # Create model with same architecture
    model = build_model(input_shape, num_classes)

    # Load weights from saved model
    saved_model = tf.keras.models.load_model(model_path, compile=False)
    model.set_weights(saved_model.get_weights())

    logger.info("Model rebuilt and weights loaded successfully")

except Exception as e:
    # logger.exception records the traceback; the bare `raise` re-raises the
    # active exception without adding this frame to it.
    logger.exception(f"Error loading resources: {e}")
    raise
 
 
 
 
108
 
109
def extract_features(file_path, n_mfcc, max_pad_len, feature_type):
    """Load an audio file and return a fixed-length feature matrix.

    Computes MFCCs at the file's native sample rate; when ``feature_type``
    is ``'mfcc_delta'`` the first- and second-order deltas are stacked
    underneath them. The second axis is then truncated or zero-padded to
    ``max_pad_len`` and the result is transposed.

    Args:
        file_path: Path handed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients to compute.
        max_pad_len: Target length of the second axis before transposing.
        feature_type: ``'mfcc_delta'`` to include deltas; anything else
            yields plain MFCCs.

    Returns:
        The transposed feature array with ``max_pad_len`` rows, or ``None``
        if any step raised (the error is logged).
    """
    try:
        signal, rate = librosa.load(file_path, sr=None, res_type='kaiser_fast')
        base = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=n_mfcc)

        if feature_type != 'mfcc_delta':
            stacked = base
        else:
            first_order = librosa.feature.delta(base)
            second_order = librosa.feature.delta(base, order=2)
            stacked = np.concatenate((base, first_order, second_order), axis=0)

        n_frames = stacked.shape[1]
        if n_frames > max_pad_len:
            stacked = stacked[:, :max_pad_len]
        else:
            # Also reached when the length already matches (zero padding).
            missing = max_pad_len - n_frames
            stacked = np.pad(stacked, pad_width=((0, 0), (0, missing)), mode='constant')

        return stacked.T
    except Exception as e:
        logger.error(f"Error extracting features: {e}")
        return None
133
 
134
def classify_language(audio_file):
    """Main function to classify the language of an audio file.

    Extracts features using the settings in the module-level ``config``,
    scales them with the module-level ``scaler``, runs the module-level
    ``model`` and maps each class label to its predicted probability.

    Args:
        audio_file: Value forwarded to ``extract_features`` as the file path.

    Returns:
        A dict of ``label -> probability`` ordered from most to least
        likely, or an ``"Error: ..."`` string when processing fails.
    """
    try:
        logger.info(f"Processing audio file of type: {type(audio_file)}")

        feats = extract_features(
            audio_file,
            config['n_mfcc'],
            config['max_pad_len'],
            config['feature_type'],
        )
        if feats is None:
            return "Error: Could not process audio file"

        # Scale as a 2-D (rows, features) array, then add the batch axis.
        flat = feats.reshape(-1, feats.shape[1])
        scaled = scaler.transform(flat)
        batch = scaled.reshape(1, config['max_pad_len'], -1)

        probabilities = model.predict(batch, verbose=0)[0]

        scores = dict(zip(config['class_labels'], (float(p) for p in probabilities)))
        ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
        return dict(ranked)

    except Exception as e:
        logger.error(f"Error in classification: {e}")
        return f"Error: {str(e)}"
176
 
177
# Create Gradio interface
# Markdown shown under the title; kept as a named constant so the
# Interface call below stays short.
_DESCRIPTION = """
## Classify Audio in Indian Languages

This model identifies the language being spoken in an audio clip, choosing between:
- Marathi (मराठी)
- Telugu (తెలుగు)
- Malayalam (മലയാളം)

### Instructions:
1. Click the microphone icon to record or upload an audio file
2. Submit to get the language classification
3. Results show confidence scores for each language

For best results:
- Use clear speech with minimal background noise
- Speak in one of the three supported languages
- Ensure good audio quality
"""

# The audio component hands classify_language a file path.
_audio_input = gr.Audio(type="filepath", label="Audio Input")
# The label component is limited to the three highest-scoring classes.
_prediction_output = gr.Label(num_top_classes=3, label="Language Prediction")

demo = gr.Interface(
    fn=classify_language,
    inputs=_audio_input,
    outputs=_prediction_output,
    title="Indian Languages Audio Classifier",
    description=_DESCRIPTION,
    theme="huggingface",
    allow_flagging="never",
)

# Launch the app
if __name__ == "__main__":
    _launch_options = {"show_error": True, "share": False, "debug": True}
    demo.launch(**_launch_options)
 
1
  import gradio as gr
 
2
  import numpy as np
 
 
3
  import librosa
4
+ import tensorflow as tf
5
+ import json
6
  import os
7
+ import joblib
8
  from huggingface_hub import hf_hub_download
9
+ import warnings
10
 
11
# Suppress specific warnings: FutureWarning and UserWarning raised from
# modules whose name starts with "librosa".
for _category in (FutureWarning, UserWarning):
    warnings.filterwarnings('ignore', category=_category, module='librosa')
del _category

# Model repository information
REPO_ID = "hriteshMaikap/languageClassifier"
MODEL_FILENAME = "indic_language_classifier.keras"
SCALER_FILENAME = "audio_feature_scaler.pkl"
CONFIG_FILENAME = "config.json"

# Initialize global variables to store loaded artifacts.
# They stay None until load_artifacts() fills them in.
model = scaler = config = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
def load_artifacts():
    """Load model, scaler and configuration from Hugging Face Hub.

    Downloads the three files named by ``MODEL_FILENAME``,
    ``SCALER_FILENAME`` and ``CONFIG_FILENAME`` from ``REPO_ID`` and loads
    them. The module-level ``model``, ``scaler`` and ``config`` are only
    assigned once all three have loaded, so a failure part-way through
    never leaves them partially populated.

    Returns:
        True if every artifact loaded; False otherwise (the error is
        printed and the globals are left untouched).
    """
    global model, scaler, config

    try:
        # Download files from Hugging Face Hub
        model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
        scaler_path = hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME)
        config_path = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)

        # Load model (inference only, so skip compilation)
        loaded_model = tf.keras.models.load_model(model_path, compile=False)

        # Load scaler
        # NOTE(review): joblib.load unpickles the file, which can execute
        # arbitrary code; only safe while REPO_ID is a trusted repository.
        loaded_scaler = joblib.load(scaler_path)

        # Load configuration; explicit UTF-8 so decoding does not depend on
        # the platform locale.
        with open(config_path, 'r', encoding='utf-8') as f:
            loaded_config = json.load(f)
    except Exception as e:
        print(f"Error loading artifacts: {e}")
        return False

    # Publish all three together only after everything succeeded.
    model, scaler, config = loaded_model, loaded_scaler, loaded_config
    return True
50
 
51
def extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type):
    """Extract a fixed-length feature matrix directly from an audio array.

    Computes MFCCs; when ``feature_type`` is ``'mfcc_delta'`` the first-
    and second-order deltas are stacked underneath them. The second axis is
    then truncated or zero-padded to ``max_pad_len`` and the result is
    transposed.

    Args:
        audio: Audio samples passed to ``librosa.feature.mfcc`` as ``y``.
        sample_rate: Sampling rate of ``audio``.
        n_mfcc: Number of MFCC coefficients to compute.
        max_pad_len: Target length of the second axis before transposing.
        feature_type: ``'mfcc_delta'`` to include deltas; ``'mfcc'`` or any
            unrecognised value yields plain MFCCs.

    Returns:
        Array of shape ``(max_pad_len, n_features)``, or ``None`` when the
        input is missing/empty or extraction raised (a message is printed).
    """
    # Reject missing or empty input up front instead of relying on librosa
    # to raise somewhere inside the try block.
    if audio is None or np.size(audio) == 0:
        print("Error extracting features: empty audio input")
        return None

    try:
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)

        if feature_type == 'mfcc_delta':
            delta_mfccs = librosa.feature.delta(mfccs)
            delta2_mfccs = librosa.feature.delta(mfccs, order=2)
            features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
        else:
            # 'mfcc' and any unknown feature_type both fall back to plain MFCCs.
            features = mfccs

        current_len = features.shape[1]
        if current_len > max_pad_len:
            features = features[:, :max_pad_len]
        elif current_len < max_pad_len:
            pad_width = max_pad_len - current_len
            features = np.pad(features, pad_width=((0, 0), (0, pad_width)), mode='constant')

        return features.T  # Transpose to (time_steps, features)
    except Exception as e:
        print(f"Error extracting features: {e}")
        return None
76
 
77
def classify_language(audio_path):
    """Process an audio file and classify its language.

    Loads the audio at ``audio_path``, extracts features using the settings
    in the module-level ``config``, scales them with ``scaler``, runs
    ``model`` and formats the probabilities as text. Artifacts are loaded
    on demand via ``load_artifacts()`` if they are not available yet.

    Args:
        audio_path: Path of the audio file to classify.

    Returns:
        A multi-line string with the predicted language, its confidence and
        the probability of every class label, or an ``"Error: ..."`` string
        describing what went wrong.
    """
    # Validate the input before touching the model so a submission without
    # audio gets a clear message rather than a librosa traceback.
    # NOTE(review): Gradio is expected to pass None when no clip was
    # supplied -- confirm against the installed version.
    if not audio_path:
        return "Error: No audio provided. Please upload or record an audio clip."

    # Load artifacts if not loaded
    if model is None or scaler is None or config is None:
        if not load_artifacts():
            return "Error: Failed to load model artifacts"

    try:
        # Get configuration parameters
        n_mfcc = config.get('n_mfcc', 13)
        max_pad_len = config.get('max_pad_len', 100)
        feature_type = config.get('feature_type', 'mfcc_delta')
        class_labels = config.get('class_labels', [])
        n_features_expected = config.get('n_features_input', 39)

        # Load and process audio at its native sample rate
        audio, sample_rate = librosa.load(audio_path, sr=None, res_type='kaiser_fast')

        # Extract features
        features = extract_features(audio, sample_rate, n_mfcc, max_pad_len, feature_type)
        if features is None:
            return "Error: Failed to extract audio features"

        # Verify feature dimensions
        if features.shape[1] != n_features_expected:
            return f"Error: Extracted feature dimension ({features.shape[1]}) doesn't match expected ({n_features_expected})"

        # Scale features. They are already 2-D with n_features_expected
        # columns (checked above), so only the batch axis has to be added.
        features_scaled = scaler.transform(features)
        features_final = features_scaled.reshape(1, max_pad_len, n_features_expected)

        # Predict
        prediction_probabilities = model.predict(features_final, verbose=0)
        probabilities = prediction_probabilities[0]
        predicted_index = np.argmax(prediction_probabilities, axis=1)[0]

        # Map to language label
        if not 0 <= predicted_index < len(class_labels):
            return f"Error: Predicted index {predicted_index} out of bounds for labels"

        predicted_language = class_labels[predicted_index]
        confidence = probabilities[predicted_index]

        # Prepare results to display all probabilities
        results = [
            f"{lang}: {probabilities[i]:.2%}"
            for i, lang in enumerate(class_labels)
        ]

        result_text = f"Predicted Language: {predicted_language} (Confidence: {confidence:.2%})\n\n"
        result_text += "All Predictions:\n" + "\n".join(results)
        return result_text

    except Exception as e:
        # Print the full traceback for the server log, but return only the
        # short message as the function's result.
        import traceback
        error_msg = f"Error during classification: {e}\n{traceback.format_exc()}"
        print(error_msg)
        return f"Error: {str(e)}"
138
 
139
# Create Gradio interface
# The audio component hands classify_language a file path.
_audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")
_description = (
    "Upload or record audio in an Indian language, and the model will "
    "identify which language it is. Supported languages are defined in "
    "the configuration file."
)

demo = gr.Interface(
    fn=classify_language,
    inputs=_audio_input,
    outputs="text",
    title="Indian Language Classifier",
    description=_description,
    examples=[],  # You can add example audio files here if available
    cache_examples=False,
)

# Load artifacts on startup. The return value is not checked here:
# classify_language calls load_artifacts() again if anything is still None.
load_artifacts()

# Launch the app
if __name__ == "__main__":
    demo.launch()