# Spaces: Running on Zero
import os | |
import torch | |
import joblib | |
import numpy as np | |
from sklearn.impute import SimpleImputer | |
from NN_classifier.simple_binary_classifier import Medium_Binary_Network | |
from NN_classifier.neural_net_t import Neural_Network | |
from feature_extraction import extract_features | |
import pandas as pd | |
# Device used for model weights and input tensors: CUDA when torch reports it
# as available, otherwise the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
def load_model(model_dir='models/medium_binary_classifier'):
    """Load the ``Medium_Binary_Network`` model and its preprocessing artifacts.

    Reads ``nn_model.pt``, ``scaler.joblib``, ``label_encoder.joblib`` and,
    when present, ``imputer.joblib`` from ``model_dir``.

    Args:
        model_dir: Directory containing the saved artifacts.

    Returns:
        Tuple ``(model, scaler, label_encoder, imputer)``. ``imputer`` is
        ``None`` when ``imputer.joblib`` does not exist. The model is moved to
        ``DEVICE`` and switched to eval mode.

    Raises:
        FileNotFoundError: If ``nn_model.pt`` is missing from ``model_dir``.
    """
    def artifact(filename):
        # Every artifact lives directly inside model_dir.
        return os.path.join(model_dir, filename)

    weights_file = artifact('nn_model.pt')
    if not os.path.exists(weights_file):
        raise FileNotFoundError(f"Model not found at: {weights_file}")

    label_encoder = joblib.load(artifact('label_encoder.joblib'))
    scaler = joblib.load(artifact('scaler.joblib'))

    # The imputer is optional; callers receive None when it was not saved.
    imputer_file = artifact('imputer.joblib')
    if os.path.exists(imputer_file):
        imputer = joblib.load(imputer_file)
    else:
        imputer = None
        print("Warning: Imputer not found, will create a new one during classification")

    # The input width of the network is taken from the fitted scaler.
    network = Medium_Binary_Network(
        scaler.n_features_in_,
        hidden_sizes=[256, 192, 128, 64],
        dropout=0.3,
    )
    network = network.to(DEVICE)
    network.load_state_dict(torch.load(weights_file, map_location=DEVICE))
    network.eval()

    # Diagnostic output only: report which feature names the imputer recorded.
    if imputer is not None:
        try:
            if not hasattr(imputer, 'feature_names_in_'):
                print("Warning: Imputer does not have feature_names_in_ attribute")
            else:
                feature_names = imputer.feature_names_in_
                print(f"Imputer has {len(feature_names)} features")
                print(f"First few feature names: {feature_names[:5]}")
        except Exception as e:
            print(f"Error checking imputer: {str(e)}")

    return network, scaler, label_encoder, imputer
def load_ternary_model(model_dir='models/neural_network'):
    """Load the ``Neural_Network`` classifier and its preprocessing artifacts.

    Reads ``nn_model.pt``, ``scaler.joblib``, ``label_encoder.joblib`` and,
    when present, ``imputer.joblib`` from ``model_dir``. The number of output
    classes is taken from the label encoder.

    Args:
        model_dir: Directory containing the saved artifacts.

    Returns:
        Tuple ``(model, scaler, label_encoder, imputer)``. ``imputer`` is
        ``None`` when ``imputer.joblib`` does not exist. The model is moved to
        ``DEVICE`` and switched to eval mode.

    Raises:
        FileNotFoundError: If ``nn_model.pt`` is missing from ``model_dir``.
    """
    weights_file, scaler_file, encoder_file, imputer_file = (
        os.path.join(model_dir, filename)
        for filename in (
            'nn_model.pt',
            'scaler.joblib',
            'label_encoder.joblib',
            'imputer.joblib',
        )
    )

    if not os.path.exists(weights_file):
        raise FileNotFoundError(f"Model not found at: {weights_file}")

    label_encoder = joblib.load(encoder_file)
    scaler = joblib.load(scaler_file)

    # The imputer is optional; callers receive None when it was not saved.
    imputer_available = os.path.exists(imputer_file)
    imputer = joblib.load(imputer_file) if imputer_available else None
    if not imputer_available:
        print("Warning: Imputer not found, will create a new one during classification")

    # Network dimensions come from the fitted preprocessing objects.
    class_labels = label_encoder.classes_
    num_classes = len(class_labels)
    network = Neural_Network(
        scaler.n_features_in_,
        hidden_layers=[128, 96, 64, 32],
        num_classes=num_classes,
        dropout_rate=0.1,
    ).to(DEVICE)
    state_dict = torch.load(weights_file, map_location=DEVICE)
    network.load_state_dict(state_dict)
    network.eval()
    print(f"Loaded ternary classifier model with {num_classes} classes: {label_encoder.classes_}")

    # Diagnostic output only: report which feature names the imputer recorded.
    if imputer is None:
        return network, scaler, label_encoder, imputer
    try:
        if hasattr(imputer, 'feature_names_in_'):
            print(f"Imputer has {len(imputer.feature_names_in_)} features")
            print(f"First few feature names: {imputer.feature_names_in_[:5]}")
        else:
            print("Warning: Imputer does not have feature_names_in_ attribute")
    except Exception as e:
        print(f"Error checking imputer: {str(e)}")
    return network, scaler, label_encoder, imputer
def _align_features(features_df, expected_feature_names):
    """Return ``features_df`` with exactly ``expected_feature_names`` as columns, in that order."""
    # reindex keeps the row index, so features absent from the extraction
    # become NaN columns with the right number of rows even when no extracted
    # column matches; unexpected extra columns are dropped.
    aligned = features_df.reindex(columns=list(expected_feature_names))
    for col in aligned.columns:
        # Features that were not extracted (or hold no value at all) are
        # zero-filled rather than left for the imputer.
        if aligned[col].isnull().all():
            aligned[col] = 0
            print(f"Added missing feature: {col}")
    return aligned


def classify_text(text, model, scaler, label_encoder, imputer=None, scores=None):
    """Classify ``text`` and return the predicted label with class probabilities.

    Features come from ``extract_features``. They are aligned to the columns
    the imputer was fitted on (when it recorded them), imputed, scaled with
    ``scaler`` and passed through ``model``.

    Args:
        text: Input forwarded to ``extract_features``.
        model: Callable network; its output is soft-maxed over ``dim=1``. The
            prediction is read with ``.item()``, so a single sample is expected.
        scaler: Fitted transformer exposing ``transform``.
        label_encoder: Fitted encoder whose ``classes_`` maps output indices
            to labels.
        imputer: Optional fitted imputer. When it exposes ``feature_names_in_``
            the extracted features are aligned to those names; when ``None`` a
            mean ``SimpleImputer`` is fitted on the extracted features.
        scores: Optional value forwarded to ``extract_features`` and echoed
            back in the result.

    Returns:
        dict with keys ``'predicted_class'``, ``'probabilities'`` (label ->
        probability), ``'features'`` (the feature frame fed to the imputer),
        ``'text_analysis'`` and ``'scores'``.
    """
    features_df, text_analysis = extract_features(text, scores=scores)

    # An imputer fitted on a plain array has no feature_names_in_; in that
    # case keep the extracted column order instead of raising AttributeError.
    expected_feature_names = getattr(imputer, 'feature_names_in_', None)
    if expected_feature_names is not None:
        features_df = _align_features(features_df, expected_feature_names)

    if imputer is None:
        print("Warning: No imputer provided, creating a new one")
        # NOTE(review): the fallback is fitted on the sample being classified.
        # scikit-learn documents that a mean SimpleImputer discards columns
        # with no observed values, which could change the feature count seen
        # by the scaler - confirm this path is acceptable.
        imputer = SimpleImputer(strategy='mean')
        features = imputer.fit_transform(features_df)
    else:
        features = imputer.transform(features_df)

    features_scaled = scaler.transform(features)
    features_tensor = torch.FloatTensor(features_scaled).to(DEVICE)

    with torch.no_grad():
        outputs = model(features_tensor)
        probabilities = torch.softmax(outputs, dim=1)
        pred_class = torch.argmax(probabilities, dim=1).item()

    predicted_label = label_encoder.classes_[pred_class]
    probs_dict = {
        label: probabilities[0][i].item()
        for i, label in enumerate(label_encoder.classes_)
    }

    return {
        'predicted_class': predicted_label,
        'probabilities': probs_dict,
        'features': features_df,
        'text_analysis': text_analysis,
        'scores': scores
    }