Spaces:
Sleeping
Sleeping
import pandas as pd | |
import numpy as np | |
from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
from sklearn.compose import ColumnTransformer | |
from sklearn.pipeline import Pipeline | |
from sklearn.model_selection import train_test_split | |
def load_data(filepath, **read_csv_kwargs):
    """Load a CSV file into a pandas DataFrame.

    Args:
        filepath: Location of the CSV data; handed straight to
            ``pandas.read_csv`` as its first argument.
        **read_csv_kwargs: Optional keyword arguments forwarded unchanged to
            ``pandas.read_csv`` (for example ``sep``, ``encoding`` or
            ``usecols``). Omitting them gives the plain
            ``pd.read_csv(filepath)`` call.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    return pd.read_csv(filepath, **read_csv_kwargs)
def preprocess_data(df, target_column='loan_approved'):
    """Build an unfitted preprocessor and split the data into train/test sets.

    Numeric feature columns are routed to a ``StandardScaler`` and
    categorical feature columns to a ``OneHotEncoder`` that ignores
    categories unseen during fitting. The returned ``ColumnTransformer`` is
    only constructed here, never fitted; the caller is expected to fit it
    (typically on ``X_train``).

    Args:
        df: DataFrame containing the feature columns and the target column.
        target_column: Name of the column to use as the prediction target.

    Returns:
        A tuple ``(preprocessor, X_train, X_test, y_train, y_test)`` where the
        split holds out 20% of the rows for testing and uses
        ``random_state=42`` for reproducibility.

    Raises:
        KeyError: If ``target_column`` is not a column of ``df``.
    """
    if target_column not in df.columns:
        raise KeyError(
            f"target column {target_column!r} not found in DataFrame columns"
        )

    # Split features and target
    X = df.drop(columns=[target_column])
    y = df[target_column]

    # Identify numeric and categorical columns. Select by dtype family rather
    # than by exact dtype: a column matched by neither selector would be
    # discarded by ColumnTransformer's default remainder='drop', so int32 /
    # float32 / category / bool columns must not fall through the cracks.
    numeric_features = X.select_dtypes(include='number').columns
    categorical_features = X.select_dtypes(
        include=['object', 'category', 'bool']
    ).columns

    # Create preprocessing pipelines
    numeric_transformer = Pipeline(steps=[
        ('scaler', StandardScaler()),
    ])
    categorical_transformer = Pipeline(steps=[
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ])

    # Combine preprocessing steps
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features),
        ])

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    return preprocessor, X_train, X_test, y_train, y_test