Maaz Uddin
allfilesupload
e0a433a
raw
history blame contribute delete
1.38 kB
from src.preprocessing import Preprocessing
from src.model import ModelBuilder
from tests import test2direct
import pandas as pd
import pickle
import os
def main():
# Load the dataset
data = pd.read_csv("data/bengaluru_house_prices.csv")
# Preprocess the data
print("Starting Data Preprocessing...")
preprocessor = Preprocessing(data)
preprocessor.clean_data()
preprocessor.feature_engineering()
preprocessor.remove_bhk_outliers()
preprocessor.encode_features()
preprocessor.scale_features()
preprocessor.handle_missing_values()
print("Preprocessing completed!")
# Build and evaluate the model
print("Starting Model Building and Evaluation...")
model_builder = ModelBuilder(data=preprocessor.data)
X_train, X_test, y_train, y_test = model_builder.split_data(target_column='price')
model_builder.train_model(X_train, y_train)
mse, r2 = model_builder.evaluate_model(X_test, y_test)
#print(f"Model Evaluation:\nMean Squared Error: {mse}\nR2 Score: {r2}")
# Save the trained model
print("Trained model saved successfully!")
# Save the trained model as a pickle file
model_builder.save_model_as_pickle()
# Save the feature names as a pickle file
model_builder.save_features_as_pickle(data=preprocessor.data)
test2direct.main()
if __name__ == "__main__":
main()