Maaz Uddin
commited on
Commit
·
e0a433a
1
Parent(s):
e7769d4
allfilesupload
Browse files- .gitattributes +1 -35
- .github/workflows/python-app.yml +67 -0
- .gitignore +171 -0
- README.md +184 -9
- data/bengaluru_house_prices.csv +0 -0
- dockerfile +16 -0
- images/homepage.png +0 -0
- images/predicted_results.jpg +0 -0
- images/temp +1 -0
- main.py +46 -0
- models/feature_names.pkl +0 -0
- models/lr_regg.pkl +0 -0
- notebooks/Analysis_notebook.ipynb +0 -0
- setup.py +17 -0
- src/EDA.py +105 -0
- src/model.py +103 -0
- src/preprocessing.py +169 -0
- templates/index.html +114 -0
- templates/results.html +66 -0
- tests/__init__.py +1 -0
- tests/test2direct.py +91 -0
- tests/test_model.py +76 -0
.gitattributes
CHANGED
@@ -1,35 +1 @@
|
|
1 |
-
*.
|
2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
+
*.ipynb linguist-detectable=false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.github/workflows/python-app.yml
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Python application
|
2 |
+
|
3 |
+
on:
|
4 |
+
push:
|
5 |
+
branches: [ "main" ]
|
6 |
+
pull_request:
|
7 |
+
branches: [ "main" ]
|
8 |
+
|
9 |
+
permissions:
|
10 |
+
contents: read
|
11 |
+
|
12 |
+
jobs:
|
13 |
+
build:
|
14 |
+
runs-on: ubuntu-latest
|
15 |
+
|
16 |
+
steps:
|
17 |
+
- uses: actions/checkout@v4
|
18 |
+
|
19 |
+
- name: Set up Python 3.10
|
20 |
+
uses: actions/setup-python@v3
|
21 |
+
with:
|
22 |
+
python-version: "3.10"
|
23 |
+
|
24 |
+
- name: Install dependencies
|
25 |
+
run: |
|
26 |
+
python -m pip install --upgrade pip setuptools wheel
|
27 |
+
pip install flake8 pytest pytest-flask
|
28 |
+
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
29 |
+
|
30 |
+
- name: Verify model and template files
|
31 |
+
run: |
|
32 |
+
if [ ! -f "models/lr_regg.pkl" ] || [ ! -f "models/feature_names.pkl" ]; then
|
33 |
+
echo "Model files missing!"
|
34 |
+
exit 1
|
35 |
+
fi
|
36 |
+
if [ ! -d "templates" ] || [ ! -f "templates/index.html" ]; then
|
37 |
+
echo "Template files missing!"
|
38 |
+
exit 1
|
39 |
+
fi
|
40 |
+
|
41 |
+
- name: Lint with flake8
|
42 |
+
run: |
|
43 |
+
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
|
44 |
+
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
45 |
+
|
46 |
+
- name: Test with pytest
|
47 |
+
run: python -m pytest tests/ -v
|
48 |
+
|
49 |
+
- name: Start and Test Flask App
|
50 |
+
run: |
|
51 |
+
python app.py &
|
52 |
+
sleep 10
|
53 |
+
curl --retry 5 --retry-delay 5 --retry-connrefused http://127.0.0.1:5000/ || exit 1
|
54 |
+
pkill -f "python app.py"
|
55 |
+
env:
|
56 |
+
FLASK_ENV: testing
|
57 |
+
FLASK_DEBUG: 0
|
58 |
+
|
59 |
+
- name: Check setup.py
|
60 |
+
run: |
|
61 |
+
if [ -f setup.py ]; then
|
62 |
+
python setup.py check
|
63 |
+
python setup.py sdist bdist_wheel
|
64 |
+
pip install -e .
|
65 |
+
else
|
66 |
+
echo "setup.py not found!"
|
67 |
+
fi
|
.gitignore
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# UV
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
#uv.lock
|
102 |
+
|
103 |
+
# poetry
|
104 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
105 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
106 |
+
# commonly ignored for libraries.
|
107 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
108 |
+
#poetry.lock
|
109 |
+
|
110 |
+
# pdm
|
111 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
112 |
+
#pdm.lock
|
113 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
114 |
+
# in version control.
|
115 |
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
116 |
+
.pdm.toml
|
117 |
+
.pdm-python
|
118 |
+
.pdm-build/
|
119 |
+
|
120 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
121 |
+
__pypackages__/
|
122 |
+
|
123 |
+
# Celery stuff
|
124 |
+
celerybeat-schedule
|
125 |
+
celerybeat.pid
|
126 |
+
|
127 |
+
# SageMath parsed files
|
128 |
+
*.sage.py
|
129 |
+
|
130 |
+
# Environments
|
131 |
+
.env
|
132 |
+
.venv
|
133 |
+
env/
|
134 |
+
venv/
|
135 |
+
ENV/
|
136 |
+
env.bak/
|
137 |
+
venv.bak/
|
138 |
+
|
139 |
+
# Spyder project settings
|
140 |
+
.spyderproject
|
141 |
+
.spyproject
|
142 |
+
|
143 |
+
# Rope project settings
|
144 |
+
.ropeproject
|
145 |
+
|
146 |
+
# mkdocs documentation
|
147 |
+
/site
|
148 |
+
|
149 |
+
# mypy
|
150 |
+
.mypy_cache/
|
151 |
+
.dmypy.json
|
152 |
+
dmypy.json
|
153 |
+
|
154 |
+
# Pyre type checker
|
155 |
+
.pyre/
|
156 |
+
|
157 |
+
# pytype static type analyzer
|
158 |
+
.pytype/
|
159 |
+
|
160 |
+
# Cython debug symbols
|
161 |
+
cython_debug/
|
162 |
+
|
163 |
+
# PyCharm
|
164 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
165 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
166 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
167 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
168 |
+
#.idea/
|
169 |
+
|
170 |
+
# PyPI configuration file
|
171 |
+
.pypirc
|
README.md
CHANGED
@@ -1,12 +1,187 @@
|
|
1 |
---
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
colorTo: gray
|
6 |
-
sdk: docker
|
7 |
-
pinned: false
|
8 |
-
license: apache-2.0
|
9 |
-
short_description: 'Get recent treands in Housing and Realestate market '
|
10 |
---
|
11 |
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
layout: default
|
3 |
+
title: Real Estate Price Prediction API
|
4 |
+
description: A machine learning powered real estate price prediction tool with web interface
|
|
|
|
|
|
|
|
|
|
|
5 |
---
|
6 |
|
7 |
+
# 🏠 **Real Estate Price Prediction API**
|
8 |
+
|
9 |
+
## 🌟 **Project Motto**
|
10 |
+
This project aims to provide an accurate and interactive **Real Estate Price Prediction tool**. Users can input details such as property location, square footage, number of bedrooms, and bathrooms to get an **instant price prediction** based on a trained **machine learning model**.
|
11 |
+
|
12 |
+
This API bridges the gap between **data science** and **user-friendly deployment**, allowing seamless integration of advanced predictions into real-world applications.
|
13 |
+
|
14 |
+
---
|
15 |
+
|
16 |
+
## 💡 **How It Works**
|
17 |
+
|
18 |
+
1. **Data Processing & Model Training**
|
19 |
+
- A dataset of real estate transactions was cleaned and processed.
|
20 |
+
- Key features such as `location`, `total_sqft`, `bath`, and `bhk` were selected.
|
21 |
+
- A **Linear Regression model** was trained and stored as a `.pkl` file for deployment.
|
22 |
+
|
23 |
+
2. **Prediction Mechanism**
|
24 |
+
- The trained model is loaded and predicts property prices based on user inputs.
|
25 |
+
- Location data is one-hot encoded to handle categorical features.
|
26 |
+
|
27 |
+
3. **Interactive Frontend**
|
28 |
+
- A Flask-powered web app provides an intuitive interface for predictions.
|
29 |
+
- Users input details via forms, and results are displayed instantly.
|
30 |
+
|
31 |
+
4. **API Integration**
|
32 |
+
- A `/predict` endpoint allows developers to integrate the model with other applications.
|
33 |
+
---
|
34 |
+
|
35 |
+
## 🎥 Watch the Demo(click image below👇)
|
36 |
+
|
37 |
+
[](https://www.youtube.com/watch?v=NcmXkE907io)
|
38 |
+
|
39 |
+
|
40 |
+
---
|
41 |
+
|
42 |
+
## 📷 **Screenshots**
|
43 |
+
### Home Page
|
44 |
+

|
45 |
+
|
46 |
+
### Prediction Results
|
47 |
+

|
48 |
+
|
49 |
+
---
|
50 |
+
|
51 |
+
## 📂 **Project Structure**
|
52 |
+
|
53 |
+
```
|
54 |
+
├── .github/
|
55 |
+
│ └── workflows/
|
56 |
+
│ └── python-app.yml # CI/CD workflow configuration
|
57 |
+
├── data/ # Dataset directory
|
58 |
+
│ └── bengaluru_house_prices.csv # Dataset file for the project
|
59 |
+
├── models/ # Saved models and feature names
|
60 |
+
│ ├── feature_names.pkl # Pickled feature names
|
61 |
+
│ └── lr_regg.pkl # Trained regression model
|
62 |
+
├── src/ # Source code for the project
|
63 |
+
│ ├── EDA.py # Exploratory Data Analysis script
|
64 |
+
│ ├── model.py # Model training and evaluation script
|
65 |
+
│ └── preprocessing.py # Data preprocessing logic
|
66 |
+
├── templates/ # HTML templates for the Flask web app
|
67 |
+
│ ├── index.html # User input form for predictions
|
68 |
+
│ └── results.html # Displays prediction results
|
69 |
+
├── tests/ # Unit testing for the project
|
70 |
+
│ ├── __init__.py # Marks the directory as a package
|
71 |
+
│ ├── test_model.py # Tests for the model
|
72 |
+
│ └── test2direct.py # Additional test script
|
73 |
+
├── .gitignore # Specifies ignored files for Git
|
74 |
+
├── app.py # Flask application entry point
|
75 |
+
├── main.py # Main execution script
|
76 |
+
├── requirements.txt # List of dependencies for the project
|
77 |
+
├── setup.py # Setup script for packaging the project
|
78 |
+
├── README.md # Project overview and documentation
|
79 |
+
|
80 |
+
```
|
81 |
+
|
82 |
+
---
|
83 |
+
|
84 |
+
## 🚀 **Features**
|
85 |
+
- **Accurate Price Predictions** using a trained regression model.
|
86 |
+
- **Interactive Web Interface** for user-friendly predictions.
|
87 |
+
- **API Integration** for developers to use the model programmatically.
|
88 |
+
- **Scalable and Extendable** to new locations or additional features.
|
89 |
+
|
90 |
+
---
|
91 |
+
|
92 |
+
## 🛠️ **Installation and Setup**
|
93 |
+
|
94 |
+
### Prerequisites
|
95 |
+
- Python 3.8+
|
96 |
+
- Flask
|
97 |
+
- Pickle
|
98 |
+
|
99 |
+
### Installation Steps
|
100 |
+
1. Clone the repository:
|
101 |
+
```bash
|
102 |
+
git clone https://github.com/Maazuddin1/Banglore_RealEstate_forecast-using-CICD-piplines.git
|
103 |
+
cd Banglore_RealEstate_forecast-using-CICD-piplines
|
104 |
+
|
105 |
+
```
|
106 |
+
|
107 |
+
2. Create a virtual environment:
|
108 |
+
```bash
|
109 |
+
python -m venv env
|
110 |
+
source env/bin/activate # Linux/Mac
|
111 |
+
env\Scripts\activate # Windows
|
112 |
+
```
|
113 |
+
|
114 |
+
3. Install dependencies:
|
115 |
+
```bash
|
116 |
+
pip install -r requirements.txt
|
117 |
+
```
|
118 |
+
|
119 |
+
4. Start the Flask application:
|
120 |
+
```bash
|
121 |
+
python app.py
|
122 |
+
```
|
123 |
+
|
124 |
+
5. Open your browser and navigate to `http://127.0.0.1:5000/`.
|
125 |
+
|
126 |
+
---
|
127 |
+
|
128 |
+
## 🌐 **API Usage**
|
129 |
+
|
130 |
+
### Endpoint: `/predict`
|
131 |
+
**Method**: `POST`
|
132 |
+
**Input** (JSON):
|
133 |
+
```json
|
134 |
+
{
|
135 |
+
"location": "Whitefield",
|
136 |
+
"sqft": 1200,
|
137 |
+
"bath": 2,
|
138 |
+
"bhk": 3
|
139 |
+
}
|
140 |
+
```
|
141 |
+
|
142 |
+
**Output**:
|
143 |
+
```json
|
144 |
+
{
|
145 |
+
"predicted_price": 94.23 Lakhs
|
146 |
+
}
|
147 |
+
```
|
148 |
+
|
149 |
+
---
|
150 |
+
|
151 |
+
## 🔍 **Model Details**
|
152 |
+
The trained model uses **Linear Regression** with key features like:
|
153 |
+
- **total_sqft**: Total square footage of the property.
|
154 |
+
- **bath**: Number of bathrooms.
|
155 |
+
- **bhk**: Number of bedrooms.
|
156 |
+
- **Location**: One-hot encoded for categorical support.
|
157 |
+
|
158 |
+
---
|
159 |
+
|
160 |
+
## 📈 **Future Enhancements**
|
161 |
+
- Add support for more advanced machine learning models like Random Forest or XGBoost.
|
162 |
+
- Improve UI design with frameworks like Bootstrap.
|
163 |
+
- Expand location datasets for better predictions.
|
164 |
+
- Add real-time price scraping for dynamic updates.
|
165 |
+
|
166 |
+
---
|
167 |
+
|
168 |
+
## 🖼️ **Visual Workflow**
|
169 |
+
```mermaid
|
170 |
+
graph TD
|
171 |
+
A[User Input] --> B[Flask App]
|
172 |
+
B --> C[Process Input Features]
|
173 |
+
C --> D[Trained ML Model]
|
174 |
+
D --> E[Predict Price]
|
175 |
+
E --> F[Display Results]
|
176 |
+
```
|
177 |
+
|
178 |
+
---
|
179 |
+
|
180 |
+
## 🌟 **Contributions**
|
181 |
+
Contributions are welcome! Feel free to fork this repository, open issues, or submit pull requests.
|
182 |
+
|
183 |
+
---
|
184 |
+
|
185 |
+
## 📄 **License**
|
186 |
+
-
|
187 |
+
---
|
data/bengaluru_house_prices.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
dockerfile
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
|
2 |
+
# you will also find guides on how best to write your Dockerfile
|
3 |
+
|
4 |
+
FROM python:3.9
|
5 |
+
|
6 |
+
RUN useradd -m -u 1000 user
|
7 |
+
USER user
|
8 |
+
ENV PATH="/home/user/.local/bin:$PATH"
|
9 |
+
|
10 |
+
WORKDIR /app
|
11 |
+
|
12 |
+
COPY --chown=user ./requirements.txt requirements.txt
|
13 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
14 |
+
|
15 |
+
COPY --chown=user . /app
|
16 |
+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
images/homepage.png
ADDED
![]() |
images/predicted_results.jpg
ADDED
![]() |
images/temp
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
.
|
main.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.preprocessing import Preprocessing
|
2 |
+
from src.model import ModelBuilder
|
3 |
+
from tests import test2direct
|
4 |
+
import pandas as pd
|
5 |
+
import pickle
|
6 |
+
import os
|
7 |
+
|
8 |
+
def main():
|
9 |
+
# Load the dataset
|
10 |
+
data = pd.read_csv("data/bengaluru_house_prices.csv")
|
11 |
+
|
12 |
+
# Preprocess the data
|
13 |
+
print("Starting Data Preprocessing...")
|
14 |
+
preprocessor = Preprocessing(data)
|
15 |
+
preprocessor.clean_data()
|
16 |
+
preprocessor.feature_engineering()
|
17 |
+
preprocessor.remove_bhk_outliers()
|
18 |
+
preprocessor.encode_features()
|
19 |
+
preprocessor.scale_features()
|
20 |
+
preprocessor.handle_missing_values()
|
21 |
+
print("Preprocessing completed!")
|
22 |
+
|
23 |
+
# Build and evaluate the model
|
24 |
+
print("Starting Model Building and Evaluation...")
|
25 |
+
model_builder = ModelBuilder(data=preprocessor.data)
|
26 |
+
X_train, X_test, y_train, y_test = model_builder.split_data(target_column='price')
|
27 |
+
|
28 |
+
model_builder.train_model(X_train, y_train)
|
29 |
+
mse, r2 = model_builder.evaluate_model(X_test, y_test)
|
30 |
+
|
31 |
+
#print(f"Model Evaluation:\nMean Squared Error: {mse}\nR2 Score: {r2}")
|
32 |
+
|
33 |
+
# Save the trained model
|
34 |
+
print("Trained model saved successfully!")
|
35 |
+
|
36 |
+
# Save the trained model as a pickle file
|
37 |
+
model_builder.save_model_as_pickle()
|
38 |
+
|
39 |
+
# Save the feature names as a pickle file
|
40 |
+
model_builder.save_features_as_pickle(data=preprocessor.data)
|
41 |
+
|
42 |
+
test2direct.main()
|
43 |
+
|
44 |
+
|
45 |
+
if __name__ == "__main__":
|
46 |
+
main()
|
models/feature_names.pkl
ADDED
Binary file (3.63 kB). View file
|
|
models/lr_regg.pkl
ADDED
Binary file (7.91 kB). View file
|
|
notebooks/Analysis_notebook.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
setup.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from setuptools import setup, find_packages
|
2 |
+
|
3 |
+
setup(
|
4 |
+
name="Banglore_house_price_estimator",
|
5 |
+
version="1.0",
|
6 |
+
description="A machine learning project for house price prediction in Banglore",
|
7 |
+
author="Maaz uddin",
|
8 |
+
packages=find_packages(),
|
9 |
+
install_requires=[
|
10 |
+
"flask",
|
11 |
+
"pandas",
|
12 |
+
"numpy",
|
13 |
+
"scikit-learn",
|
14 |
+
"seaborn",
|
15 |
+
"matplotlib"
|
16 |
+
]
|
17 |
+
)
|
src/EDA.py
ADDED
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import matplotlib.pyplot as plt
|
3 |
+
import seaborn as sns
|
4 |
+
|
5 |
+
class EDA:
|
6 |
+
def __init__(self, data_path):
|
7 |
+
"""Initialize with dataset path."""
|
8 |
+
self.data_path = data_path
|
9 |
+
self.data = None
|
10 |
+
|
11 |
+
def load_data(self):
|
12 |
+
"""Loads the dataset from the provided path."""
|
13 |
+
self.data = pd.read_csv(self.data_path)
|
14 |
+
return self.data
|
15 |
+
|
16 |
+
def basic_info(self):
|
17 |
+
"""Displays basic information about the dataset."""
|
18 |
+
print("\nDataset Info:\n")
|
19 |
+
print(self.data.info())
|
20 |
+
print("\nShape:", self.data.shape)
|
21 |
+
print("\nMissing Values:\n", self.data.isnull().sum())
|
22 |
+
print("\nDuplicate Rows:", self.data.duplicated().sum())
|
23 |
+
return self.data.describe()
|
24 |
+
|
25 |
+
def missing_value_analysis(self):
|
26 |
+
"""Analyzes and visualizes missing values."""
|
27 |
+
missing_data = self.data.isnull().sum()
|
28 |
+
missing_data = missing_data[missing_data > 0].sort_values(ascending=False)
|
29 |
+
|
30 |
+
if not missing_data.empty:
|
31 |
+
plt.figure(figsize=(8, 6))
|
32 |
+
sns.barplot(x=missing_data.index, y=missing_data.values, palette='viridis')
|
33 |
+
plt.title('Missing Values Count')
|
34 |
+
plt.xticks(rotation=45)
|
35 |
+
plt.ylabel('Count')
|
36 |
+
plt.show()
|
37 |
+
|
38 |
+
return missing_data
|
39 |
+
|
40 |
+
def visualize_distributions(self):
|
41 |
+
"""Visualizes distributions of numerical features."""
|
42 |
+
numeric_cols = self.data.select_dtypes(include=['int64', 'float64']).columns
|
43 |
+
self.data[numeric_cols].hist(bins=15, figsize=(10, 8), color='skyblue', edgecolor='black')
|
44 |
+
plt.suptitle('Feature Distributions', fontsize=16)
|
45 |
+
plt.show()
|
46 |
+
|
47 |
+
def correlation_heatmap(self):
|
48 |
+
"""Plots a heatmap of feature correlations."""
|
49 |
+
plt.figure(figsize=(10, 8))
|
50 |
+
sns.heatmap(self.data.corr(), annot=True, cmap='coolwarm', fmt='.2f')
|
51 |
+
plt.title('Feature Correlation Heatmap')
|
52 |
+
plt.show()
|
53 |
+
|
54 |
+
def detect_outliers(self, feature):
|
55 |
+
"""Detects and visualizes outliers for a given feature."""
|
56 |
+
plt.figure(figsize=(8, 6))
|
57 |
+
sns.boxplot(x=self.data[feature], color='lightblue')
|
58 |
+
plt.title(f'Outliers in {feature}')
|
59 |
+
plt.show()
|
60 |
+
|
61 |
+
def feature_summary(self):
|
62 |
+
"""Provides a summary of categorical and numerical features."""
|
63 |
+
categorical_cols = self.data.select_dtypes(include=['object']).columns
|
64 |
+
numeric_cols = self.data.select_dtypes(include=['int64', 'float64']).columns
|
65 |
+
|
66 |
+
print("\nCategorical Features:")
|
67 |
+
for col in categorical_cols:
|
68 |
+
print(f"{col}: {self.data[col].nunique()} unique values")
|
69 |
+
print(self.data[col].value_counts().head(10))
|
70 |
+
print("---")
|
71 |
+
|
72 |
+
print("\nNumerical Features:")
|
73 |
+
for col in numeric_cols:
|
74 |
+
print(f"{col}: Mean={self.data[col].mean()}, Median={self.data[col].median()}, Std={self.data[col].std()}")
|
75 |
+
print("---")
|
76 |
+
|
77 |
+
def pairwise_scatterplots(self, features):
|
78 |
+
"""Plots scatterplots for selected features."""
|
79 |
+
sns.pairplot(self.data[features], diag_kind='kde', plot_kws={'alpha': 0.5})
|
80 |
+
plt.suptitle('Pairwise Scatterplots', fontsize=16)
|
81 |
+
plt.show()
|
82 |
+
|
83 |
+
def target_analysis(self, target_col):
|
84 |
+
"""Analyzes target variable distribution."""
|
85 |
+
plt.figure(figsize=(8, 6))
|
86 |
+
sns.histplot(self.data[target_col], kde=True, bins=30, color='blue')
|
87 |
+
plt.title(f'Distribution of {target_col}')
|
88 |
+
plt.xlabel(target_col)
|
89 |
+
plt.ylabel('Frequency')
|
90 |
+
plt.show()
|
91 |
+
|
92 |
+
|
93 |
+
if __name__ == "__main__":
|
94 |
+
eda = EDA(data_path="data/bengaluru_house_prices.csv")
|
95 |
+
data = eda.load_data()
|
96 |
+
eda.basic_info()
|
97 |
+
eda.missing_value_analysis()
|
98 |
+
eda.visualize_distributions()
|
99 |
+
eda.correlation_heatmap()
|
100 |
+
eda.detect_outliers('price')
|
101 |
+
eda.feature_summary()
|
102 |
+
eda.pairwise_scatterplots(features=['price', 'total_sqft', 'bath', 'bhk'])
|
103 |
+
eda.target_analysis(target_col='price')
|
104 |
+
print("Missing values summary:")
|
105 |
+
print(eda.missing_value_analysis())
|
src/model.py
ADDED
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from sklearn.model_selection import train_test_split
|
3 |
+
from sklearn.linear_model import LinearRegression
|
4 |
+
from sklearn.metrics import mean_squared_error, r2_score
|
5 |
+
import pickle # Import pickle for saving models
|
6 |
+
import os # Import os for directory operations
|
7 |
+
|
8 |
+
class ModelBuilder:
|
9 |
+
def __init__(self, data):
|
10 |
+
"""Initialize with the dataset."""
|
11 |
+
self.data = data
|
12 |
+
self.model = None
|
13 |
+
|
14 |
+
def split_data(self, target_column, test_size=0.2, random_state=42):
|
15 |
+
"""Splits the data into training and testing sets."""
|
16 |
+
if target_column not in self.data.columns:
|
17 |
+
raise ValueError(f"Target column '{target_column}' not found in the dataset.")
|
18 |
+
|
19 |
+
X = self.data.drop(columns=[target_column])
|
20 |
+
y = self.data[target_column]
|
21 |
+
|
22 |
+
X_train, X_test, y_train, y_test = train_test_split(
|
23 |
+
X, y, test_size=test_size, random_state=random_state
|
24 |
+
)
|
25 |
+
|
26 |
+
#print('x_test:', X_test.head())
|
27 |
+
#print('First 15 column names:', X_test.columns[:15])
|
28 |
+
#print('First 15 column data:', X_test.iloc[:15, :10])
|
29 |
+
print(f"Data split complete: Train size = {len(X_train)}, Test size = {len(X_test)}")
|
30 |
+
return X_train, X_test, y_train, y_test
|
31 |
+
|
32 |
+
def train_model(self, X_train, y_train):
|
33 |
+
"""Trains a Linear Regression model."""
|
34 |
+
self.model = LinearRegression()
|
35 |
+
self.model.fit(X_train, y_train)
|
36 |
+
print("Model training complete.")
|
37 |
+
|
38 |
+
def evaluate_model(self, X_test, y_test):
|
39 |
+
"""Evaluates the model on the test set."""
|
40 |
+
if self.model is None:
|
41 |
+
raise ValueError("Model has not been trained yet.")
|
42 |
+
|
43 |
+
y_pred = self.model.predict(X_test)
|
44 |
+
mse = mean_squared_error(y_test, y_pred)
|
45 |
+
r2 = r2_score(y_test, y_pred)
|
46 |
+
accuracy = self.model.score(X_test, y_test)
|
47 |
+
|
48 |
+
print(f"Model Evaluation:\nMean Squared Error: {mse}\nR2 Score(accuracy): {r2}")
|
49 |
+
return mse, r2
|
50 |
+
|
51 |
+
|
52 |
+
def save_model_as_pickle(self, model_path='models/lr_regg.pkl'):
|
53 |
+
"""Save the trained model as a pickle file."""
|
54 |
+
if self.model is None:
|
55 |
+
raise ValueError("Model has not been trained yet.")
|
56 |
+
|
57 |
+
# Create the models directory if it doesn't exist
|
58 |
+
#os.makedirs(os.path.dirname(model_path), exist_ok=True)
|
59 |
+
|
60 |
+
# Save the model
|
61 |
+
with open(model_path, 'wb') as file:
|
62 |
+
pickle.dump(self.model, file)
|
63 |
+
|
64 |
+
print(f"Model saved as pickle at {model_path}")
|
65 |
+
return model_path
|
66 |
+
|
67 |
+
|
68 |
+
def save_features_as_pickle(self, data, target_column='price', file_path='models/feature_names.pkl'):
|
69 |
+
"""
|
70 |
+
Extract feature names from the data and save them as a pickle file.
|
71 |
+
|
72 |
+
Args:
|
73 |
+
data (pd.DataFrame): Input dataset.
|
74 |
+
target_column (str): Name of the target column to exclude from features.
|
75 |
+
file_path (str): Path to save the pickle file.
|
76 |
+
"""
|
77 |
+
# Ensure the target column exists
|
78 |
+
if target_column not in data.columns:
|
79 |
+
raise ValueError(f"Target column '{target_column}' not found in the dataset.")
|
80 |
+
|
81 |
+
# Drop the target column and extract feature names
|
82 |
+
feature_names = data.drop(columns=[target_column]).columns.tolist()
|
83 |
+
|
84 |
+
# Ensure directory exists
|
85 |
+
os.makedirs(os.path.dirname(file_path), exist_ok=True)
|
86 |
+
|
87 |
+
# Save the feature names as a pickle file
|
88 |
+
with open(file_path, "wb") as file:
|
89 |
+
pickle.dump(feature_names, file)
|
90 |
+
|
91 |
+
print(f"Feature names saved to {file_path}")
|
92 |
+
|
93 |
+
def load_model_from_pickle(self, model_path):
|
94 |
+
"""Load a model from a pickle file."""
|
95 |
+
if not os.path.exists(model_path):
|
96 |
+
raise FileNotFoundError(f"No model found at {model_path}")
|
97 |
+
|
98 |
+
with open(model_path, 'rb') as file:
|
99 |
+
self.model = pickle.load(file)
|
100 |
+
|
101 |
+
print(f"Model loaded from {model_path}")
|
102 |
+
return self.model
|
103 |
+
|
src/preprocessing.py
ADDED
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
from sklearn.preprocessing import OneHotEncoder, StandardScaler
|
4 |
+
from sklearn.model_selection import train_test_split
|
5 |
+
|
6 |
+
class Preprocessing:
    """Cleaning, feature engineering, outlier removal, encoding and splitting
    for the Bengaluru house-price dataset."""

    def __init__(self, data):
        """Initialize with the dataset.

        Args:
            data (pd.DataFrame): Raw housing dataset.
        """
        self.data = data

    def clean_data(self):
        """Clean and preprocess the dataset.

        Drops duplicates and irrelevant columns, removes rows with missing
        values, imputes any remaining gaps, and groups rare locations under
        a single 'other' label.

        Returns:
            pd.DataFrame: The cleaned dataset.
        """
        self.data = self.data.drop_duplicates()
        # errors='ignore' keeps this safe when a column is already absent
        # (the original unconditional drop raised KeyError in that case).
        self.data = self.data.drop(
            columns=['area_type', 'availability', 'society', 'balcony'],
            errors='ignore',
        )
        self.data = self.data.dropna()

        # Defensive: the dropna above removes NaN targets already, but keep
        # the guard so partial pipelines stay safe.
        if 'price' in self.data.columns:
            self.data = self.data.dropna(subset=['price'])

        # Impute numerical columns with the median.
        numeric_cols = self.data.select_dtypes(include=['int64', 'float64']).columns
        if len(numeric_cols) > 0:
            self.data[numeric_cols] = self.data[numeric_cols].fillna(self.data[numeric_cols].median())

        # Impute categorical columns with the mode; guard against an empty
        # selection (mode().iloc[0] raises IndexError on an empty frame).
        categorical_cols = self.data.select_dtypes(include=['object']).columns
        if len(categorical_cols) > 0 and len(self.data) > 0:
            self.data[categorical_cols] = self.data[categorical_cols].fillna(self.data[categorical_cols].mode().iloc[0])

        # Group locations with <= 10 listings into one 'other' bucket to
        # keep the one-hot dimensionality manageable.
        if 'location' in self.data.columns:
            location_stats = self.data['location'].value_counts()
            rare_locations = location_stats[location_stats <= 10]
            self.data['location'] = self.data['location'].apply(
                lambda x: 'other' if x in rare_locations else x
            )
        return self.data

    def convert_rangesqft_to_avg(self, x):
        """Convert '1200 - 1500' style range strings to their average.

        Args:
            x: Raw total_sqft cell value (string or numeric).

        Returns:
            float | None: Parsed value, or None when unparseable so the row
            can be dropped later by dropna().
        """
        # str() makes this safe for numeric cells (the original crashed
        # with AttributeError on non-string input).
        tokens = str(x).split('-')
        if len(tokens) == 2:
            try:
                return (float(tokens[0]) + float(tokens[1])) / 2
            except ValueError:
                # e.g. '12-x' or unit-suffixed halves: treat as unparseable.
                return None
        try:
            return float(x)
        except (TypeError, ValueError):
            return None

    def feature_engineering(self):
        """Create engineered features and drop inconsistent rows.

        - Extracts the integer bedroom count ('bhk') from the 'size' text.
        - Converts 'total_sqft' range strings to averages.
        - Drops rows with < 300 sqft per bedroom or bath > bhk + 1.
        - Adds 'price_per_sqft' (price is quoted in lakhs, hence the 1e5 factor).

        Returns:
            pd.DataFrame: The transformed dataset.
        """
        # Guarded: the original del'd 'size' unconditionally and raised
        # KeyError when the column was missing.
        if 'size' in self.data.columns:
            self.data['bhk'] = self.data['size'].apply(
                lambda x: int(x.split(' ')[0]) if isinstance(x, str) else None
            )
            self.data = self.data.drop(columns=['size'])

        if 'total_sqft' in self.data.columns:
            self.data['total_sqft'] = self.data['total_sqft'].apply(self.convert_rangesqft_to_avg)

        # Drop implausible rows: less than 300 sqft per bedroom.
        if 'total_sqft' in self.data.columns and 'bhk' in self.data.columns:
            self.data = self.data[~(self.data['total_sqft'] / self.data['bhk'] < 300)]

        # Drop rows where bathrooms exceed bedrooms + 1.
        if 'bhk' in self.data.columns and 'bath' in self.data.columns:
            self.data = self.data[self.data['bhk'] + 2 > self.data['bath']]

        # Price is in lakhs (1 lakh = 100,000 INR).
        if 'total_sqft' in self.data.columns and 'price' in self.data.columns:
            self.data['price_per_sqft'] = self.data['price'] * 100000 / self.data['total_sqft']
        return self.data

    def remove_bhk_outliers(self):
        """Remove per-location BHK pricing outliers.

        Within each location, drop n-BHK rows whose price_per_sqft falls
        below the mean price_per_sqft of the (n-1)-BHK group, provided that
        group has more than 5 samples — a smaller flat should not cost more
        per sqft than a bigger one in the same area.

        Returns:
            pd.DataFrame: Dataset with outliers removed.
        """
        exclude_indices = []

        for location, location_df in self.data.groupby('location'):
            # Per-BHK price_per_sqft statistics for this location.
            bhk_stats = {}
            for bhk, bhk_df in location_df.groupby('bhk'):
                bhk_stats[bhk] = {
                    'mean': np.mean(bhk_df['price_per_sqft']),
                    'std': np.std(bhk_df['price_per_sqft']),
                    'count': bhk_df.shape[0]
                }

            # Flag rows priced below the mean of the next-smaller BHK group.
            for bhk, bhk_df in location_df.groupby('bhk'):
                stats = bhk_stats.get(bhk - 1)
                if stats and stats['count'] > 5:
                    exclude_indices.extend(
                        bhk_df[bhk_df['price_per_sqft'] < stats['mean']].index.values
                    )

        self.data = self.data.drop(index=exclude_indices)
        print(f"Removed {len(exclude_indices)} outliers based on bhk and price_per_sqft.")
        return self.data

    def encode_features(self):
        """One-hot encode the 'location' column using pd.get_dummies.

        Returns:
            pd.DataFrame: Dataset with encoded location columns.
        """
        categorical_cols = self.data.select_dtypes(include=['object']).columns
        if categorical_cols.empty:
            print("No categorical features found for encoding.")
            return self.data

        # Guarded: the original indexed 'location' unconditionally and raised
        # KeyError when other object columns existed but 'location' did not.
        if 'location' in self.data.columns:
            # drop_first avoids the dummy-variable trap for linear models.
            dummies = pd.get_dummies(self.data['location'], drop_first=True)
            dummies = dummies.astype(int)  # ints instead of bools for consistency
            self.data = pd.concat([self.data, dummies], axis=1)
            self.data = self.data.drop(columns=['location'])

        print(f"Categorical features encoded: {len(categorical_cols)}")
        print(f"New dataset shape after encoding: {self.data.shape}")

        return self.data

    def scale_features(self):
        """Standardize all numerical columns (zero mean, unit variance).

        NOTE(review): this also scales the target column if it is numeric —
        call after separating the target if that is not desired.

        Returns:
            pd.DataFrame: Dataset with scaled numerical features.
        """
        scaler = StandardScaler()
        numeric_cols = self.data.select_dtypes(include=['int64', 'float64']).columns
        if len(numeric_cols) > 0:
            self.data[numeric_cols] = scaler.fit_transform(self.data[numeric_cols])
        return self.data

    def handle_missing_values(self):
        """Drop any rows still containing missing values after the pipeline.

        Returns:
            pd.DataFrame: Dataset with no missing values.
        """
        self.data = self.data.dropna()
        return self.data

    def split_data(self, target_column, test_size=0.2, random_state=42):
        """Split the dataset into training and testing sets.

        Args:
            target_column (str): The column to be used as the target variable.
            test_size (float): Proportion of the dataset to include in the test split.
            random_state (int): Random seed for reproducibility.

        Returns:
            tuple: X_train, X_test, y_train, y_test

        Raises:
            ValueError: If *target_column* is not present in the dataset.
        """
        if target_column not in self.data.columns:
            raise ValueError(f"Target column '{target_column}' not found in the dataset.")

        X = self.data.drop(columns=[target_column])
        y = self.data[target_column]

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
        return X_train, X_test, y_train, y_test
|
153 |
+
|
154 |
+
# Example Usage
|
155 |
+
# Example usage: run the full preprocessing pipeline end-to-end.
if __name__ == "__main__":
    raw = pd.read_csv("data/bengaluru_house_prices.csv")

    pipeline = Preprocessing(data=raw)

    # Pipeline stages, in order: clean, engineer, de-outlier, encode,
    # scale, then drop any remaining NaNs.
    pipeline.clean_data()
    pipeline.feature_engineering()
    pipeline.remove_bhk_outliers()
    pipeline.encode_features()
    pipeline.scale_features()
    pipeline.handle_missing_values()

    print(pipeline.data.columns.tolist())
    print(pipeline.data.shape)
    print("\nprocessing completed !!!")
|
169 |
+
|
templates/index.html
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>House Price Predictor</title>
    <!-- Bootstrap 5.1.3 from CDN for layout and form styling -->
    <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
    <style>
        body {
            padding: 20px;
            background-color: #f8f9fa;
        }
        .container {
            max-width: 800px;
            background-color: white;
            padding: 30px;
            border-radius: 10px;
            box-shadow: 0 0 10px rgba(0,0,0,0.1);
            margin-top: 50px;
        }
        .prediction-result {
            margin-top: 20px;
            padding: 20px;
            border-radius: 5px;
            background-color: #e9ecef;
        }
        .property-details {
            margin-top: 20px;
            padding: 15px;
            border: 1px solid #dee2e6;
            border-radius: 5px;
        }
    </style>
</head>
<body>
    <div class="container">
        <h2 class="text-center mb-4">Bangalore House Price Predictor</h2>

        <!-- Server-side error banner (set by the rendering view) -->
        {% if error %}
        <div class="alert alert-danger" role="alert">
            {{ error }}
        </div>
        {% endif %}

        <!-- Prediction input form; posts back to the same route -->
        <form method="POST" class="needs-validation" novalidate>
            <div class="mb-3">
                <label for="location" class="form-label">Location:</label>
                <select class="form-select" id="location" name="location" required>
                    <option value="">Select a location</option>
                    {% for location in locations %}
                    <option value="{{ location }}">{{ location }}</option>
                    {% endfor %}
                </select>
            </div>

            <div class="mb-3">
                <label for="sqft" class="form-label">Total Square Feet:</label>
                <input type="number" class="form-control" id="sqft" name="sqft" min="100" required>
            </div>

            <div class="mb-3">
                <label for="bath" class="form-label">Number of Bathrooms:</label>
                <input type="number" class="form-control" id="bath" name="bath" min="1" max="10" required>
            </div>

            <div class="mb-3">
                <label for="bhk" class="form-label">BHK (Bedrooms):</label>
                <input type="number" class="form-control" id="bhk" name="bhk" min="1" max="10" required>
            </div>

            <div class="text-center">
                <button type="submit" class="btn btn-primary">Predict Price</button>
            </div>
        </form>

        <!-- Inline prediction result, shown once the view passes `prediction` -->
        {% if prediction is not none %}
        <div class="prediction-result text-center">
            <h4>Predicted Price:</h4>
            <p class="h3">₹ {{ prediction }} Lakhs</p>

            {% if property_details %}
            <div class="property-details">
                <h5>Property Details:</h5>
                <ul class="list-unstyled">
                    <li><strong>Location:</strong> {{ property_details.location }}</li>
                    <li><strong>Area:</strong> {{ property_details.sqft }} sq.ft</li>
                    <li><strong>Bathrooms:</strong> {{ property_details.bath }}</li>
                    <li><strong>BHK:</strong> {{ property_details.bhk }}</li>
                </ul>
            </div>
            {% endif %}
        </div>
        {% endif %}
    </div>

    <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
    <script>
        // Bootstrap client-side form validation boilerplate:
        // block submission and show feedback when constraints fail.
        (function () {
            'use strict'
            var forms = document.querySelectorAll('.needs-validation')
            Array.prototype.slice.call(forms).forEach(function (form) {
                form.addEventListener('submit', function (event) {
                    if (!form.checkValidity()) {
                        event.preventDefault()
                        event.stopPropagation()
                    }
                    form.classList.add('was-validated')
                }, false)
            })
        })()
    </script>
</body>
</html>
|
templates/results.html
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>House Price Prediction Result</title>
    <!-- Bootstrap 5.1.3 from CDN -->
    <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
    <style>
        body {
            background-color: #f8f9fa;
            padding: 20px;
        }
        .result-container {
            max-width: 600px;
            margin: 50px auto;
            background-color: white;
            padding: 30px;
            border-radius: 10px;
            box-shadow: 0 0 10px rgba(0,0,0,0.1);
        }
        .price-display {
            background-color: #e9ecef;
            padding: 20px;
            border-radius: 5px;
            margin: 20px 0;
            text-align: center;
        }
        .property-details {
            margin: 20px 0;
            padding: 15px;
            border: 1px solid #dee2e6;
            border-radius: 5px;
        }
        .back-button {
            text-align: center;
            margin-top: 20px;
        }
    </style>
</head>
<body>
    <div class="result-container">
        <h2 class="text-center mb-4">Price Prediction Result</h2>

        <!-- Headline predicted price (in lakhs, set by the rendering view) -->
        <div class="price-display">
            <h3 class="mb-3">Predicted Price</h3>
            <h2 class="text-primary">₹ {{ predicted_price }} Lakhs</h2>
        </div>

        <!-- Echo of the submitted property attributes -->
        <div class="property-details">
            <h4>Property Details:</h4>
            <ul class="list-unstyled">
                <li><strong>Location:</strong> {{ location }}</li>
                <li><strong>Area:</strong> {{ sqft }} sq.ft</li>
                <li><strong>Bathrooms:</strong> {{ bath }}</li>
                <li><strong>BHK:</strong> {{ bhk }}</li>
            </ul>
        </div>

        <!-- Navigation back to the prediction form -->
        <div class="back-button">
            <a href="/" class="btn btn-primary">Make Another Prediction</a>
        </div>
    </div>

    <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
</body>
</html>
|
tests/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
tests/test2direct.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
import numpy as np
|
3 |
+
|
4 |
+
def load_model_and_features(model_path, feature_path):
    """Load the trained model and feature names from pickle files.

    Args:
        model_path (str): Path to the trained model pickle file.
        feature_path (str): Path to the feature names pickle file.

    Returns:
        tuple: (trained model, feature names)
    """
    def _unpickle(path):
        # Read one artifact in binary mode; file handle closed by `with`.
        with open(path, "rb") as fh:
            return pickle.load(fh)

    return _unpickle(model_path), _unpickle(feature_path)
|
24 |
+
|
25 |
+
def predict_price(location, sqft, bath, bhk, model, feature_names):
    """Predict the price for one property using the trained model.

    Builds a zero-filled feature vector, writes in the numeric features,
    sets the one-hot location indicator, then delegates to model.predict.

    Args:
        location (str): Location name (a one-hot column in feature_names).
        sqft (float): Total square footage.
        bath (int): Number of bathrooms.
        bhk (int): Number of bedrooms.
        model: Trained model object exposing .predict.
        feature_names (list): Ordered feature names the model was fit on.

    Returns:
        float: Predicted price.
    """
    x = np.zeros(len(feature_names))

    # Numeric features: fill each column that the model actually has.
    for name, value in (('total_sqft', sqft), ('bath', bath), ('bhk', bhk)):
        if name in feature_names:
            x[feature_names.index(name)] = value

    # One-hot location flag; an unknown location simply leaves all zeros.
    if location in feature_names:
        x[feature_names.index(location)] = 1

    return model.predict([x])[0]
|
58 |
+
|
59 |
+
def main():
    """Run a batch of sample predictions against the saved model and print them."""
    # Persisted model artifacts produced by the training pipeline.
    model_path = "models/lr_regg.pkl"
    feature_path = "models/feature_names.pkl"

    model, feature_names = load_model_and_features(model_path, feature_path)

    # Hand-picked sample inputs, including one deliberately unknown location.
    test_cases = [
        {"location": "Whitefield", "sqft": 1200, "bath": 2, "bhk": 2},
        {"location": "Banaswadi", "sqft": 1500, "bath": 3, "bhk": 3},
        {"location": "Basavangudi", "sqft": 1800, "bath": 3, "bhk": 4},
        {"location": "Nonexistent Location", "sqft": 1000, "bath": 2, "bhk": 3},
        {"location": "Electronic City Phase II", "sqft": 1056, "bath": 2, "bhk": 2},
        {"location": "Chikka Tirupathi", "sqft": 800, "bath": 2, "bhk": 2}
    ]

    print("\nPredictions:")
    for case in test_cases:
        location, sqft = case["location"], case["sqft"]
        bath, bhk = case["bath"], case["bhk"]
        try:
            predicted_price = predict_price(location, sqft, bath, bhk, model, feature_names)
            # Model output is divided by 10 to express the price in lakhs.
            print(f"Location: {location}, Sqft: {sqft}, Bath: {bath}, BHK: {bhk} -> Predicted Price: {predicted_price/10:.0f} lakhs")
        except Exception as e:
            print(f"Prediction failed for Location: {location}, Error: {e}")

if __name__ == "__main__":
    main()
|
tests/test_model.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
import numpy as np
|
3 |
+
|
4 |
+
# Load model and feature names
|
5 |
+
def load_model_and_features(model_path, feature_path):
    """Load the trained model and its ordered feature-name list from disk."""
    artifacts = []
    for path in (model_path, feature_path):
        # Each artifact is a separate pickle file, read in binary mode.
        with open(path, "rb") as fh:
            artifacts.append(pickle.load(fh))
    return artifacts[0], artifacts[1]
|
15 |
+
|
16 |
+
# Predict price using the model
|
17 |
+
def predict_price(location, sqft, bath, bhk, model, feature_names):
    """Build a one-row feature vector and return the model's price prediction.

    Numeric columns ('total_sqft', 'bath', 'bhk') receive the raw values;
    the column matching *location* is set to 1; every other column stays 0,
    so an unknown location degrades gracefully to the all-zero baseline.
    """
    index_of = feature_names.index
    x = np.zeros(len(feature_names))

    if 'total_sqft' in feature_names:
        x[index_of('total_sqft')] = sqft
    if 'bath' in feature_names:
        x[index_of('bath')] = bath
    if 'bhk' in feature_names:
        x[index_of('bhk')] = bhk

    if location in feature_names:
        x[index_of(location)] = 1

    return model.predict([x])[0]
|
36 |
+
|
37 |
+
# Test function
|
38 |
+
def test_house_price_predictions():
    """Validate saved-model predictions against known expected values.

    Loads the pickled model and feature names, runs each test case, and
    asserts the predicted price (converted to lakhs) equals the expected
    value.

    Fix: the original wrapped the assert in a broad ``except Exception``
    that also swallowed AssertionError, so a wrong prediction merely
    printed a warning and the test could never fail. Assertion failures
    now propagate; only the prediction call itself is guarded.
    """
    # Persisted artifacts produced by the training pipeline.
    model_path = "models/lr_regg.pkl"
    feature_path = "models/feature_names.pkl"

    model, feature_names = load_model_and_features(model_path, feature_path)

    # Test cases and expected outputs (expected prices in lakhs).
    test_cases = [
        {"location": "Whitefield", "sqft": 1200, "bath": 2, "bhk": 2, "expected": 94},
        {"location": "Banaswadi", "sqft": 1500, "bath": 3, "bhk": 3, "expected": 118},
        {"location": "Basavangudi", "sqft": 1800, "bath": 3, "bhk": 4, "expected": 142},
        {"location": "Nonexistent Location", "sqft": 1000, "bath": 2, "bhk": 3, "expected": 79},
        {"location": "Electronic City Phase II", "sqft": 1056, "bath": 2, "bhk": 2, "expected": 83},
        {"location": "Chikka Tirupathi", "sqft": 800, "bath": 2, "bhk": 2, "expected": 63}
    ]

    for case in test_cases:
        location = case["location"]
        sqft = case["sqft"]
        bath = case["bath"]
        bhk = case["bhk"]
        expected = case["expected"]

        # Guard only the prediction call: infrastructure failures surface
        # with context, while wrong values fall through to the assert below.
        try:
            predicted_price = predict_price(location, sqft, bath, bhk, model, feature_names)
        except Exception as e:
            raise RuntimeError(f"Prediction failed for Location: {location}, Error: {e}") from e

        assert round(predicted_price / 10) == expected, (
            f"Failed for Location: {location}, "
            f"Expected: {expected}, Got: {predicted_price/10:.0f} lakhs"
        )
        print(f"Test Passed: Location: {location}, Predicted: {predicted_price/10:.0f} lakhs")

# Run the tests
if __name__ == "__main__":
    test_house_price_predictions()
|