Maaz Uddin commited on
Commit
e0a433a
·
1 Parent(s): e7769d4

allfilesupload

Browse files
.gitattributes CHANGED
@@ -1,35 +1 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.ipynb linguist-detectable=false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/python-app.yml ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Python application
2
+
3
+ on:
4
+ push:
5
+ branches: [ "main" ]
6
+ pull_request:
7
+ branches: [ "main" ]
8
+
9
+ permissions:
10
+ contents: read
11
+
12
+ jobs:
13
+ build:
14
+ runs-on: ubuntu-latest
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python 3.10
20
+ uses: actions/setup-python@v3
21
+ with:
22
+ python-version: "3.10"
23
+
24
+ - name: Install dependencies
25
+ run: |
26
+ python -m pip install --upgrade pip setuptools wheel
27
+ pip install flake8 pytest pytest-flask
28
+ if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
29
+
30
+ - name: Verify model and template files
31
+ run: |
32
+ if [ ! -f "models/lr_regg.pkl" ] || [ ! -f "models/feature_names.pkl" ]; then
33
+ echo "Model files missing!"
34
+ exit 1
35
+ fi
36
+ if [ ! -d "templates" ] || [ ! -f "templates/index.html" ]; then
37
+ echo "Template files missing!"
38
+ exit 1
39
+ fi
40
+
41
+ - name: Lint with flake8
42
+ run: |
43
+ flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
44
+ flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
45
+
46
+ - name: Test with pytest
47
+ run: python -m pytest tests/ -v
48
+
49
+ - name: Start and Test Flask App
50
+ run: |
51
+ python app.py &
52
+ sleep 10
53
+ curl --retry 5 --retry-delay 5 --retry-connrefused http://127.0.0.1:5000/ || exit 1
54
+ pkill -f "python app.py"
55
+ env:
56
+ FLASK_ENV: testing
57
+ FLASK_DEBUG: 0
58
+
59
+ - name: Check setup.py
60
+ run: |
61
+ if [ -f setup.py ]; then
62
+ python setup.py check
63
+ python setup.py sdist bdist_wheel
64
+ pip install -e .
65
+ else
66
+ echo "setup.py not found!"
67
+ fi
.gitignore ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # PyPI configuration file
171
+ .pypirc
README.md CHANGED
@@ -1,12 +1,187 @@
1
  ---
2
- title: Banglore RealEstate Forecast-using-CICD-piplines
3
- emoji: 🏆
4
- colorFrom: pink
5
- colorTo: gray
6
- sdk: docker
7
- pinned: false
8
- license: apache-2.0
9
- short_description: 'Get recent treands in Housing and Realestate market '
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ layout: default
3
+ title: Real Estate Price Prediction API
4
+ description: A machine learning powered real estate price prediction tool with web interface
 
 
 
 
 
5
  ---
6
 
7
+ # 🏠 **Real Estate Price Prediction API**
8
+
9
+ ## 🌟 **Project Motto**
10
+ This project aims to provide an accurate and interactive **Real Estate Price Prediction tool**. Users can input details such as property location, square footage, number of bedrooms, and bathrooms to get an **instant price prediction** based on a trained **machine learning model**.
11
+
12
+ This API bridges the gap between **data science** and **user-friendly deployment**, allowing seamless integration of advanced predictions into real-world applications.
13
+
14
+ ---
15
+
16
+ ## 💡 **How It Works**
17
+
18
+ 1. **Data Processing & Model Training**
19
+ - A dataset of real estate transactions was cleaned and processed.
20
+ - Key features such as `location`, `total_sqft`, `bath`, and `bhk` were selected.
21
+ - A **Linear Regression model** was trained and stored as a `.pkl` file for deployment.
22
+
23
+ 2. **Prediction Mechanism**
24
+ - The trained model is loaded and predicts property prices based on user inputs.
25
+ - Location data is one-hot encoded to handle categorical features.
26
+
27
+ 3. **Interactive Frontend**
28
+ - A Flask-powered web app provides an intuitive interface for predictions.
29
+ - Users input details via forms, and results are displayed instantly.
30
+
31
+ 4. **API Integration**
32
+ - A `/predict` endpoint allows developers to integrate the model with other applications.
33
+ ---
34
+
35
+ ## 🎥 Watch the Demo (click the image below 👇)
36
+
37
+ [![Watch on YouTube](https://img.youtube.com/vi/NcmXkE907io/0.jpg)](https://www.youtube.com/watch?v=NcmXkE907io)
38
+
39
+
40
+ ---
41
+
42
+ ## 📷 **Screenshots**
43
+ ### Home Page
44
+ ![Home Page](images/homepage.png)
45
+
46
+ ### Prediction Results
47
+ ![Prediction Result](images/predicted_results.jpg)
48
+
49
+ ---
50
+
51
+ ## 📂 **Project Structure**
52
+
53
+ ```
54
+ ├── .github/
55
+ │ └── workflows/
56
+ │ └── python-app.yml # CI/CD workflow configuration
57
+ ├── data/ # Dataset directory
58
+ │ └── bengaluru_house_prices.csv # Dataset file for the project
59
+ ├── models/ # Saved models and feature names
60
+ │ ├── feature_names.pkl # Pickled feature names
61
+ │ └── lr_regg.pkl # Trained regression model
62
+ ├── src/ # Source code for the project
63
+ │ ├── EDA.py # Exploratory Data Analysis script
64
+ │ ├── model.py # Model training and evaluation script
65
+ │ └── preprocessing.py # Data preprocessing logic
66
+ ├── templates/ # HTML templates for the Flask web app
67
+ │ ├── index.html # User input form for predictions
68
+ │ └── results.html # Displays prediction results
69
+ ├── tests/ # Unit testing for the project
70
+ │ ├── __init__.py # Marks the directory as a package
71
+ │ ├── test_model.py # Tests for the model
72
+ │ └── test2direct.py # Additional test script
73
+ ├── .gitignore # Specifies ignored files for Git
74
+ ├── app.py # Flask application entry point
75
+ ├── main.py # Main execution script
76
+ ├── requirements.txt # List of dependencies for the project
77
+ ├── setup.py # Setup script for packaging the project
78
+ ├── README.md # Project overview and documentation
79
+
80
+ ```
81
+
82
+ ---
83
+
84
+ ## 🚀 **Features**
85
+ - **Accurate Price Predictions** using a trained regression model.
86
+ - **Interactive Web Interface** for user-friendly predictions.
87
+ - **API Integration** for developers to use the model programmatically.
88
+ - **Scalable and Extendable** to new locations or additional features.
89
+
90
+ ---
91
+
92
+ ## 🛠️ **Installation and Setup**
93
+
94
+ ### Prerequisites
95
+ - Python 3.8+
96
+ - Flask
97
+ - Pickle
98
+
99
+ ### Installation Steps
100
+ 1. Clone the repository:
101
+ ```bash
102
+ git clone https://github.com/Maazuddin1/Banglore_RealEstate_forecast-using-CICD-piplines.git
103
+ cd Banglore_RealEstate_forecast-using-CICD-piplines
104
+
105
+ ```
106
+
107
+ 2. Create a virtual environment:
108
+ ```bash
109
+ python -m venv env
110
+ source env/bin/activate # Linux/Mac
111
+ env\Scripts\activate # Windows
112
+ ```
113
+
114
+ 3. Install dependencies:
115
+ ```bash
116
+ pip install -r requirements.txt
117
+ ```
118
+
119
+ 4. Start the Flask application:
120
+ ```bash
121
+ python app.py
122
+ ```
123
+
124
+ 5. Open your browser and navigate to `http://127.0.0.1:5000/`.
125
+
126
+ ---
127
+
128
+ ## 🌐 **API Usage**
129
+
130
+ ### Endpoint: `/predict`
131
+ **Method**: `POST`
132
+ **Input** (JSON):
133
+ ```json
134
+ {
135
+ "location": "Whitefield",
136
+ "sqft": 1200,
137
+ "bath": 2,
138
+ "bhk": 3
139
+ }
140
+ ```
141
+
142
+ **Output**:
143
+ ```json
144
+ {
145
+ "predicted_price": "94.23 Lakhs"
146
+ }
147
+ ```
148
+
149
+ ---
150
+
151
+ ## 🔍 **Model Details**
152
+ The trained model uses **Linear Regression** with key features like:
153
+ - **total_sqft**: Total square footage of the property.
154
+ - **bath**: Number of bathrooms.
155
+ - **bhk**: Number of bedrooms.
156
+ - **Location**: One-hot encoded for categorical support.
157
+
158
+ ---
159
+
160
+ ## 📈 **Future Enhancements**
161
+ - Add support for more advanced machine learning models like Random Forest or XGBoost.
162
+ - Improve UI design with frameworks like Bootstrap.
163
+ - Expand location datasets for better predictions.
164
+ - Add real-time price scraping for dynamic updates.
165
+
166
+ ---
167
+
168
+ ## 🖼️ **Visual Workflow**
169
+ ```mermaid
170
+ graph TD
171
+ A[User Input] --> B[Flask App]
172
+ B --> C[Process Input Features]
173
+ C --> D[Trained ML Model]
174
+ D --> E[Predict Price]
175
+ E --> F[Display Results]
176
+ ```
177
+
178
+ ---
179
+
180
+ ## 🌟 **Contributions**
181
+ Contributions are welcome! Feel free to fork this repository, open issues, or submit pull requests.
182
+
183
+ ---
184
+
185
+ ## 📄 **License**
186
+ -
187
+ ---
data/bengaluru_house_prices.csv ADDED
The diff for this file is too large to render. See raw diff
 
dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.9

# Run as an unprivileged user (required by Hugging Face Spaces).
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install dependencies first so the layer cache survives code-only edits.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app

# BUGFIX: app.py is a Flask (WSGI) app — uvicorn is an ASGI server and cannot
# serve it directly, so the previous CMD crashed at startup. Run the app the
# same way the CI workflow does. NOTE(review): HF Spaces expects the server to
# listen on 0.0.0.0:7860 — confirm app.run() binds that host/port.
CMD ["python", "app.py"]
images/homepage.png ADDED
images/predicted_results.jpg ADDED
images/temp ADDED
@@ -0,0 +1 @@
 
 
1
+ .
main.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""End-to-end training pipeline: preprocess the Bengaluru housing data,
train and evaluate a regression model, then persist the model artifacts."""
from src.preprocessing import Preprocessing
from src.model import ModelBuilder
from tests import test2direct
import pandas as pd
import pickle
import os


def main():
    """Run preprocessing, model training/evaluation, artifact saving, and a smoke test."""
    # Load the raw dataset.
    data = pd.read_csv("data/bengaluru_house_prices.csv")

    # Preprocess the data (each step mutates preprocessor.data in place).
    print("Starting Data Preprocessing...")
    preprocessor = Preprocessing(data)
    preprocessor.clean_data()
    preprocessor.feature_engineering()
    preprocessor.remove_bhk_outliers()
    preprocessor.encode_features()
    preprocessor.scale_features()
    preprocessor.handle_missing_values()
    print("Preprocessing completed!")

    # Build and evaluate the model.
    print("Starting Model Building and Evaluation...")
    model_builder = ModelBuilder(data=preprocessor.data)
    X_train, X_test, y_train, y_test = model_builder.split_data(target_column='price')

    model_builder.train_model(X_train, y_train)
    mse, r2 = model_builder.evaluate_model(X_test, y_test)

    # Persist the trained model and its feature names as pickle files.
    model_builder.save_model_as_pickle()
    model_builder.save_features_as_pickle(data=preprocessor.data)
    # BUGFIX: report success only after the artifacts have actually been
    # written (the original printed this message before saving anything).
    print("Trained model saved successfully!")

    # Smoke-test the saved artifacts end to end.
    test2direct.main()


if __name__ == "__main__":
    main()
models/feature_names.pkl ADDED
Binary file (3.63 kB). View file
 
models/lr_regg.pkl ADDED
Binary file (7.91 kB). View file
 
notebooks/Analysis_notebook.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
setup.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Packaging configuration for the Bangalore house-price estimator."""
from setuptools import setup, find_packages

# Runtime dependencies; versions are intentionally left unpinned.
REQUIRED_PACKAGES = [
    "flask",
    "pandas",
    "numpy",
    "scikit-learn",
    "seaborn",
    "matplotlib",
]

setup(
    name="Banglore_house_price_estimator",
    version="1.0",
    description="A machine learning project for house price prediction in Banglore",
    author="Maaz uddin",
    packages=find_packages(),
    install_requires=REQUIRED_PACKAGES,
)
src/EDA.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import matplotlib.pyplot as plt
3
+ import seaborn as sns
4
+
5
class EDA:
    """Exploratory data analysis helpers for the Bengaluru housing dataset.

    Load a CSV once via ``load_data``; every other method reads ``self.data``.
    The plotting methods call ``plt.show()`` and so block in interactive use.
    """

    def __init__(self, data_path):
        """Initialize with the path to the CSV dataset."""
        self.data_path = data_path
        self.data = None  # populated by load_data()

    def load_data(self):
        """Load the dataset from ``self.data_path`` and return it."""
        self.data = pd.read_csv(self.data_path)
        return self.data

    def basic_info(self):
        """Print shape, missing-value and duplicate counts; return ``describe()``."""
        print("\nDataset Info:\n")
        print(self.data.info())
        print("\nShape:", self.data.shape)
        print("\nMissing Values:\n", self.data.isnull().sum())
        print("\nDuplicate Rows:", self.data.duplicated().sum())
        return self.data.describe()

    def missing_value_analysis(self):
        """Return per-column missing counts (descending) and plot them if any."""
        missing_data = self.data.isnull().sum()
        missing_data = missing_data[missing_data > 0].sort_values(ascending=False)

        if not missing_data.empty:
            plt.figure(figsize=(8, 6))
            sns.barplot(x=missing_data.index, y=missing_data.values, palette='viridis')
            plt.title('Missing Values Count')
            plt.xticks(rotation=45)
            plt.ylabel('Count')
            plt.show()

        return missing_data

    def visualize_distributions(self):
        """Plot histograms for every numerical feature."""
        numeric_cols = self.data.select_dtypes(include=['int64', 'float64']).columns
        self.data[numeric_cols].hist(bins=15, figsize=(10, 8), color='skyblue', edgecolor='black')
        plt.suptitle('Feature Distributions', fontsize=16)
        plt.show()

    def correlation_heatmap(self):
        """Plot a heatmap of pairwise correlations between numeric features."""
        # BUGFIX: correlate only numeric columns — DataFrame.corr() raises a
        # ValueError on object columns (e.g. 'location') in pandas >= 2.0.
        numeric_data = self.data.select_dtypes(include=['int64', 'float64'])
        plt.figure(figsize=(10, 8))
        sns.heatmap(numeric_data.corr(), annot=True, cmap='coolwarm', fmt='.2f')
        plt.title('Feature Correlation Heatmap')
        plt.show()

    def detect_outliers(self, feature):
        """Visualize outliers for ``feature`` with a box plot."""
        plt.figure(figsize=(8, 6))
        sns.boxplot(x=self.data[feature], color='lightblue')
        plt.title(f'Outliers in {feature}')
        plt.show()

    def feature_summary(self):
        """Print a summary of categorical (top values) and numerical (moments) features."""
        categorical_cols = self.data.select_dtypes(include=['object']).columns
        numeric_cols = self.data.select_dtypes(include=['int64', 'float64']).columns

        print("\nCategorical Features:")
        for col in categorical_cols:
            print(f"{col}: {self.data[col].nunique()} unique values")
            print(self.data[col].value_counts().head(10))
            print("---")

        print("\nNumerical Features:")
        for col in numeric_cols:
            print(f"{col}: Mean={self.data[col].mean()}, Median={self.data[col].median()}, Std={self.data[col].std()}")
            print("---")

    def pairwise_scatterplots(self, features):
        """Plot pairwise scatterplots (KDE on the diagonal) for ``features``."""
        sns.pairplot(self.data[features], diag_kind='kde', plot_kws={'alpha': 0.5})
        plt.suptitle('Pairwise Scatterplots', fontsize=16)
        plt.show()

    def target_analysis(self, target_col):
        """Plot the distribution of the target variable ``target_col``."""
        plt.figure(figsize=(8, 6))
        sns.histplot(self.data[target_col], kde=True, bins=30, color='blue')
        plt.title(f'Distribution of {target_col}')
        plt.xlabel(target_col)
        plt.ylabel('Frequency')
        plt.show()
+ if __name__ == "__main__":
94
+ eda = EDA(data_path="data/bengaluru_house_prices.csv")
95
+ data = eda.load_data()
96
+ eda.basic_info()
97
+ eda.missing_value_analysis()
98
+ eda.visualize_distributions()
99
+ eda.correlation_heatmap()
100
+ eda.detect_outliers('price')
101
+ eda.feature_summary()
102
+ eda.pairwise_scatterplots(features=['price', 'total_sqft', 'bath', 'bhk'])
103
+ eda.target_analysis(target_col='price')
104
+ print("Missing values summary:")
105
+ print(eda.missing_value_analysis())
src/model.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sklearn.model_selection import train_test_split
3
+ from sklearn.linear_model import LinearRegression
4
+ from sklearn.metrics import mean_squared_error, r2_score
5
+ import pickle # Import pickle for saving models
6
+ import os # Import os for directory operations
7
+
8
class ModelBuilder:
    """Train, evaluate, and persist a linear-regression house-price model."""

    def __init__(self, data):
        """Initialize with the (already preprocessed) dataset."""
        self.data = data
        self.model = None  # set by train_model() or load_model_from_pickle()

    def split_data(self, target_column, test_size=0.2, random_state=42):
        """Split the data into training and testing sets.

        Args:
            target_column (str): Column used as the regression target.
            test_size (float): Fraction of rows held out for testing.
            random_state (int): Seed for reproducible splits.

        Returns:
            tuple: X_train, X_test, y_train, y_test

        Raises:
            ValueError: If ``target_column`` is not present in the data.
        """
        if target_column not in self.data.columns:
            raise ValueError(f"Target column '{target_column}' not found in the dataset.")

        X = self.data.drop(columns=[target_column])
        y = self.data[target_column]

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

        print(f"Data split complete: Train size = {len(X_train)}, Test size = {len(X_test)}")
        return X_train, X_test, y_train, y_test

    def train_model(self, X_train, y_train):
        """Fit a Linear Regression model on the training data."""
        self.model = LinearRegression()
        self.model.fit(X_train, y_train)
        print("Model training complete.")

    def evaluate_model(self, X_test, y_test):
        """Evaluate the trained model on the test set.

        Returns:
            tuple: (mean squared error, R^2 score)

        Raises:
            ValueError: If the model has not been trained yet.
        """
        if self.model is None:
            raise ValueError("Model has not been trained yet.")

        y_pred = self.model.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        # Note: model.score() would return the same R^2; the original computed
        # it into an unused variable, which has been removed.
        r2 = r2_score(y_test, y_pred)

        print(f"Model Evaluation:\nMean Squared Error: {mse}\nR2 Score(accuracy): {r2}")
        return mse, r2

    def save_model_as_pickle(self, model_path='models/lr_regg.pkl'):
        """Save the trained model as a pickle file and return the path.

        Raises:
            ValueError: If the model has not been trained yet.
        """
        if self.model is None:
            raise ValueError("Model has not been trained yet.")

        # BUGFIX: create the target directory when needed — this call was
        # commented out, so saving failed whenever models/ did not exist.
        directory = os.path.dirname(model_path)
        if directory:
            os.makedirs(directory, exist_ok=True)

        with open(model_path, 'wb') as file:
            pickle.dump(self.model, file)

        print(f"Model saved as pickle at {model_path}")
        return model_path

    def save_features_as_pickle(self, data, target_column='price', file_path='models/feature_names.pkl'):
        """
        Extract feature names from the data and save them as a pickle file.

        Args:
            data (pd.DataFrame): Input dataset.
            target_column (str): Name of the target column to exclude from features.
            file_path (str): Path to save the pickle file.

        Raises:
            ValueError: If ``target_column`` is not present in ``data``.
        """
        if target_column not in data.columns:
            raise ValueError(f"Target column '{target_column}' not found in the dataset.")

        # Everything except the target is a model feature.
        feature_names = data.drop(columns=[target_column]).columns.tolist()

        os.makedirs(os.path.dirname(file_path), exist_ok=True)

        with open(file_path, "wb") as file:
            pickle.dump(feature_names, file)

        print(f"Feature names saved to {file_path}")

    def load_model_from_pickle(self, model_path):
        """Load a model from a pickle file into ``self.model`` and return it.

        Raises:
            FileNotFoundError: If ``model_path`` does not exist.
        """
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"No model found at {model_path}")

        # NOTE: pickle.load executes arbitrary code — only load trusted files.
        with open(model_path, 'rb') as file:
            self.model = pickle.load(file)

        print(f"Model loaded from {model_path}")
        return self.model
src/preprocessing.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ from sklearn.preprocessing import OneHotEncoder, StandardScaler
4
+ from sklearn.model_selection import train_test_split
5
+
6
class Preprocessing:
    """Cleaning / feature-engineering pipeline for the Bengaluru housing data.

    Each step mutates ``self.data`` in place and also returns it, so the
    steps can be run sequentially (clean_data -> feature_engineering ->
    remove_bhk_outliers -> encode_features -> scale_features ->
    handle_missing_values).
    """

    def __init__(self, data):
        """Initialize with the raw dataset (a pandas DataFrame)."""
        self.data = data

    def clean_data(self):
        """Drop duplicates/unused columns, fill missing values, group rare locations."""
        self.data = self.data.drop_duplicates()
        # Columns not used by the model. NOTE(review): this raises KeyError if
        # any of these columns is absent — confirm the upstream CSV schema.
        self.data = self.data.drop(['area_type', 'availability', 'society', 'balcony'], axis=1)
        self.data = self.data.dropna()

        # Drop rows with missing target values.
        if 'price' in self.data.columns:
            self.data = self.data.dropna(subset=['price'])

        # Fill missing numerical values with the column median.
        numeric_cols = self.data.select_dtypes(include=['int64', 'float64']).columns
        self.data[numeric_cols] = self.data[numeric_cols].fillna(self.data[numeric_cols].median())

        # Fill missing categorical values with the column mode.
        categorical_cols = self.data.select_dtypes(include=['object']).columns
        self.data[categorical_cols] = self.data[categorical_cols].fillna(self.data[categorical_cols].mode().iloc[0])

        # Bucket locations with <= 10 listings under a single 'other' label so
        # one-hot encoding later does not explode into rare columns.
        if 'location' in self.data.columns:
            location_stats = self.data['location'].value_counts()
            location_stats_lessthan_10 = location_stats[location_stats <= 10]
            self.data['location'] = self.data['location'].apply(
                lambda x: 'other' if x in location_stats_lessthan_10 else x
            )
        return self.data

    def convert_rangesqft_to_avg(self, x):
        """Convert a 'low - high' sqft range string to its average.

        Returns a float for a plain number or a two-part range, otherwise None.
        """
        token = x.split('-')
        if len(token) == 2:
            return (float(token[0]) + float(token[1])) / 2
        try:
            return float(x)
        except (ValueError, TypeError):
            # BUGFIX: was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt; only parsing failures should yield None.
            return None

    def feature_engineering(self):
        """Derive 'bhk', normalise 'total_sqft', filter implausible rows, add 'price_per_sqft'."""
        # Extract the leading integer from the textual 'size' column
        # (e.g. "2 BHK" -> 2), then drop the original column.
        self.data['bhk'] = self.data['size'].apply(lambda x: int(x.split(' ')[0]) if isinstance(x, str) else None)
        del self.data['size']

        # Convert 'total_sqft' ranges ("1000 - 1200") to their average value.
        if 'total_sqft' in self.data.columns:
            self.data['total_sqft'] = self.data['total_sqft'].apply(self.convert_rangesqft_to_avg)

        # Drop rows with less than 300 sqft per bedroom (implausible listings).
        if 'total_sqft' in self.data.columns and 'bhk' in self.data.columns:
            self.data = self.data[~(self.data['total_sqft'] / self.data['bhk'] < 300)]

        # Drop rows with more bathrooms than bedrooms + 2.
        if 'bhk' in self.data.columns and 'bath' in self.data.columns:
            self.data = self.data[self.data['bhk'] + 2 > self.data['bath']]

        # Price is in lakhs, hence the 100000 factor for price per sqft.
        if 'total_sqft' in self.data.columns and 'price' in self.data.columns:
            self.data['price_per_sqft'] = self.data['price'] * 100000 / self.data['total_sqft']
        return self.data

    def remove_bhk_outliers(self):
        """Remove, per location, n-BHK rows priced below the mean price_per_sqft of (n-1)-BHK."""
        exclude_indices = []

        for location, location_df in self.data.groupby('location'):
            # Collect price_per_sqft statistics for each bhk in this location.
            bhk_stats = {}
            for bhk, bhk_df in location_df.groupby('bhk'):
                bhk_stats[bhk] = {
                    'mean': np.mean(bhk_df['price_per_sqft']),
                    'std': np.std(bhk_df['price_per_sqft']),
                    'count': bhk_df.shape[0]
                }

            # A larger flat priced under the smaller flat's mean is an outlier
            # (only when the smaller-bhk sample is big enough to trust).
            for bhk, bhk_df in location_df.groupby('bhk'):
                stats = bhk_stats.get(bhk - 1)
                if stats and stats['count'] > 5:
                    exclude_indices.extend(
                        bhk_df[bhk_df['price_per_sqft'] < stats['mean']].index.values
                    )

        self.data = self.data.drop(index=exclude_indices)
        print(f"Removed {len(exclude_indices)} outliers based on bhk and price_per_sqft.")
        return self.data

    def encode_features(self):
        """One-hot encode 'location' (drop-first) and drop the original column."""
        categorical_cols = self.data.select_dtypes(include=['object']).columns
        if categorical_cols.empty:
            print("No categorical features found for encoding.")

            return self.data

        dummies = pd.get_dummies(self.data['location'], drop_first=True)
        dummies = dummies.astype(int)  # int columns for model compatibility
        self.data = pd.concat([self.data, dummies], axis=1)

        self.data = self.data.drop(columns=['location'])

        print(f"Categorical features encoded: {len(categorical_cols)}")
        print(f"New dataset shape after encoding: {self.data.shape}")

        return self.data

    def scale_features(self):
        """Standardise all numerical columns with StandardScaler.

        NOTE(review): this also scales the 'price' target column if present —
        confirm that downstream consumers expect a scaled target.
        """
        scaler = StandardScaler()
        numeric_cols = self.data.select_dtypes(include=['int64', 'float64']).columns
        self.data[numeric_cols] = scaler.fit_transform(self.data[numeric_cols])
        return self.data

    def handle_missing_values(self):
        """Drop any rows that still contain missing values after all other steps."""
        self.data = self.data.dropna()
        return self.data

    def split_data(self, target_column, test_size=0.2, random_state=42):
        """Splits the dataset into training and testing sets.

        Args:
            target_column (str): The column to be used as the target variable.
            test_size (float): Proportion of the dataset to include in the test split.
            random_state (int): Random seed for reproducibility.

        Returns:
            tuple: X_train, X_test, y_train, y_test

        Raises:
            ValueError: If ``target_column`` is not present in the data.
        """
        if target_column not in self.data.columns:
            raise ValueError(f"Target column '{target_column}' not found in the dataset.")

        X = self.data.drop(columns=[target_column])
        y = self.data[target_column]

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
        return X_train, X_test, y_train, y_test
154
# Example usage: run the full preprocessing pipeline end to end.
if __name__ == "__main__":
    raw = pd.read_csv("data/bengaluru_house_prices.csv")

    pipeline = Preprocessing(data=raw)
    for step in (
        pipeline.clean_data,            # drop duplicates/NA, group rare locations
        pipeline.feature_engineering,   # derive bhk, normalise total_sqft
        pipeline.remove_bhk_outliers,   # per-location bhk price outliers
        pipeline.encode_features,       # one-hot encode location
        pipeline.scale_features,        # standardise numeric columns
        pipeline.handle_missing_values, # drop any remaining NA rows
    ):
        step()

    print(pipeline.data.columns.tolist())
    print(pipeline.data.shape)
    print("\nprocessing completed !!!")
templates/index.html ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<!-- Bangalore house price prediction form. Rendered server-side (Jinja2):
     expects `locations` (list of option strings) and optionally `error`,
     `prediction`, and `property_details` in the template context. -->
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>House Price Predictor</title>
    <!-- NOTE(review): the CDN href below reads "[email protected]" — the
         "bootstrap@<version>" path segment appears mangled by extraction;
         confirm the intended pinned Bootstrap 5 version. -->
    <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
    <style>
        body {
            padding: 20px;
            background-color: #f8f9fa;
        }
        .container {
            max-width: 800px;
            background-color: white;
            padding: 30px;
            border-radius: 10px;
            box-shadow: 0 0 10px rgba(0,0,0,0.1);
            margin-top: 50px;
        }
        .prediction-result {
            margin-top: 20px;
            padding: 20px;
            border-radius: 5px;
            background-color: #e9ecef;
        }
        .property-details {
            margin-top: 20px;
            padding: 15px;
            border: 1px solid #dee2e6;
            border-radius: 5px;
        }
    </style>
</head>
<body>
    <div class="container">
        <h2 class="text-center mb-4">Bangalore House Price Predictor</h2>

        <!-- Server-side error banner (set by the view on bad input). -->
        {% if error %}
        <div class="alert alert-danger" role="alert">
            {{ error }}
        </div>
        {% endif %}

        <!-- Prediction form: POSTs back to the same route. `novalidate`
             defers validation to the Bootstrap script at the bottom. -->
        <form method="POST" class="needs-validation" novalidate>
            <div class="mb-3">
                <label for="location" class="form-label">Location:</label>
                <select class="form-select" id="location" name="location" required>
                    <option value="">Select a location</option>
                    {% for location in locations %}
                    <option value="{{ location }}">{{ location }}</option>
                    {% endfor %}
                </select>
            </div>

            <div class="mb-3">
                <label for="sqft" class="form-label">Total Square Feet:</label>
                <input type="number" class="form-control" id="sqft" name="sqft" min="100" required>
            </div>

            <div class="mb-3">
                <label for="bath" class="form-label">Number of Bathrooms:</label>
                <input type="number" class="form-control" id="bath" name="bath" min="1" max="10" required>
            </div>

            <div class="mb-3">
                <label for="bhk" class="form-label">BHK (Bedrooms):</label>
                <input type="number" class="form-control" id="bhk" name="bhk" min="1" max="10" required>
            </div>

            <div class="text-center">
                <button type="submit" class="btn btn-primary">Predict Price</button>
            </div>
        </form>

        <!-- Result card: shown only after a successful prediction. -->
        {% if prediction is not none %}
        <div class="prediction-result text-center">
            <h4>Predicted Price:</h4>
            <p class="h3">₹ {{ prediction }} Lakhs</p>

            {% if property_details %}
            <div class="property-details">
                <h5>Property Details:</h5>
                <ul class="list-unstyled">
                    <li><strong>Location:</strong> {{ property_details.location }}</li>
                    <li><strong>Area:</strong> {{ property_details.sqft }} sq.ft</li>
                    <li><strong>Bathrooms:</strong> {{ property_details.bath }}</li>
                    <li><strong>BHK:</strong> {{ property_details.bhk }}</li>
                </ul>
            </div>
            {% endif %}
        </div>
        {% endif %}
    </div>

    <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
    <script>
        // Form validation
        // Bootstrap boilerplate: block submission and flag the form as
        // validated when any required field is invalid.
        (function () {
            'use strict'
            var forms = document.querySelectorAll('.needs-validation')
            Array.prototype.slice.call(forms).forEach(function (form) {
                form.addEventListener('submit', function (event) {
                    if (!form.checkValidity()) {
                        event.preventDefault()
                        event.stopPropagation()
                    }
                    form.classList.add('was-validated')
                }, false)
            })
        })()
    </script>
</body>
</html>
templates/results.html ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<!-- Standalone prediction-result page. Rendered server-side (Jinja2) with
     `predicted_price`, `location`, `sqft`, `bath`, and `bhk` in context. -->
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>House Price Prediction Result</title>
    <!-- NOTE(review): the CDN href reads "[email protected]" — the
         "bootstrap@<version>" segment appears mangled by extraction;
         confirm the intended pinned Bootstrap 5 version. -->
    <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
    <style>
        body {
            background-color: #f8f9fa;
            padding: 20px;
        }
        .result-container {
            max-width: 600px;
            margin: 50px auto;
            background-color: white;
            padding: 30px;
            border-radius: 10px;
            box-shadow: 0 0 10px rgba(0,0,0,0.1);
        }
        .price-display {
            background-color: #e9ecef;
            padding: 20px;
            border-radius: 5px;
            margin: 20px 0;
            text-align: center;
        }
        .property-details {
            margin: 20px 0;
            padding: 15px;
            border: 1px solid #dee2e6;
            border-radius: 5px;
        }
        .back-button {
            text-align: center;
            margin-top: 20px;
        }
    </style>
</head>
<body>
    <div class="result-container">
        <h2 class="text-center mb-4">Price Prediction Result</h2>

        <div class="price-display">
            <h3 class="mb-3">Predicted Price</h3>
            <h2 class="text-primary">₹ {{ predicted_price }} Lakhs</h2>
        </div>

        <div class="property-details">
            <h4>Property Details:</h4>
            <ul class="list-unstyled">
                <li><strong>Location:</strong> {{ location }}</li>
                <li><strong>Area:</strong> {{ sqft }} sq.ft</li>
                <li><strong>Bathrooms:</strong> {{ bath }}</li>
                <li><strong>BHK:</strong> {{ bhk }}</li>
            </ul>
        </div>

        <!-- Return to the input form at the site root. -->
        <div class="back-button">
            <a href="/" class="btn btn-primary">Make Another Prediction</a>
        </div>
    </div>

    <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
</body>
</html>
tests/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
tests/test2direct.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import numpy as np
3
+
4
def load_model_and_features(model_path, feature_path):
    """Load the trained model and feature names from pickle files.

    Args:
        model_path (str): Path to the trained model pickle file.
        feature_path (str): Path to the feature names pickle file.

    Returns:
        tuple: (trained model, feature names)
    """
    def _unpickle(path):
        # Pickle payloads must be read in binary mode.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    # Model first, then feature names — same order as the return tuple.
    return _unpickle(model_path), _unpickle(feature_path)
24
+
25
def predict_price(location, sqft, bath, bhk, model, feature_names):
    """Predict the price using the trained model.

    Builds a single feature row matching ``feature_names`` — numeric
    columns plus a one-hot location flag — and feeds it to the model.

    Args:
        location (str): Location name.
        sqft (float): Total square footage.
        bath (int): Number of bathrooms.
        bhk (int): Number of bedrooms.
        model: Trained model object exposing ``predict``.
        feature_names (list): Ordered feature names used in training.

    Returns:
        float: Predicted price.
    """
    x = np.zeros(len(feature_names))

    # Fill each numeric feature only when the model was trained with it.
    for name, value in (("total_sqft", sqft), ("bath", bath), ("bhk", bhk)):
        if name in feature_names:
            x[feature_names.index(name)] = value

    # One-hot flag for the location column; unknown locations stay all-zero.
    if location in feature_names:
        x[feature_names.index(location)] = 1

    return model.predict([x])[0]
58
+
59
def main():
    """Smoke-test the saved regression model on a few sample properties."""
    # Paths to the persisted artefacts, relative to the repo root.
    model_path = "models/lr_regg.pkl"
    feature_path = "models/feature_names.pkl"

    model, feature_names = load_model_and_features(model_path, feature_path)

    # Representative inputs, including one location absent from training.
    test_cases = [
        {"location": "Whitefield", "sqft": 1200, "bath": 2, "bhk": 2},
        {"location": "Banaswadi", "sqft": 1500, "bath": 3, "bhk": 3},
        {"location": "Basavangudi", "sqft": 1800, "bath": 3, "bhk": 4},
        {"location": "Nonexistent Location", "sqft": 1000, "bath": 2, "bhk": 3},
        {"location": "Electronic City Phase II", "sqft": 1056, "bath": 2, "bhk": 2},
        {"location": "Chikka Tirupathi", "sqft": 800, "bath": 2, "bhk": 2}
    ]

    print("\nPredictions:")
    for case in test_cases:
        location, sqft = case["location"], case["sqft"]
        bath, bhk = case["bath"], case["bhk"]

        try:
            predicted_price = predict_price(location, sqft, bath, bhk, model, feature_names)
            print(f"Location: {location}, Sqft: {sqft}, Bath: {bath}, BHK: {bhk} -> Predicted Price: {predicted_price/10:.0f} lakhs")
        except Exception as e:
            # Best-effort smoke test: report the failure and keep going.
            print(f"Prediction failed for Location: {location}, Error: {e}")
89
+
90
# Script entry point: run the smoke-test predictions directly.
if __name__ == "__main__":
    main()
tests/test_model.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import numpy as np
3
+
4
+ # Load model and feature names
5
# Load model and feature names
def load_model_and_features(model_path, feature_path):
    """Deserialize the trained model and its feature-name list.

    Args:
        model_path (str): Path to the pickled model.
        feature_path (str): Path to the pickled feature-name list.

    Returns:
        tuple: (model, feature_names)
    """
    loaded = []
    # Read both pickles in order: model first, then feature names.
    for path in (model_path, feature_path):
        with open(path, "rb") as handle:
            loaded.append(pickle.load(handle))
    return tuple(loaded)
15
+
16
+ # Predict price using the model
17
# Predict price using the model
def predict_price(location, sqft, bath, bhk, model, feature_names):
    """Build the model's input row for one property and predict its price.

    Numeric columns are filled when present in ``feature_names``; the
    location gets a one-hot flag when it was seen during training.

    Returns:
        float: Predicted price.
    """
    features = np.zeros(len(feature_names))

    numeric_inputs = {"total_sqft": sqft, "bath": bath, "bhk": bhk}
    for column, value in numeric_inputs.items():
        if column in feature_names:
            features[feature_names.index(column)] = value

    # Unknown locations leave every one-hot column at zero.
    if location in feature_names:
        features[feature_names.index(location)] = 1

    return model.predict([features])[0]
36
+
37
+ # Test function
38
# Test function
def test_house_price_predictions():
    """Validate saved-model predictions against recorded expected prices.

    Loads the pickled model and feature names, then checks that each test
    case's predicted price (in lakhs, rounded) matches the expectation.

    Raises:
        AssertionError: On any prediction error or expectation mismatch.

    BUG FIX: the original wrapped the assert in a broad ``except Exception``
    that printed and continued, so AssertionError was swallowed and the
    test could never actually fail. Assertions now propagate; prediction
    errors are re-raised as assertion failures.
    """
    # Paths to the model and feature names
    model_path = "models/lr_regg.pkl"
    feature_path = "models/feature_names.pkl"

    # Load the model and features
    model, feature_names = load_model_and_features(model_path, feature_path)

    # Test cases and expected outputs (recorded from a known-good model build).
    test_cases = [
        {"location": "Whitefield", "sqft": 1200, "bath": 2, "bhk": 2, "expected": 94},
        {"location": "Banaswadi", "sqft": 1500, "bath": 3, "bhk": 3, "expected": 118},
        {"location": "Basavangudi", "sqft": 1800, "bath": 3, "bhk": 4, "expected": 142},
        {"location": "Nonexistent Location", "sqft": 1000, "bath": 2, "bhk": 3, "expected": 79},
        {"location": "Electronic City Phase II", "sqft": 1056, "bath": 2, "bhk": 2, "expected": 83},
        {"location": "Chikka Tirupathi", "sqft": 800, "bath": 2, "bhk": 2, "expected": 63}
    ]

    # Run predictions and validate against expected outputs
    for case in test_cases:
        location = case["location"]
        sqft = case["sqft"]
        bath = case["bath"]
        bhk = case["bhk"]
        expected = case["expected"]

        try:
            predicted_price = predict_price(location, sqft, bath, bhk, model, feature_names)
        except Exception as e:
            # Surface prediction errors as test failures instead of printing.
            raise AssertionError(f"Prediction failed for Location: {location}, Error: {e}") from e

        # Assertion is outside the try block so mismatches fail loudly.
        assert round(predicted_price / 10) == expected, (
            f"Failed for Location: {location}, "
            f"Expected: {expected}, Got: {predicted_price/10:.0f} lakhs"
        )
        print(f"Test Passed: Location: {location}, Predicted: {predicted_price/10:.0f} lakhs")
73
+
74
+ # Run the tests
75
+ if __name__ == "__main__":
76
+ test_house_price_predictions()