diff --git "a/Project_19_Breast_Cancer_Classification_using_Machine_Learning.ipynb" "b/Project_19_Breast_Cancer_Classification_using_Machine_Learning.ipynb" new file mode 100644--- /dev/null +++ "b/Project_19_Breast_Cancer_Classification_using_Machine_Learning.ipynb" @@ -0,0 +1,2164 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "X3wT8l6lfj--" + }, + "source": [ + "Importing the Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "XqsQmOXGXXTe" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import sklearn.datasets\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import accuracy_score" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pwJ9zLukg3Q_" + }, + "source": [ + "Data Collection & Processing" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "j6bMZMKUgz7L" + }, + "outputs": [], + "source": [ + "# loading the data from sklearn\n", + "breast_cancer_dataset = sklearn.datasets.load_breast_cancer()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xdY6i73KgkDG", + "outputId": "13bb7b58-e500-4360-e93f-0c83bbee8601" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,\n", + " 1.189e-01],\n", + " [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,\n", + " 8.902e-02],\n", + " [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,\n", + " 8.758e-02],\n", + " ...,\n", + " [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,\n", + " 7.820e-02],\n", + " [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,\n", + " 1.240e-01],\n", + " [7.760e+00, 2.454e+01, 4.792e+01, ..., 
0.000e+00, 2.871e-01,\n", + " 7.039e-02]]), 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,\n", + " 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,\n", + " 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,\n", + " 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,\n", + " 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,\n", + " 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,\n", + " 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,\n", + " 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0,\n", + " 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,\n", + " 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0,\n", + " 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1,\n", + " 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1,\n", + " 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,\n", + " 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,\n", + " 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,\n", + " 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1]), 'frame': None, 
'target_names': array(['malignant', 'benign'], dtype='\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst radiusworst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimension
017.9910.38122.801001.00.118400.277600.30010.147100.24190.07871...25.3817.33184.602019.00.16220.66560.71190.26540.46010.11890
120.5717.77132.901326.00.084740.078640.08690.070170.18120.05667...24.9923.41158.801956.00.12380.18660.24160.18600.27500.08902
219.6921.25130.001203.00.109600.159900.19740.127900.20690.05999...23.5725.53152.501709.00.14440.42450.45040.24300.36130.08758
311.4220.3877.58386.10.142500.283900.24140.105200.25970.09744...14.9126.5098.87567.70.20980.86630.68690.25750.66380.17300
420.2914.34135.101297.00.100300.132800.19800.104300.18090.05883...22.5416.67152.201575.00.13740.20500.40000.16250.23640.07678
\n", + "

5 rows × 30 columns

\n", + "" + ], + "text/plain": [ + " mean radius mean texture mean perimeter mean area mean smoothness \\\n", + "0 17.99 10.38 122.80 1001.0 0.11840 \n", + "1 20.57 17.77 132.90 1326.0 0.08474 \n", + "2 19.69 21.25 130.00 1203.0 0.10960 \n", + "3 11.42 20.38 77.58 386.1 0.14250 \n", + "4 20.29 14.34 135.10 1297.0 0.10030 \n", + "\n", + " mean compactness mean concavity mean concave points mean symmetry \\\n", + "0 0.27760 0.3001 0.14710 0.2419 \n", + "1 0.07864 0.0869 0.07017 0.1812 \n", + "2 0.15990 0.1974 0.12790 0.2069 \n", + "3 0.28390 0.2414 0.10520 0.2597 \n", + "4 0.13280 0.1980 0.10430 0.1809 \n", + "\n", + " mean fractal dimension ... worst radius worst texture worst perimeter \\\n", + "0 0.07871 ... 25.38 17.33 184.60 \n", + "1 0.05667 ... 24.99 23.41 158.80 \n", + "2 0.05999 ... 23.57 25.53 152.50 \n", + "3 0.09744 ... 14.91 26.50 98.87 \n", + "4 0.05883 ... 22.54 16.67 152.20 \n", + "\n", + " worst area worst smoothness worst compactness worst concavity \\\n", + "0 2019.0 0.1622 0.6656 0.7119 \n", + "1 1956.0 0.1238 0.1866 0.2416 \n", + "2 1709.0 0.1444 0.4245 0.4504 \n", + "3 567.7 0.2098 0.8663 0.6869 \n", + "4 1575.0 0.1374 0.2050 0.4000 \n", + "\n", + " worst concave points worst symmetry worst fractal dimension \n", + "0 0.2654 0.4601 0.11890 \n", + "1 0.1860 0.2750 0.08902 \n", + "2 0.2430 0.3613 0.08758 \n", + "3 0.2575 0.6638 0.17300 \n", + "4 0.1625 0.2364 0.07678 \n", + "\n", + "[5 rows x 30 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# print the first 5 rows of the dataframe\n", + "data_frame.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "UEvD_aTDiNLF" + }, + "outputs": [], + "source": [ + "# adding the 'target' column to the data frame\n", + "data_frame['label'] = breast_cancer_dataset.target" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/", + "height": 261 + }, + "id": "f_kmjEA5io2v", + "outputId": "3112fc43-947a-4c63-ab64-97b345acbb49" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimensionlabel
56421.5622.39142.001479.00.111000.115900.243900.138900.17260.05623...26.40166.102027.00.141000.211300.41070.22160.20600.071150
56520.1328.25131.201261.00.097800.103400.144000.097910.17520.05533...38.25155.001731.00.116600.192200.32150.16280.25720.066370
56616.6028.08108.30858.10.084550.102300.092510.053020.15900.05648...34.12126.701124.00.113900.309400.34030.14180.22180.078200
56720.6029.33140.101265.00.117800.277000.351400.152000.23970.07016...39.42184.601821.00.165000.868100.93870.26500.40870.124000
5687.7624.5447.92181.00.052630.043620.000000.000000.15870.05884...30.3759.16268.60.089960.064440.00000.00000.28710.070391
\n", + "

5 rows × 31 columns

\n", + "
" + ], + "text/plain": [ + " mean radius mean texture mean perimeter mean area mean smoothness \\\n", + "564 21.56 22.39 142.00 1479.0 0.11100 \n", + "565 20.13 28.25 131.20 1261.0 0.09780 \n", + "566 16.60 28.08 108.30 858.1 0.08455 \n", + "567 20.60 29.33 140.10 1265.0 0.11780 \n", + "568 7.76 24.54 47.92 181.0 0.05263 \n", + "\n", + " mean compactness mean concavity mean concave points mean symmetry \\\n", + "564 0.11590 0.24390 0.13890 0.1726 \n", + "565 0.10340 0.14400 0.09791 0.1752 \n", + "566 0.10230 0.09251 0.05302 0.1590 \n", + "567 0.27700 0.35140 0.15200 0.2397 \n", + "568 0.04362 0.00000 0.00000 0.1587 \n", + "\n", + " mean fractal dimension ... worst texture worst perimeter worst area \\\n", + "564 0.05623 ... 26.40 166.10 2027.0 \n", + "565 0.05533 ... 38.25 155.00 1731.0 \n", + "566 0.05648 ... 34.12 126.70 1124.0 \n", + "567 0.07016 ... 39.42 184.60 1821.0 \n", + "568 0.05884 ... 30.37 59.16 268.6 \n", + "\n", + " worst smoothness worst compactness worst concavity \\\n", + "564 0.14100 0.21130 0.4107 \n", + "565 0.11660 0.19220 0.3215 \n", + "566 0.11390 0.30940 0.3403 \n", + "567 0.16500 0.86810 0.9387 \n", + "568 0.08996 0.06444 0.0000 \n", + "\n", + " worst concave points worst symmetry worst fractal dimension label \n", + "564 0.2216 0.2060 0.07115 0 \n", + "565 0.1628 0.2572 0.06637 0 \n", + "566 0.1418 0.2218 0.07820 0 \n", + "567 0.2650 0.4087 0.12400 0 \n", + "568 0.0000 0.2871 0.07039 1 \n", + "\n", + "[5 rows x 31 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# print last 5 rows of the dataframe\n", + "data_frame.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "sw3wjdK6iwK4", + "outputId": "2794ef2d-a645-474b-81c3-194509ab3c7e" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(569, 31)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "# number of rows and columns in the dataset\n", + "data_frame.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AWOjMuyBi77M", + "outputId": "947c79ac-2f30-4ba7-87d8-58a500f939fd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 569 entries, 0 to 568\n", + "Data columns (total 31 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 mean radius 569 non-null float64\n", + " 1 mean texture 569 non-null float64\n", + " 2 mean perimeter 569 non-null float64\n", + " 3 mean area 569 non-null float64\n", + " 4 mean smoothness 569 non-null float64\n", + " 5 mean compactness 569 non-null float64\n", + " 6 mean concavity 569 non-null float64\n", + " 7 mean concave points 569 non-null float64\n", + " 8 mean symmetry 569 non-null float64\n", + " 9 mean fractal dimension 569 non-null float64\n", + " 10 radius error 569 non-null float64\n", + " 11 texture error 569 non-null float64\n", + " 12 perimeter error 569 non-null float64\n", + " 13 area error 569 non-null float64\n", + " 14 smoothness error 569 non-null float64\n", + " 15 compactness error 569 non-null float64\n", + " 16 concavity error 569 non-null float64\n", + " 17 concave points error 569 non-null float64\n", + " 18 symmetry error 569 non-null float64\n", + " 19 fractal dimension error 569 non-null float64\n", + " 20 worst radius 569 non-null float64\n", + " 21 worst texture 569 non-null float64\n", + " 22 worst perimeter 569 non-null float64\n", + " 23 worst area 569 non-null float64\n", + " 24 worst smoothness 569 non-null float64\n", + " 25 worst compactness 569 non-null float64\n", + " 26 worst concavity 569 non-null float64\n", + " 27 worst concave points 569 non-null float64\n", + " 28 worst symmetry 569 non-null float64\n", + " 29 worst fractal dimension 569 non-null 
float64\n", + " 30 label 569 non-null int64 \n", + "dtypes: float64(30), int64(1)\n", + "memory usage: 137.9 KB\n" + ] + } + ], + "source": [ + "# getting some information about the data\n", + "data_frame.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RuoVIUTYjLpk", + "outputId": "c5648482-6fe7-4877-d82a-5cf6c5813a34" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "mean radius 0\n", + "mean texture 0\n", + "mean perimeter 0\n", + "mean area 0\n", + "mean smoothness 0\n", + "mean compactness 0\n", + "mean concavity 0\n", + "mean concave points 0\n", + "mean symmetry 0\n", + "mean fractal dimension 0\n", + "radius error 0\n", + "texture error 0\n", + "perimeter error 0\n", + "area error 0\n", + "smoothness error 0\n", + "compactness error 0\n", + "concavity error 0\n", + "concave points error 0\n", + "symmetry error 0\n", + "fractal dimension error 0\n", + "worst radius 0\n", + "worst texture 0\n", + "worst perimeter 0\n", + "worst area 0\n", + "worst smoothness 0\n", + "worst compactness 0\n", + "worst concavity 0\n", + "worst concave points 0\n", + "worst symmetry 0\n", + "worst fractal dimension 0\n", + "label 0\n", + "dtype: int64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# checking for missing values\n", + "data_frame.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 354 + }, + "id": "oLMuXI33jlkq", + "outputId": "8e1d9a5f-578d-432d-93e7-1f4682d0bed9" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimensionlabel
count569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000...569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000
mean14.12729219.28964991.969033654.8891040.0963600.1043410.0887990.0489190.1811620.062798...25.677223107.261213880.5831280.1323690.2542650.2721880.1146060.2900760.0839460.627417
std3.5240494.30103624.298981351.9141290.0140640.0528130.0797200.0388030.0274140.007060...6.14625833.602542569.3569930.0228320.1573360.2086240.0657320.0618670.0180610.483918
min6.9810009.71000043.790000143.5000000.0526300.0193800.0000000.0000000.1060000.049960...12.02000050.410000185.2000000.0711700.0272900.0000000.0000000.1565000.0550400.000000
25%11.70000016.17000075.170000420.3000000.0863700.0649200.0295600.0203100.1619000.057700...21.08000084.110000515.3000000.1166000.1472000.1145000.0649300.2504000.0714600.000000
50%13.37000018.84000086.240000551.1000000.0958700.0926300.0615400.0335000.1792000.061540...25.41000097.660000686.5000000.1313000.2119000.2267000.0999300.2822000.0800401.000000
75%15.78000021.800000104.100000782.7000000.1053000.1304000.1307000.0740000.1957000.066120...29.720000125.4000001084.0000000.1460000.3391000.3829000.1614000.3179000.0920801.000000
max28.11000039.280000188.5000002501.0000000.1634000.3454000.4268000.2012000.3040000.097440...49.540000251.2000004254.0000000.2226001.0580001.2520000.2910000.6638000.2075001.000000
\n", + "

8 rows × 31 columns

\n", + "
" + ], + "text/plain": [ + " mean radius mean texture mean perimeter mean area \\\n", + "count 569.000000 569.000000 569.000000 569.000000 \n", + "mean 14.127292 19.289649 91.969033 654.889104 \n", + "std 3.524049 4.301036 24.298981 351.914129 \n", + "min 6.981000 9.710000 43.790000 143.500000 \n", + "25% 11.700000 16.170000 75.170000 420.300000 \n", + "50% 13.370000 18.840000 86.240000 551.100000 \n", + "75% 15.780000 21.800000 104.100000 782.700000 \n", + "max 28.110000 39.280000 188.500000 2501.000000 \n", + "\n", + " mean smoothness mean compactness mean concavity mean concave points \\\n", + "count 569.000000 569.000000 569.000000 569.000000 \n", + "mean 0.096360 0.104341 0.088799 0.048919 \n", + "std 0.014064 0.052813 0.079720 0.038803 \n", + "min 0.052630 0.019380 0.000000 0.000000 \n", + "25% 0.086370 0.064920 0.029560 0.020310 \n", + "50% 0.095870 0.092630 0.061540 0.033500 \n", + "75% 0.105300 0.130400 0.130700 0.074000 \n", + "max 0.163400 0.345400 0.426800 0.201200 \n", + "\n", + " mean symmetry mean fractal dimension ... worst texture \\\n", + "count 569.000000 569.000000 ... 569.000000 \n", + "mean 0.181162 0.062798 ... 25.677223 \n", + "std 0.027414 0.007060 ... 6.146258 \n", + "min 0.106000 0.049960 ... 12.020000 \n", + "25% 0.161900 0.057700 ... 21.080000 \n", + "50% 0.179200 0.061540 ... 25.410000 \n", + "75% 0.195700 0.066120 ... 29.720000 \n", + "max 0.304000 0.097440 ... 
49.540000 \n", + "\n", + " worst perimeter worst area worst smoothness worst compactness \\\n", + "count 569.000000 569.000000 569.000000 569.000000 \n", + "mean 107.261213 880.583128 0.132369 0.254265 \n", + "std 33.602542 569.356993 0.022832 0.157336 \n", + "min 50.410000 185.200000 0.071170 0.027290 \n", + "25% 84.110000 515.300000 0.116600 0.147200 \n", + "50% 97.660000 686.500000 0.131300 0.211900 \n", + "75% 125.400000 1084.000000 0.146000 0.339100 \n", + "max 251.200000 4254.000000 0.222600 1.058000 \n", + "\n", + " worst concavity worst concave points worst symmetry \\\n", + "count 569.000000 569.000000 569.000000 \n", + "mean 0.272188 0.114606 0.290076 \n", + "std 0.208624 0.065732 0.061867 \n", + "min 0.000000 0.000000 0.156500 \n", + "25% 0.114500 0.064930 0.250400 \n", + "50% 0.226700 0.099930 0.282200 \n", + "75% 0.382900 0.161400 0.317900 \n", + "max 1.252000 0.291000 0.663800 \n", + "\n", + " worst fractal dimension label \n", + "count 569.000000 569.000000 \n", + "mean 0.083946 0.627417 \n", + "std 0.018061 0.483918 \n", + "min 0.055040 0.000000 \n", + "25% 0.071460 0.000000 \n", + "50% 0.080040 1.000000 \n", + "75% 0.092080 1.000000 \n", + "max 0.207500 1.000000 \n", + "\n", + "[8 rows x 31 columns]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# statistical measures about the data\n", + "data_frame.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tC8Yii4Yjzer", + "outputId": "39273efc-e950-4cfe-fbc5-046f93ecc719" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "label\n", + "1 357\n", + "0 212\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# checking the distribution of Target Variable\n", + "data_frame['label'].value_counts()" + ] + }, + { + "cell_type": "markdown", + 
"metadata": { + "id": "HbbkzWeFkjqc" + }, + "source": [ + "1 --> Benign\n", + "\n", + "0 --> Malignant" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 198 + }, + "id": "YGWHjrVSkN5c", + "outputId": "b7863331-c13d-4f2a-a88a-37905eeb14e6" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst radiusworst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimension
label
017.46283021.604906115.365377978.3764150.1028980.1451880.1607750.0879900.1929090.062680...21.13481129.318208141.3703301422.2863210.1448450.3748240.4506060.1822370.3234680.091530
112.14652417.91476278.075406462.7901960.0924780.0800850.0460580.0257170.1741860.062867...13.37980123.51507087.005938558.8994400.1249590.1826730.1662380.0744440.2702460.079442
\n", + "

2 rows × 30 columns

\n", + "
" + ], + "text/plain": [ + " mean radius mean texture mean perimeter mean area mean smoothness \\\n", + "label \n", + "0 17.462830 21.604906 115.365377 978.376415 0.102898 \n", + "1 12.146524 17.914762 78.075406 462.790196 0.092478 \n", + "\n", + " mean compactness mean concavity mean concave points mean symmetry \\\n", + "label \n", + "0 0.145188 0.160775 0.087990 0.192909 \n", + "1 0.080085 0.046058 0.025717 0.174186 \n", + "\n", + " mean fractal dimension ... worst radius worst texture \\\n", + "label ... \n", + "0 0.062680 ... 21.134811 29.318208 \n", + "1 0.062867 ... 13.379801 23.515070 \n", + "\n", + " worst perimeter worst area worst smoothness worst compactness \\\n", + "label \n", + "0 141.370330 1422.286321 0.144845 0.374824 \n", + "1 87.005938 558.899440 0.124959 0.182673 \n", + "\n", + " worst concavity worst concave points worst symmetry \\\n", + "label \n", + "0 0.450606 0.182237 0.323468 \n", + "1 0.166238 0.074444 0.270246 \n", + "\n", + " worst fractal dimension \n", + "label \n", + "0 0.091530 \n", + "1 0.079442 \n", + "\n", + "[2 rows x 30 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_frame.groupby('label').mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tUPYps4DlVFR" + }, + "source": [ + "Separating the features and target" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "id": "Z5pD8rP5kzKD" + }, + "outputs": [], + "source": [ + "X = data_frame.drop(columns='label', axis=1)\n", + "Y = data_frame['label']" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Z-GWJHpAlpWJ", + "outputId": "31cc5c11-1b83-418e-8a49-23647e6fdd9d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " mean radius mean texture mean perimeter mean area mean smoothness \\\n", + "0 17.99 10.38 122.80 1001.0 0.11840 
\n", + "1 20.57 17.77 132.90 1326.0 0.08474 \n", + "2 19.69 21.25 130.00 1203.0 0.10960 \n", + "3 11.42 20.38 77.58 386.1 0.14250 \n", + "4 20.29 14.34 135.10 1297.0 0.10030 \n", + ".. ... ... ... ... ... \n", + "564 21.56 22.39 142.00 1479.0 0.11100 \n", + "565 20.13 28.25 131.20 1261.0 0.09780 \n", + "566 16.60 28.08 108.30 858.1 0.08455 \n", + "567 20.60 29.33 140.10 1265.0 0.11780 \n", + "568 7.76 24.54 47.92 181.0 0.05263 \n", + "\n", + " mean compactness mean concavity mean concave points mean symmetry \\\n", + "0 0.27760 0.30010 0.14710 0.2419 \n", + "1 0.07864 0.08690 0.07017 0.1812 \n", + "2 0.15990 0.19740 0.12790 0.2069 \n", + "3 0.28390 0.24140 0.10520 0.2597 \n", + "4 0.13280 0.19800 0.10430 0.1809 \n", + ".. ... ... ... ... \n", + "564 0.11590 0.24390 0.13890 0.1726 \n", + "565 0.10340 0.14400 0.09791 0.1752 \n", + "566 0.10230 0.09251 0.05302 0.1590 \n", + "567 0.27700 0.35140 0.15200 0.2397 \n", + "568 0.04362 0.00000 0.00000 0.1587 \n", + "\n", + " mean fractal dimension ... worst radius worst texture \\\n", + "0 0.07871 ... 25.380 17.33 \n", + "1 0.05667 ... 24.990 23.41 \n", + "2 0.05999 ... 23.570 25.53 \n", + "3 0.09744 ... 14.910 26.50 \n", + "4 0.05883 ... 22.540 16.67 \n", + ".. ... ... ... ... \n", + "564 0.05623 ... 25.450 26.40 \n", + "565 0.05533 ... 23.690 38.25 \n", + "566 0.05648 ... 18.980 34.12 \n", + "567 0.07016 ... 25.740 39.42 \n", + "568 0.05884 ... 9.456 30.37 \n", + "\n", + " worst perimeter worst area worst smoothness worst compactness \\\n", + "0 184.60 2019.0 0.16220 0.66560 \n", + "1 158.80 1956.0 0.12380 0.18660 \n", + "2 152.50 1709.0 0.14440 0.42450 \n", + "3 98.87 567.7 0.20980 0.86630 \n", + "4 152.20 1575.0 0.13740 0.20500 \n", + ".. ... ... ... ... 
\n", + "564 166.10 2027.0 0.14100 0.21130 \n", + "565 155.00 1731.0 0.11660 0.19220 \n", + "566 126.70 1124.0 0.11390 0.30940 \n", + "567 184.60 1821.0 0.16500 0.86810 \n", + "568 59.16 268.6 0.08996 0.06444 \n", + "\n", + " worst concavity worst concave points worst symmetry \\\n", + "0 0.7119 0.2654 0.4601 \n", + "1 0.2416 0.1860 0.2750 \n", + "2 0.4504 0.2430 0.3613 \n", + "3 0.6869 0.2575 0.6638 \n", + "4 0.4000 0.1625 0.2364 \n", + ".. ... ... ... \n", + "564 0.4107 0.2216 0.2060 \n", + "565 0.3215 0.1628 0.2572 \n", + "566 0.3403 0.1418 0.2218 \n", + "567 0.9387 0.2650 0.4087 \n", + "568 0.0000 0.0000 0.2871 \n", + "\n", + " worst fractal dimension \n", + "0 0.11890 \n", + "1 0.08902 \n", + "2 0.08758 \n", + "3 0.17300 \n", + "4 0.07678 \n", + ".. ... \n", + "564 0.07115 \n", + "565 0.06637 \n", + "566 0.07820 \n", + "567 0.12400 \n", + "568 0.07039 \n", + "\n", + "[569 rows x 30 columns]\n" + ] + } + ], + "source": [ + "print(X)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "r5rs8pColqsn", + "outputId": "842ccf4a-94a2-4409-df9a-79a25ad46f15" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 0\n", + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + " ..\n", + "564 0\n", + "565 0\n", + "566 0\n", + "567 0\n", + "568 1\n", + "Name: label, Length: 569, dtype: int64\n" + ] + } + ], + "source": [ + "print(Y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B5yk9d-Nl4VV" + }, + "source": [ + "Splitting the data into training data & Testing data" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "id": "vZtU30bPluG_" + }, + "outputs": [], + "source": [ + "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"zWaeuX3amqYH", + "outputId": "30695494-2fed-44bd-d533-34bbae6a7712" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(569, 30) (455, 30) (114, 30)\n" + ] + } + ], + "source": [ + "print(X.shape, X_train.shape, X_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ttUvP8-im6z3" + }, + "source": [ + "Model Training" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OPPOng9Em8eb" + }, + "source": [ + "Logistic Regression" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "id": "xhBPDKZmm0dk" + }, + "outputs": [], + "source": [ + "model = LogisticRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NkQlXVftnKOm", + "outputId": "2cdd7798-4283-4965-9a04-dc47600f5aa5" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\HP\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:465: ConvergenceWarning: lbfgs failed to converge (status=1):\n", + "STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.\n", + "\n", + "Increase the number of iterations (max_iter) or scale the data as shown in:\n", + " https://scikit-learn.org/stable/modules/preprocessing.html\n", + "Please also refer to the documentation for alternative solver options:\n", + " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", + " n_iter_i = _check_optimize_result(\n" + ] + }, + { + "data": { + "text/html": [ + "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LogisticRegression()" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# training the Logistic Regression model using Training data\n", + "\n", + "model.fit(X_train, Y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_Wvez7R5nj5P" + }, + "source": [ + "Model Evaluation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Um1zCUo1nmI1" + }, + "source": [ + "Accuracy Score" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "id": "Xy_Ob3Zcnh32" + }, + "outputs": [], + "source": [ + "# accuracy on training data\n", + "X_train_prediction = model.predict(X_train)\n", + "training_data_accuracy = accuracy_score(Y_train, X_train_prediction)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fsx-3RlfoMzN", + "outputId": "c30e9778-f98b-42f7-b0f5-84b32778a574" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy on training data = 0.9318681318681319\n" + ] + } + ], + "source": [ + "print('Accuracy on training data = ', training_data_accuracy)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "id": "1QYDU6YjoTFl" + }, + "outputs": [], + "source": [ + "# accuracy on test data\n", + "X_test_prediction = model.predict(X_test)\n", + "test_data_accuracy = accuracy_score(Y_test, X_test_prediction)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7HbERDa8orMw", + "outputId": "59e3c5aa-4965-4eab-b349-dcaa60e2c29c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy on test data = 0.9298245614035088\n" + ] + } + ], + "source": [ + "print('Accuracy on test data = ', test_data_accuracy)" + ] + }, + { + "cell_type": "markdown", + 
"metadata": { + "id": "wDV18UAJpB2B" + }, + "source": [ + "Building a Predictive System" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Rf_x3hPpouQh", + "outputId": "406e9d47-b912-40d3-fda5-1f6d67c21270" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1]\n", + "The Breast Cancer is Benign\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\HP\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\utils\\validation.py:2739: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "input_data = (13.54,14.36,87.46,566.3,0.09779,0.08129,0.06664,0.04781,0.1885,0.05766,0.2699,0.7886,2.058,23.56,0.008462,0.0146,0.02387,0.01315,0.0198,0.0023,15.11,19.26,99.7,711.2,0.144,0.1773,0.239,0.1288,0.2977,0.07259)\n", + "\n", + "# change the input data to a numpy array\n", + "input_data_as_numpy_array = np.asarray(input_data)\n", + "\n", + "# reshape the numpy array as we are predicting for one datapoint\n", + "input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)\n", + "\n", + "prediction = model.predict(input_data_reshaped)\n", + "print(prediction)\n", + "\n", + "if (prediction[0] == 0):\n", + " print('The Breast cancer is Malignant')\n", + "\n", + "else:\n", + " print('The Breast Cancer is Benign')\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tZVUPp5hqJn1" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}