# -*- coding: utf-8 -*-
"""Untitled19.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/14fK8TvV3AakdmLkH1MHkYcDeFVpENGGs
"""
!pip install datasets
!pip install huggingface_hub
# Authenticate with the Hugging Face Hub (either the CLI login or notebook_login() is enough)
!huggingface-cli login
from huggingface_hub import notebook_login
notebook_login()
!pip install tensorflow
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import tensorflow as tf
from huggingface_hub import upload_folder
import os
import shap # Make sure SHAP is installed: pip install shap
# Load the dataset directly as a CSV file using pandas
data = pd.read_csv("/content/cardio_train.csv", sep=';') # Ensure the correct delimiter is used
# Rename the target column
data = data.rename(columns={'cardio': 'target'})
# Select features and target
X = data.drop(columns='target') # Features
y = data['target'] # Target variable
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
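# NOTE: the raw features span very different scales (age in days, height in cm,
# blood pressures, binary flags), and a Keras model usually converges better on
# standardized inputs. Optional sketch only: the *_scaled variables below are
# illustrative and the rest of this script keeps training on the raw data.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)  # fit the scaler on the training split only
X_test_scaled = scaler.transform(X_test)        # reuse the training statistics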
# Build a Keras model
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(16, activation='relu', input_shape=(X_train.shape[1],))) # Input layer
model.add(tf.keras.layers.Dense(8, activation='relu')) # Hidden layer
model.add(tf.keras.layers.Dense(1, activation='sigmoid')) # Output layer for binary classification
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)
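# Optional: since fit() already uses validation_split, an EarlyStopping callback
# is a common way to stop once the validation loss stalls. A sketch only (the
# patience value is illustrative; the fit() call above does not use it):
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5,
                                              restore_best_weights=True)
# e.g. model.fit(X_train, y_train, epochs=50, batch_size=32,
#                validation_split=0.2, callbacks=[early_stop], verbose=1)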
# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Accuracy of Keras model: {accuracy:.4f}")
# Make predictions on the test set
y_pred = (model.predict(X_test) > 0.5).astype("int32") # Convert probabilities to binary
# Generate the classification report
report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report)
# Save the Keras model
model.save("Cardiovascular-Disease-Detection.keras")
# Specify the folder path for the model (the directory containing the model files)
folder_path = "apipyo/Cardiovascular_Disease"
# Create the directory if it doesn't exist
os.makedirs(folder_path, exist_ok=True)
# Move the saved model into the specified folder
os.rename("Cardiovascular-Disease-Detection.keras", os.path.join(folder_path, "Cardiovascular-Disease-Detection.keras"))
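# The upload_folder import at the top is otherwise unused; a sketch of pushing
# the saved model to the Hub is shown here. The repo_id is an assumption based
# on the local folder name, and the repo must already exist on the Hub:
# upload_folder(
#     repo_id="apipyo/Cardiovascular_Disease",  # assumed repo id
#     folder_path=folder_path,
#     repo_type="model",
#     commit_message="Add Keras cardiovascular model",
# )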
# Additional imports for the Gradient Boosting experiment
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
data = pd.read_csv("/content/cardio_train.csv", sep = ";")
data = data.rename(columns = {'cardio':'target'})
print(data.head())  # quick look at the first rows
!pip install sweetviz
import sweetviz as sv
# Generate the EDA report and write it to disk; show_html() creates the HTML
# file that is read back below
report = sv.analyze(data)
report.show_html('Cardiac_Data_Analysis.html', open_browser=False)
# Read the generated HTML report
with open('Cardiac_Data_Analysis.html', 'r', encoding='utf-8') as file:
    report_html = file.read()
# Save the HTML content as a binary .bin file
with open('Cardiac_Data_Analysis.bin', 'wb') as file:
    file.write(report_html.encode('utf-8'))
import torch
# As a simple example, store the length of the HTML string in a tensor
html_tensor = torch.tensor([len(report_html)])
# Save the tensor as a .pth file
torch.save(html_tensor, 'Cardiac_Data_Analysis.pth')
print("Files saved successfully: Cardiac_Data_Analysis.bin and Cardiac_Data_Analysis.pth")
X = data.drop(columns='target')  # features
Y = data['target']  # target variable
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42) # Adjust test_size and random_state as needed
# Initialize the Gradient Boosting Classifier
gb_classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
# Train the model
gb_classifier.fit(X_train, y_train)
# Make predictions on the test set
y_pred = gb_classifier.predict(X_test)
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Accuracy:", accuracy)
print("\nClassification Report:\n", report)
# Example input: one row of features in the same column order as X
# (id, age, gender, height, weight, ap_hi, ap_lo, cholesterol, gluc, smoke, alco, active)
input_data = (15, 22530, 1, 169, 80.0, 120, 80, 1, 1, 0, 0, 1)
idata = np.asarray(input_data)
idata_reshaped = idata.reshape(1, -1)  # reshape to a single-sample 2D array
model = gb_classifier  # rebind `model` to the Gradient Boosting classifier trained above
prediction = model.predict(idata_reshaped)
print(prediction)
if prediction[0] == 1:
    print("This person has heart disease")
else:
    print("This person is safe")
# Instead of coef_, use feature_importances_ to see the importance of each feature.
importances = model.feature_importances_
features = X.columns
for feature, importance in zip(features, importances):
    print(f'{feature}: {importance:.4f}')
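# shap was imported at the top of this script but never used; an optional
# sketch of per-feature attributions for the tree model (TreeExplainer is
# shap's fast path for gradient-boosted trees):
explainer = shap.TreeExplainer(gb_classifier)
shap_values = explainer.shap_values(X_test)
shap.summary_plot(shap_values, X_test)  # beeswarm plot of per-feature impact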
import joblib
import numpy as np
import torch
# Assuming gb_classifier is your trained scikit-learn model and y_pred are your predictions
# Save the trained model using joblib
joblib.dump(gb_classifier, 'model.pkl')
# Convert predictions to a numpy array (if not already) and then to a tensor
predictions_tensor = torch.tensor(y_pred)
# Save predictions to a .pth file
torch.save(predictions_tensor, 'predictions.pth')
print("Model saved as model.pkl and predictions saved as predictions.pth")
# Save the trained scikit-learn model using joblib (binary format); the .bin
# extension is cosmetic, the file is an ordinary joblib pickle
joblib.dump(gb_classifier, 'model.bin')
# Convert predictions to a numpy array if not already
predictions_array = np.array(y_pred)
# np.save would append ".npy" to the filename, so write the raw bytes instead
predictions_array.tofile('predictions.bin')
print("Model saved as model.bin and predictions saved as predictions.bin")