Cardiovascular-Disease-Detection / Cardiovascular-Disease-Detection.py

Rename untitled19.py to Cardiovascular-Disease-Detection.py

a1317ed verified 6 months ago

6.85 kB

	# -*- coding: utf-8 -*-
	"""Untitled19.ipynb

	Automatically generated by Colab.

	Original file is located at
	https://colab.research.google.com/drive/14fK8TvV3AakdmLkH1MHkYcDeFVpENGGs
	"""

	# Notebook setup cell. Lines starting with "!" are IPython/Colab shell
	# escapes rather than Python statements, so this file only runs unmodified
	# inside a notebook environment.
	!pip install datasets

	!pip install huggingface_hub

	# Command-line login to the Hugging Face Hub (prompts for an access token).
	!huggingface-cli login

	from huggingface_hub import notebook_login

	# Notebook login helper provided by huggingface_hub.
	notebook_login()

	!pip install tensorflow

	import numpy as np
	import pandas as pd
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import classification_report
	import tensorflow as tf
	from huggingface_hub import upload_folder
	import os
	import shap # Make sure SHAP is installed: pip install shap

	# Train, evaluate and persist a small feed-forward Keras classifier.

	# Load the semicolon-delimited CSV and rename the label column to 'target'.
	data = pd.read_csv("/content/cardio_train.csv", sep=';')
	data = data.rename(columns={'cardio': 'target'})

	# Every column except the label is used as a feature.
	X = data.drop(columns='target')
	y = data['target']

	# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

	# Standardise the inputs inside the model. The columns are fed to the Dense
	# layers as-is otherwise, and features on very different scales make
	# gradient-based training unreliable. Statistics are computed from the
	# training split only so that nothing leaks from the test rows, and because
	# the layer is part of the model the saved file still accepts raw features.
	normalizer = tf.keras.layers.Normalization(axis=-1)
	normalizer.adapt(X_train.to_numpy(dtype="float32"))

	# Two hidden ReLU layers followed by a single sigmoid unit for the binary label.
	model = tf.keras.models.Sequential([
	    tf.keras.Input(shape=(X_train.shape[1],)),
	    normalizer,
	    tf.keras.layers.Dense(16, activation='relu'),
	    tf.keras.layers.Dense(8, activation='relu'),
	    tf.keras.layers.Dense(1, activation='sigmoid'),
	])

	model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

	# 20% of the training split is set aside by Keras for per-epoch validation.
	model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

	# Evaluate the model on the held-out test set.
	loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
	print(f"Accuracy of Keras model: {accuracy:.4f}")

	# Threshold the sigmoid output at 0.5 and flatten the (n, 1) predictions to
	# (n,) so they have the same shape as y_test.
	y_pred = (model.predict(X_test) > 0.5).astype("int32").ravel()

	report = classification_report(y_test, y_pred)
	print("\nClassification Report:\n", report)

	# Save straight into the folder that is uploaded later, instead of saving
	# to the working directory and renaming afterwards.
	folder_path = "apipyo/Cardiovascular_Disease"
	os.makedirs(folder_path, exist_ok=True)
	model.save(os.path.join(folder_path, "Cardiovascular-Disease-Detection.keras"))

	import numpy as np
	import pandas as pd
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import classification_report
	import tensorflow as tf
	from huggingface_hub import upload_folder
	import os
	import shap

	# Local directory that holds the saved model; redefined so this cell can be
	# run on its own.
	folder_path = "apipyo/Cardiovascular_Disease"

	# Upload the directory to the Hub model repository (requires permission to
	# write to that repo).
	upload_options = {
	    "repo_id": "apipyo/Cardiovascular-Disease-Detection",
	    "repo_type": "model",
	    "folder_path": folder_path,
	}
	upload_folder(**upload_options)

	print("Model uploaded successfully!")

	# SHAP feature attribution for the trained Keras classifier.

	# Reload the data and recreate the same train/test split (same seed) so this
	# cell can be run on its own.
	data = pd.read_csv("/content/cardio_train.csv", sep=';')
	data = data.rename(columns={'cardio': 'target'})

	X = data.drop(columns='target')
	y = data['target']

	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

	# Explain the model that was actually trained and saved, not a freshly
	# built network: an unfitted model has random weights, so its SHAP values
	# would say nothing about the data.
	model = tf.keras.models.load_model(
	    os.path.join("apipyo/Cardiovascular_Disease", "Cardiovascular-Disease-Detection.keras")
	)


	def predict_positive_probability(features):
	    """Return the model's sigmoid output as a 1-D array, one value per row."""
	    # Flattening (n, 1) -> (n,) makes KernelExplainer treat the model as
	    # single-output, so shap_values is one (rows, features) matrix.
	    return model.predict(features, verbose=0).ravel()


	# KernelExplainer evaluates the model on every background row for every
	# explained row, so passing the full training and test sets is impractical.
	# Use small fixed-seed subsamples instead.
	background = shap.sample(X_train, 100, random_state=42)
	explained_rows = shap.sample(X_test, 200, random_state=42)

	explainer = shap.KernelExplainer(predict_positive_probability, background)
	shap_values = explainer.shap_values(explained_rows)

	# Plot the SHAP values for the explained subsample.
	shap.summary_plot(shap_values, explained_rows)

	import numpy as np
	import pandas as pd
	from sklearn.model_selection import train_test_split
	from sklearn.ensemble import GradientBoostingClassifier
	from sklearn.metrics import accuracy_score, classification_report


	import tensorflow as tf
	from tensorflow.keras.models import Sequential
	from tensorflow.keras.layers import Dense, Flatten
	from tensorflow.keras.datasets import mnist

	data = pd.read_csv("/content/cardio_train.csv", sep = ";")
	data = data.rename(columns = {'cardio':'target'})
	data.head()

	!pip install sweetviz
	import sweetviz as sv
	report = sv.analyze(data)

	# Membaca isi file HTML jika sudah ada
	with open('Cardiac_Data_Analysis.html', 'r') as file:
	report_html = file.read()

	# Simpan konten HTML sebagai file .bin
	with open('Cardiac_Data_Analysis.bin', 'wb') as file:
	file.write(report_html.encode('utf-8'))

	# Features are every column except the label; the label is the 'target' column.
	X, Y = data.drop(columns='target', axis=1), data['target']

	# 80/20 train/test split with a fixed seed.
	X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

	# Configure and fit the gradient boosting classifier.
	gb_params = {
	    "n_estimators": 100,
	    "learning_rate": 0.1,
	    "max_depth": 3,
	    "random_state": 42,
	}
	gb_classifier = GradientBoostingClassifier(**gb_params)
	gb_classifier.fit(X_train, y_train)

	# Score the held-out rows and summarise the results.
	y_pred = gb_classifier.predict(X_test)
	accuracy = accuracy_score(y_test, y_pred)
	report = classification_report(y_test, y_pred)

	print("Accuracy:", accuracy)
	print("\nClassification Report:\n", report)

	# Score one hand-entered record with the gradient boosting classifier.
	# The values are matched to features by position, so they must follow the
	# column order of X.
	input_data = (15,22530,1,169,80.0,120,80,1,1,0,0,1)
	idata = np.asarray(input_data)

	# Wrap the single row in a DataFrame carrying the training column names;
	# a bare ndarray makes scikit-learn warn about missing feature names when
	# the estimator was fitted on a DataFrame.
	idata_reshaped = pd.DataFrame(idata.reshape(1, -1), columns=X.columns)

	model = gb_classifier # Assuming gb_classifier was trained in a previous cell

	prediction = model.predict(idata_reshaped)

	print(prediction)

	# Class 1 is the positive label of the 'target' column.
	if prediction[0] == 1:
	    print("This person has heart disease")
	else:
	    print("This person is safe")

	# The fitted classifier exposes feature_importances_ (rather than coef_);
	# print one score per feature column, in column order, to four decimals.
	importances = model.feature_importances_
	features = X.columns
	for position, score in enumerate(importances):
	    print(f'{features[position]}: {score:.4f}')

	from huggingface_hub import push_to_hub_keras

	# `model` was rebound to the scikit-learn gradient boosting classifier in an
	# earlier cell, and push_to_hub_keras only accepts Keras models. Load the
	# saved Keras model and push that instead.
	# NOTE(review): push_to_hub_keras may be unavailable in newer huggingface_hub
	# releases or with Keras 3 - confirm against the installed versions.
	keras_model = tf.keras.models.load_model(
	    "apipyo/Cardiovascular_Disease/Cardiovascular-Disease-Detection.keras"
	)

	push_to_hub_keras(keras_model, 'Cardiovascular-Disease-Detection')