Source: The dataset was obtained from the Heart Disease Classification - Neural Network Kaggle Notebook data page.
The goal is to create a classification model that can accurately predict whether a patient will or won't have heart disease based on their recorded health metrics.
Contents
import numpy as np
import numpy.matlib
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial import distance_matrix
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering
import seaborn as sns
from sklearn.preprocessing import RobustScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import precision_recall_curve
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.metrics import auc
from sklearn.svm import SVC
%matplotlib inline
# Load the heart-disease dataset from 'heart.csv' (path is relative to the working directory)
# and preview the first ten rows.
heart = pd.read_csv('heart.csv')
heart.head(10)
| age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 63 | 1 | 3 | 145 | 233 | 1 | 0 | 150 | 0 | 2.3 | 0 | 0 | 1 | 1 |
| 1 | 37 | 1 | 2 | 130 | 250 | 0 | 1 | 187 | 0 | 3.5 | 0 | 0 | 2 | 1 |
| 2 | 41 | 0 | 1 | 130 | 204 | 0 | 0 | 172 | 0 | 1.4 | 2 | 0 | 2 | 1 |
| 3 | 56 | 1 | 1 | 120 | 236 | 0 | 1 | 178 | 0 | 0.8 | 2 | 0 | 2 | 1 |
| 4 | 57 | 0 | 0 | 120 | 354 | 0 | 1 | 163 | 1 | 0.6 | 2 | 0 | 2 | 1 |
| 5 | 57 | 1 | 0 | 140 | 192 | 0 | 1 | 148 | 0 | 0.4 | 1 | 0 | 1 | 1 |
| 6 | 56 | 0 | 1 | 140 | 294 | 0 | 0 | 153 | 0 | 1.3 | 1 | 0 | 2 | 1 |
| 7 | 44 | 1 | 1 | 120 | 263 | 0 | 1 | 173 | 0 | 0.0 | 2 | 0 | 3 | 1 |
| 8 | 52 | 1 | 2 | 172 | 199 | 1 | 1 | 162 | 0 | 0.5 | 2 | 0 | 3 | 1 |
| 9 | 57 | 1 | 2 | 150 | 168 | 0 | 1 | 174 | 0 | 1.6 | 2 | 0 | 2 | 1 |
# Summary statistics (count, mean, std, min, quartiles, max) for every column.
heart.describe()
| age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 |
| mean | 54.366337 | 0.683168 | 0.966997 | 131.623762 | 246.264026 | 0.148515 | 0.528053 | 149.646865 | 0.326733 | 1.039604 | 1.399340 | 0.729373 | 2.313531 | 0.544554 |
| std | 9.082101 | 0.466011 | 1.032052 | 17.538143 | 51.830751 | 0.356198 | 0.525860 | 22.905161 | 0.469794 | 1.161075 | 0.616226 | 1.022606 | 0.612277 | 0.498835 |
| min | 29.000000 | 0.000000 | 0.000000 | 94.000000 | 126.000000 | 0.000000 | 0.000000 | 71.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 47.500000 | 0.000000 | 0.000000 | 120.000000 | 211.000000 | 0.000000 | 0.000000 | 133.500000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 2.000000 | 0.000000 |
| 50% | 55.000000 | 1.000000 | 1.000000 | 130.000000 | 240.000000 | 0.000000 | 1.000000 | 153.000000 | 0.000000 | 0.800000 | 1.000000 | 0.000000 | 2.000000 | 1.000000 |
| 75% | 61.000000 | 1.000000 | 2.000000 | 140.000000 | 274.500000 | 0.000000 | 1.000000 | 166.000000 | 1.000000 | 1.600000 | 2.000000 | 1.000000 | 3.000000 | 1.000000 |
| max | 77.000000 | 1.000000 | 3.000000 | 200.000000 | 564.000000 | 1.000000 | 2.000000 | 202.000000 | 1.000000 | 6.200000 | 2.000000 | 4.000000 | 3.000000 | 1.000000 |
# Column dtypes and non-null counts.
heart.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 303 entries, 0 to 302 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 age 303 non-null int64 1 sex 303 non-null int64 2 cp 303 non-null int64 3 trestbps 303 non-null int64 4 chol 303 non-null int64 5 fbs 303 non-null int64 6 restecg 303 non-null int64 7 thalach 303 non-null int64 8 exang 303 non-null int64 9 oldpeak 303 non-null float64 10 slope 303 non-null int64 11 ca 303 non-null int64 12 thal 303 non-null int64 13 target 303 non-null int64 dtypes: float64(1), int64(13) memory usage: 33.3 KB
# Number of missing values in each column.
heart.isnull().sum()
age 0 sex 0 cp 0 trestbps 0 chol 0 fbs 0 restecg 0 thalach 0 exang 0 oldpeak 0 slope 0 ca 0 thal 0 target 0 dtype: int64
# (rows, columns) of the dataset.
heart.shape
(303, 14)
Overview of the Data
Considerations
Source: For more information on normalisation and how to know when to use standardisation vs normalisation click here and here.
At this stage, we are checking data distributions and for the presence of outliers to help decide which normalisation technique will be best to use.
Source: See this medium article for a guide on detecting and removing outliers from datasets in Python.
# Create some smaller data frames (each df contains columns of similar ranges) so we can have multiple boxplots in each figure.
# Columns are selected directly from `heart`: a misspelled name raises KeyError here,
# whereas pd.DataFrame(heart, columns=[...]) would silently add an all-NaN column.
heartbox1 = heart[["age", "trestbps", "chol", "thalach"]].copy()
# Check that the selection worked and that the columns are in a similar range (e.g. tens and hundreds)
heartbox1.describe()
| age | trestbps | chol | thalach | |
|---|---|---|---|---|
| count | 303.000000 | 303.000000 | 303.000000 | 303.000000 |
| mean | 54.366337 | 131.623762 | 246.264026 | 149.646865 |
| std | 9.082101 | 17.538143 | 51.830751 | 22.905161 |
| min | 29.000000 | 94.000000 | 126.000000 | 71.000000 |
| 25% | 47.500000 | 120.000000 | 211.000000 | 133.500000 |
| 50% | 55.000000 | 130.000000 | 240.000000 | 153.000000 |
| 75% | 61.000000 | 140.000000 | 274.500000 | 166.000000 |
| max | 77.000000 | 200.000000 | 564.000000 | 202.000000 |
# Features whose values lie between 0 and less than 10.
# Direct column selection raises KeyError on a misspelled name instead of
# silently creating an all-NaN column.
heartbox2 = heart[["cp", "restecg", "oldpeak", "slope", "ca", "thal"]].copy()
heartbox2.describe()
| cp | restecg | oldpeak | slope | ca | thal | |
|---|---|---|---|---|---|---|
| count | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 |
| mean | 0.966997 | 0.528053 | 1.039604 | 1.399340 | 0.729373 | 2.313531 |
| std | 1.032052 | 0.525860 | 1.161075 | 0.616226 | 1.022606 | 0.612277 |
| min | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 2.000000 |
| 50% | 1.000000 | 1.000000 | 0.800000 | 1.000000 | 0.000000 | 2.000000 |
| 75% | 2.000000 | 1.000000 | 1.600000 | 2.000000 | 1.000000 | 3.000000 |
| max | 3.000000 | 2.000000 | 6.200000 | 2.000000 | 4.000000 | 3.000000 |
# Binary (0/1) features, including the target label.
# Direct column selection raises KeyError on a misspelled name instead of
# silently creating an all-NaN column.
heartbox3 = heart[["sex", "fbs", "exang", "target"]].copy()
heartbox3.describe()
| sex | fbs | exang | target | |
|---|---|---|---|---|
| count | 303.000000 | 303.000000 | 303.000000 | 303.000000 |
| mean | 0.683168 | 0.148515 | 0.326733 | 0.544554 |
| std | 0.466011 | 0.356198 | 0.469794 | 0.498835 |
| min | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 50% | 1.000000 | 0.000000 | 0.000000 | 1.000000 |
| 75% | 1.000000 | 0.000000 | 1.000000 | 1.000000 |
| max | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
# Horizontal boxplots, grouped by value range so every box in a figure is readable.
wide_figsize = (20, 10)

# Features with values in the tens and hundreds
plt.figure(figsize=wide_figsize)
ax1 = sns.boxplot(data=heartbox1, orient="h", palette="Set1")

# Age on its own for a closer look
ax1a = sns.boxplot(x=heart["age"])

# Features with values between 0 and less than 10
plt.figure(figsize=wide_figsize)
ax2 = sns.boxplot(data=heartbox2, orient="h", palette="Set2")

# Features with values between 0 and 1
plt.figure(figsize=wide_figsize)
ax3 = sns.boxplot(data=heartbox3, orient="h", palette="Set3")
Features with Outliers
Features with skews
Given the high presence of outliers, MinMaxScaler will not be an appropriate normalisation technique to adopt for this dataset. RobustScaler will be implemented initially, as it appears to be the most robust to outliers (compared to MinMaxScaler, Normalization, and StandardScaler). Below is information on the RobustScaler technique; see [scikit-learn's guide on RobustScaler](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.RobustScaler.html#sklearn.preprocessing.RobustScaler) for more information.
# Apply the RobustScaler, using the 25th-75th percentile range as the scale, and
# report the per-column mean and standard deviation of the scaled array.
# NOTE(review): the scaler is fitted on the whole `heart` frame, so the `target`
# label column is rescaled along with the features (it shows up as -1/0 in the
# scaled data), and the fit uses every row, including rows later held out for testing.
Rscaler = RobustScaler(quantile_range=(25,75))
Rscaled_heart = Rscaler.fit_transform(heart)
print('The means after using RobustScaler for heart is: ', Rscaled_heart.mean(axis=0))
print('The standard deviations after using RobustScaler for heart is: ', Rscaled_heart.std(axis=0))
The means after using RobustScaler for heart is: [-0.04693803 -0.31683168 -0.01650165 0.08118812 0.09864609 0.14851485 -0.47194719 -0.10317339 0.32673267 0.14975248 0.39933993 0.72937294 0.31353135 -0.45544554] The standard deviations after using RobustScaler for heart is: [0.67163716 0.46524119 0.51517401 0.8754589 0.81488427 0.3556096 0.52499112 0.70361023 0.46901859 0.72447342 0.61520843 1.0209175 0.61126531 0.49801094]
# Report the per-column mean and standard deviation of the unscaled data.
before_stats = (
    ('The means before using RobustScaler for heart is: \n', heart.mean(axis=0)),
    ('The standard deviations before using RobustScaler for heart is: \n', heart.std(axis=0)),
)
for heading, values in before_stats:
    print(heading, values)
The means before using RobustScaler for heart is: age 54.366337 sex 0.683168 cp 0.966997 trestbps 131.623762 chol 246.264026 fbs 0.148515 restecg 0.528053 thalach 149.646865 exang 0.326733 oldpeak 1.039604 slope 1.399340 ca 0.729373 thal 2.313531 target 0.544554 dtype: float64 The standard deviations before using RobustScaler for heart is: age 9.082101 sex 0.466011 cp 1.032052 trestbps 17.538143 chol 51.830751 fbs 0.356198 restecg 0.525860 thalach 22.905161 exang 0.469794 oldpeak 1.161075 slope 0.616226 ca 1.022606 thal 0.612277 target 0.498835 dtype: float64
# Side-by-side comparison of every column's mean and standard deviation before
# and after scaling. Rows follow the column order of `heart` (index 0..13).
# The values are computed from the data instead of being pasted from earlier
# output, so the table cannot drift out of sync with the dataset.
# Both std columns use the sample formula (ddof=1): pandas defaults to ddof=1
# while NumPy defaults to ddof=0, and mixing the two made columns that the
# scaler leaves at unit scale (e.g. sex, fbs) look as if their spread had changed.
compare = {
    'Old Mean': heart.mean(axis=0).to_numpy(),
    'New Mean': Rscaled_heart.mean(axis=0),
    'Old Std Dev': heart.std(axis=0, ddof=1).to_numpy(),
    'New Std Dev': Rscaled_heart.std(axis=0, ddof=1),
}
compare_df = pd.DataFrame(data=compare)
compare_df
| Old Mean | New Mean | Old Std Dev | New Std Dev | |
|---|---|---|---|---|
| 0 | 54.366337 | -0.046938 | 9.082101 | 0.671637 |
| 1 | 0.683168 | -0.316832 | 0.466011 | 0.465241 |
| 2 | 0.966997 | -0.016502 | 1.032052 | 0.515174 |
| 3 | 131.623762 | 0.081188 | 17.538143 | 0.875459 |
| 4 | 246.264026 | 0.098646 | 51.830751 | 0.814884 |
| 5 | 0.148515 | 0.148515 | 0.356198 | 0.355610 |
| 6 | 0.528053 | -0.471947 | 0.525860 | 0.524991 |
| 7 | 149.646865 | -0.103173 | 22.905161 | 0.703610 |
| 8 | 0.326733 | 0.326733 | 0.469794 | 0.469019 |
| 9 | 1.039604 | 0.149752 | 1.161075 | 0.724473 |
| 10 | 1.399340 | 0.399340 | 0.616226 | 0.615208 |
| 11 | 0.729373 | 0.729373 | 1.022606 | 1.020917 |
| 12 | 2.313531 | 0.313531 | 0.612277 | 0.611265 |
| 13 | 0.544554 | -0.455446 | 0.498835 | 0.498011 |
# Plot boxplot for RobustScaler version of heart data to demonstrate that all data is within similar ranges of each other and have maintained individual range integrity.
# The scaled ndarray is wrapped in a DataFrame with the original column names so
# each box is labelled with its feature name instead of a positional index (0..13).
plt.figure(figsize=(20,10))
ax4 = sns.boxplot(data=pd.DataFrame(Rscaled_heart, columns=heart.columns), orient="h", palette="Set3")
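Did it Work? Overview of RobustScaler Impact The table below demonstrates that the presence of outliers and skew in each feature has not changed at all since using RobustScaler. This is expected: RobustScaler only shifts and rescales each feature, so the shape of each distribution is preserved.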
Before vs After: Features with Outliers, Features with skew
| Outliers Before | Outliers After | Skew Before | Skew After | |
|---|---|---|---|---|
| age | NO | NO | NO | NO |
| sex | NO | NO | NO | NO |
| cp | NO | NO | LEFT | LEFT |
| trestbps | YES | YES | NO | NO |
| chol | YES | YES | NO | NO |
| fbs | YES | YES | NO | NO |
| restecg | NO | NO | LEFT | LEFT |
| thalach | YES | YES | NO | NO |
| exang | NO | NO | NO | NO |
| oldpeak | YES | YES | LEFT | LEFT |
| slope | NO | NO | RIGHT | RIGHT |
| ca | YES | YES | LEFT | LEFT |
| thal | YES | YES | RIGHT | RIGHT |
| target | NO | NO | NO | NO |
# Wrap the scaled ndarray in a DataFrame. Column names and row index are taken
# from `heart` (the frame the scaler was fitted on) instead of a hand-typed list,
# so the labels cannot fall out of step with the data.
rscaled_heart_df = pd.DataFrame(data=Rscaled_heart, columns=heart.columns, index=heart.index)
rscaled_heart_df.describe() # Note below that all features now have a median value of 0
| age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 |
| mean | -0.046938 | -0.316832 | -0.016502 | 0.081188 | 0.098646 | 0.148515 | -0.471947 | -0.103173 | 0.326733 | 0.149752 | 0.399340 | 0.729373 | 0.313531 | -0.455446 |
| std | 0.672748 | 0.466011 | 0.516026 | 0.876907 | 0.816232 | 0.356198 | 0.525860 | 0.704774 | 0.469794 | 0.725672 | 0.616226 | 1.022606 | 0.612277 | 0.498835 |
| min | -1.925926 | -1.000000 | -0.500000 | -1.800000 | -1.795276 | 0.000000 | -1.000000 | -2.523077 | 0.000000 | -0.500000 | -1.000000 | 0.000000 | -2.000000 | -1.000000 |
| 25% | -0.555556 | -1.000000 | -0.500000 | -0.500000 | -0.456693 | 0.000000 | -1.000000 | -0.600000 | 0.000000 | -0.500000 | 0.000000 | 0.000000 | 0.000000 | -1.000000 |
| 50% | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 75% | 0.444444 | 0.000000 | 0.500000 | 0.500000 | 0.543307 | 0.000000 | 0.000000 | 0.400000 | 1.000000 | 0.500000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 |
| max | 1.629630 | 0.000000 | 1.000000 | 3.500000 | 5.102362 | 1.000000 | 1.000000 | 1.507692 | 1.000000 | 3.375000 | 1.000000 | 4.000000 | 1.000000 | 0.000000 |
See below for a summarised comparison of the two dataframes (RobustScaler is presented first).
# First five rows of the RobustScaler-transformed data.
rscaled_heart_df.head()
| age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.592593 | 0.0 | 1.0 | 0.75 | -0.110236 | 1.0 | -1.0 | -0.092308 | 0.0 | 0.9375 | -1.0 | 0.0 | -1.0 | 0.0 |
| 1 | -1.333333 | 0.0 | 0.5 | 0.00 | 0.157480 | 0.0 | 0.0 | 1.046154 | 0.0 | 1.6875 | -1.0 | 0.0 | 0.0 | 0.0 |
| 2 | -1.037037 | -1.0 | 0.0 | 0.00 | -0.566929 | 0.0 | -1.0 | 0.584615 | 0.0 | 0.3750 | 1.0 | 0.0 | 0.0 | 0.0 |
| 3 | 0.074074 | 0.0 | 0.0 | -0.50 | -0.062992 | 0.0 | 0.0 | 0.769231 | 0.0 | 0.0000 | 1.0 | 0.0 | 0.0 | 0.0 |
| 4 | 0.148148 | -1.0 | -0.5 | -0.50 | 1.795276 | 0.0 | 0.0 | 0.307692 | 1.0 | -0.1250 | 1.0 | 0.0 | 0.0 | 0.0 |
# First five rows of the original (unscaled) data, for comparison.
heart.head()
| age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 63 | 1 | 3 | 145 | 233 | 1 | 0 | 150 | 0 | 2.3 | 0 | 0 | 1 | 1 |
| 1 | 37 | 1 | 2 | 130 | 250 | 0 | 1 | 187 | 0 | 3.5 | 0 | 0 | 2 | 1 |
| 2 | 41 | 0 | 1 | 130 | 204 | 0 | 0 | 172 | 0 | 1.4 | 2 | 0 | 2 | 1 |
| 3 | 56 | 1 | 1 | 120 | 236 | 0 | 1 | 178 | 0 | 0.8 | 2 | 0 | 2 | 1 |
| 4 | 57 | 0 | 0 | 120 | 354 | 0 | 1 | 163 | 1 | 0.6 | 2 | 0 | 2 | 1 |
# Pairwise plot to show every feature plotted against every feature (each feature gets to be both y-axis and x-axis at least once).
# Hue is set to 'target'. After scaling, target is 0.0 for individuals with heart disease (blue plots) and -1.0 for those without heart disease (red plots).
# This plot shows the individual relationships between each pair of features; it could be considered a more visual representation of a correlation matrix.
sns.pairplot(rscaled_heart_df, hue='target', palette='Set1')
<seaborn.axisgrid.PairGrid at 0x13f683e1490>
Numerical categories key:
Pairplot Explained (this will be a lot, skim read for summaries) NB: duplicate pairs and pairs with inverted axis (e.g. thal against thal, age against thal, thal against age) have not been included, only one version of each pair is discussed.
It is very difficult to see any strong pairwise relationships or correlations between the features. The pairplot did not highlight any significant correlations. Next step is to create heatmaps to see this better.
# Annotated heatmap of the pairwise correlations between all scaled columns.
corr_matrix = rscaled_heart_df.corr()
corr_fig, corr_ax = plt.subplots(figsize=(20, 10))
sns.heatmap(corr_matrix, annot=True, cmap="rainbow", ax=corr_ax)
plt.show()
# Narrow vertical heatmap of each column's correlation with target
# (scaled target: -1 = no heart disease, 0 = heart disease).
full_corr = rscaled_heart_df.corr()
heart_corr = full_corr[['target']]
plt.figure(figsize=(3, 10))
sns.heatmap(heart_corr, annot=True, cmap='seismic')
<AxesSubplot:>
Individual correlations to target summarised
# Define X and y for train test split.
# X keeps the globally scaled features, unchanged, for any later use.
# y uses the ORIGINAL 0/1 labels from `heart`, not the rescaled -1/0 copy in rscaled_heart_df.
X = rscaled_heart_df.drop(columns=['target'])
y = heart['target'].values

# Split the raw features first, then fit the scaler on the training rows only.
# Taking X_train/X_test from a scaler fitted on the full dataset lets the test
# rows influence the median/IQR used for scaling (data leakage), which makes the
# test scores optimistic.
# test_size and random_state are the same as before, so the rows that land in
# train and test are the same as when splitting X directly.
raw_features = heart.drop(columns=['target'])
raw_train, raw_test, y_train, y_test = train_test_split(raw_features, y, test_size=0.2, random_state=56) # creates training and test sets
split_scaler = RobustScaler(quantile_range=(25, 75))
X_train = pd.DataFrame(split_scaler.fit_transform(raw_train), columns=raw_features.columns, index=raw_train.index)
X_test = pd.DataFrame(split_scaler.transform(raw_test), columns=raw_features.columns, index=raw_test.index)
# Make a decision tree capped at 13 leaves. random_state is fixed so the fitted
# tree, the reported accuracy and the plot are reproducible between runs.
tree_mod = DecisionTreeClassifier(max_leaf_nodes=13, random_state=56)
tree_mod.fit(X_train, y_train)
# Make predictions on the held-out test set
pred = tree_mod.predict(X_test)
# Test-set accuracy
score1 = accuracy_score(y_test, pred)
print(score1)
# Show tree plot. A large figure plus feature and class names make the nodes readable;
# class_names follow the ascending label order (0 = no heart disease, 1 = heart disease).
plt.figure(figsize=(20, 10))
tree.plot_tree(
    tree_mod,
    filled=True,
    feature_names=list(X_train.columns),
    class_names=['No Heart Disease', 'Heart Disease'],
)
plt.show()
0.7868852459016393
See this tutorial for a beginner's guide to deep learning in Python.
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
# Build a small fully connected binary classifier: two hidden ReLU layers of
# 11 units each, followed by a single sigmoid output unit.
classifier = Sequential()
# Adding the input layer and the first hidden layer.
# The input width is read from the training data instead of being hard-coded to 13,
# so the network still builds if a feature is added or dropped upstream.
classifier.add(Dense(11, kernel_initializer = 'uniform', activation = 'relu', input_dim = X_train.shape[1]))
# Adding the second hidden layer
classifier.add(Dense(11, kernel_initializer = 'uniform', activation = 'relu'))
# Adding the output layer
classifier.add(Dense(1, kernel_initializer = 'uniform', activation = 'sigmoid'))
# Compiling the ANN
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
# Train for 100 epochs in mini-batches of 10 samples
classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)
Epoch 1/100 25/25 [==============================] - 1s 1ms/step - loss: 0.6928 - accuracy: 0.5248 Epoch 2/100 25/25 [==============================] - 0s 1ms/step - loss: 0.6912 - accuracy: 0.6529 Epoch 3/100 25/25 [==============================] - 0s 1ms/step - loss: 0.6849 - accuracy: 0.8099 Epoch 4/100 25/25 [==============================] - 0s 1ms/step - loss: 0.6670 - accuracy: 0.7934 Epoch 5/100 25/25 [==============================] - 0s 1ms/step - loss: 0.6329 - accuracy: 0.8099 Epoch 6/100 25/25 [==============================] - 0s 1ms/step - loss: 0.5858 - accuracy: 0.7934 Epoch 7/100 25/25 [==============================] - 0s 1ms/step - loss: 0.5398 - accuracy: 0.8017 Epoch 8/100 25/25 [==============================] - 0s 1ms/step - loss: 0.4953 - accuracy: 0.8306 Epoch 9/100 25/25 [==============================] - 0s 1ms/step - loss: 0.4579 - accuracy: 0.8430 Epoch 10/100 25/25 [==============================] - 0s 1ms/step - loss: 0.4273 - accuracy: 0.8512 Epoch 11/100 25/25 [==============================] - 0s 1ms/step - loss: 0.4061 - accuracy: 0.8430 Epoch 12/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3907 - accuracy: 0.8595 Epoch 13/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3803 - accuracy: 0.8678 Epoch 14/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3716 - accuracy: 0.8719 Epoch 15/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3678 - accuracy: 0.8678 Epoch 16/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3636 - accuracy: 0.8678 Epoch 17/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3633 - accuracy: 0.8636 Epoch 18/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3603 - accuracy: 0.8678 Epoch 19/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3592 - accuracy: 0.8678 Epoch 20/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3573 - accuracy: 0.8595 Epoch 
21/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3573 - accuracy: 0.8636 Epoch 22/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3551 - accuracy: 0.8595 Epoch 23/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3538 - accuracy: 0.8554 Epoch 24/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3531 - accuracy: 0.8595 Epoch 25/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3516 - accuracy: 0.8595 Epoch 26/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3513 - accuracy: 0.8678 Epoch 27/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3507 - accuracy: 0.8678 Epoch 28/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3489 - accuracy: 0.8636 Epoch 29/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3479 - accuracy: 0.8554 Epoch 30/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3484 - accuracy: 0.8595 Epoch 31/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3517 - accuracy: 0.8678 Epoch 32/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3468 - accuracy: 0.8636 Epoch 33/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3458 - accuracy: 0.8636 Epoch 34/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3455 - accuracy: 0.8636 Epoch 35/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3439 - accuracy: 0.8678 Epoch 36/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3436 - accuracy: 0.8595 Epoch 37/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3426 - accuracy: 0.8636 Epoch 38/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3427 - accuracy: 0.8595 Epoch 39/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3432 - accuracy: 0.8512 Epoch 40/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3411 - accuracy: 0.8636 Epoch 
41/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3415 - accuracy: 0.8636 Epoch 42/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3413 - accuracy: 0.8719 Epoch 43/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3415 - accuracy: 0.8678 Epoch 44/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3395 - accuracy: 0.8719 Epoch 45/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3386 - accuracy: 0.8636 Epoch 46/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3395 - accuracy: 0.8595 Epoch 47/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3385 - accuracy: 0.8678 Epoch 48/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3381 - accuracy: 0.8719 Epoch 49/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3367 - accuracy: 0.8595 Epoch 50/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3366 - accuracy: 0.8595 Epoch 51/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3370 - accuracy: 0.8595 Epoch 52/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3362 - accuracy: 0.8595 Epoch 53/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3353 - accuracy: 0.8719 Epoch 54/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3350 - accuracy: 0.8719 Epoch 55/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3351 - accuracy: 0.8678 Epoch 56/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3336 - accuracy: 0.8636 Epoch 57/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3327 - accuracy: 0.8719 Epoch 58/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3311 - accuracy: 0.8719 Epoch 59/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3329 - accuracy: 0.8636 Epoch 60/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3329 - accuracy: 0.8719 Epoch 
61/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3313 - accuracy: 0.8760 Epoch 62/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3305 - accuracy: 0.8760 Epoch 63/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3301 - accuracy: 0.8719 Epoch 64/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3296 - accuracy: 0.8843 Epoch 65/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3286 - accuracy: 0.8719 Epoch 66/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3314 - accuracy: 0.8595 Epoch 67/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3291 - accuracy: 0.8636 Epoch 68/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3275 - accuracy: 0.8719 Epoch 69/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3268 - accuracy: 0.8719 Epoch 70/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3265 - accuracy: 0.8719 Epoch 71/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3261 - accuracy: 0.8719 Epoch 72/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3256 - accuracy: 0.8595 Epoch 73/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3270 - accuracy: 0.8719 Epoch 74/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3245 - accuracy: 0.8719 Epoch 75/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3253 - accuracy: 0.8636 Epoch 76/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3238 - accuracy: 0.8719 Epoch 77/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3233 - accuracy: 0.8760 Epoch 78/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3237 - accuracy: 0.8760 Epoch 79/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3240 - accuracy: 0.8678 Epoch 80/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3224 - accuracy: 0.8719 Epoch 
81/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3226 - accuracy: 0.8719 Epoch 82/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3223 - accuracy: 0.8760 Epoch 83/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3220 - accuracy: 0.8760 Epoch 84/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3215 - accuracy: 0.8636 Epoch 85/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3196 - accuracy: 0.8760 Epoch 86/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3198 - accuracy: 0.8760 Epoch 87/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3197 - accuracy: 0.8802 Epoch 88/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3182 - accuracy: 0.8802 Epoch 89/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3178 - accuracy: 0.8802 Epoch 90/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3182 - accuracy: 0.8802 Epoch 91/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3172 - accuracy: 0.8802 Epoch 92/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3168 - accuracy: 0.8802 Epoch 93/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3164 - accuracy: 0.8802 Epoch 94/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3163 - accuracy: 0.8802 Epoch 95/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3159 - accuracy: 0.8802 Epoch 96/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3154 - accuracy: 0.8802 Epoch 97/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3149 - accuracy: 0.8802 Epoch 98/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3152 - accuracy: 0.8760 Epoch 99/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3152 - accuracy: 0.8719 Epoch 100/100 25/25 [==============================] - 0s 1ms/step - loss: 0.3137 - accuracy: 0.8760
<keras.callbacks.History at 0x13f7a581a60>
# Score the network on the test set: predicted probabilities are rounded to
# 0/1 labels once and reused for the confusion matrix and the accuracy.
y_pred = classifier.predict(X_test)
rounded_pred = y_pred.round()

# Confusion matrix drawn as an annotated heatmap
cm1 = confusion_matrix(y_test, rounded_pred)
sns.heatmap(cm1, annot=True, cmap="Reds", fmt="d", cbar=False)

# Accuracy score
ac1 = accuracy_score(y_test, rounded_pred)
print('accuracy of the model: ',ac1)
accuracy of the model: 0.8032786885245902
# Grouped bar chart of heart-disease frequency by sex (both columns taken from the scaled frame).
sex_by_target = pd.crosstab(rscaled_heart_df.sex, rscaled_heart_df.target)
sex_ax = sex_by_target.plot(kind="bar", figsize=(10, 5), color=['purple', 'orange'])
sex_ax.set_xlabel('Sex (-1 = Female, 0 = Male)')
plt.setp(sex_ax.get_xticklabels(), rotation=0)
sex_ax.legend(["No Heart Disease", "Heart Disease"])
sex_ax.set_ylabel('Frequency')
plt.show()