import numpy as np 
import numpy.matlib
import pandas as pd 
import matplotlib.pyplot as plt  
from scipy.spatial import distance_matrix
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering
import seaborn as sns 
from sklearn.preprocessing import RobustScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import precision_recall_curve
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.metrics import auc
from sklearn.svm import SVC
%matplotlib inline


# Load the heart-disease dataset from the working directory.
heart = pd.read_csv("heart.csv")
# Preview the first ten records.
heart.head(n=10)


# Summary statistics (count, mean, std, quartiles) for every column.
heart.describe()


# Column dtypes, non-null counts and memory footprint.
heart.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalach   303 non-null    int64  
 8   exang     303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    int64  
 11  ca        303 non-null    int64  
 12  thal      303 non-null    int64  
 13  target    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


# Count missing values per column (isna is the canonical spelling of isnull).
heart.isna().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64


# (rows, columns) of the loaded frame.
heart.shape

(303, 14)


# Split the columns into three smaller frames by value range so that each
# boxplot figure only shows columns on a comparable scale.
# Group 1: values in the tens to hundreds.
heartbox1 = heart[["age", "trestbps", "chol", "thalach"]].copy()
# Confirm the selection worked and that the ranges really are comparable.
heartbox1.describe()


# Group 2: values from 0 up to (but below) 10.
heartbox2 = heart[["cp", "restecg", "oldpeak", "slope", "ca", "thal"]].copy()
heartbox2.describe()


# Group 3: 0/1 columns, including the target.
heartbox3 = heart[["sex", "fbs", "exang", "target"]].copy()
heartbox3.describe()


# Horizontal boxplots for the columns whose values run from tens to hundreds.

plt.figure(figsize=(20, 10))
ax1 = sns.boxplot(data=heartbox1, orient="h", palette="Set1", ax=plt.gca())


# Closer look at age on its own.
ax1a = sns.boxplot(data=heart, x="age")


# Horizontal boxplots for the columns with values between 0 and less than 10.
plt.figure(figsize=(20, 10))
ax2 = sns.boxplot(data=heartbox2, orient="h", palette="Set2", ax=plt.gca())


# Horizontal boxplots for the 0/1 columns.
plt.figure(figsize=(20, 10))
ax3 = sns.boxplot(data=heartbox3, orient="h", palette="Set3", ax=plt.gca())


# Apply RobustScaler to the whole frame using the 25th-75th percentile range.
# NOTE(review): the frame still contains 'target', so the label column is
# rescaled together with the features.
Rscaler = RobustScaler(quantile_range=(25, 75))
Rscaler.fit(heart)
Rscaled_heart = Rscaler.transform(heart)


# Column-wise mean and standard deviation of the scaled NumPy array
# (ndarray.std defaults to ddof=0).
scaled_means = Rscaled_heart.mean(axis=0)
scaled_stds = Rscaled_heart.std(axis=0)
print('The means after using RobustScaler for heart is: ', scaled_means)
print('The standard deviations after using RobustScaler for heart is: ', scaled_stds)

The means after using RobustScaler for heart is:  [-0.04693803 -0.31683168 -0.01650165  0.08118812  0.09864609  0.14851485
 -0.47194719 -0.10317339  0.32673267  0.14975248  0.39933993  0.72937294
  0.31353135 -0.45544554]
The standard deviations after using RobustScaler for heart is:  [0.67163716 0.46524119 0.51517401 0.8754589  0.81488427 0.3556096
 0.52499112 0.70361023 0.46901859 0.72447342 0.61520843 1.0209175
 0.61126531 0.49801094]


# Same statistics on the unscaled frame for comparison
# (DataFrame.std defaults to ddof=1).
raw_means = heart.mean(axis=0)
raw_stds = heart.std(axis=0)
print('The means before using RobustScaler for heart is: \n', raw_means)
print('The standard deviations before using RobustScaler for heart is: \n', raw_stds)

The means before using RobustScaler for heart is: 
 age          54.366337
sex           0.683168
cp            0.966997
trestbps    131.623762
chol        246.264026
fbs           0.148515
restecg       0.528053
thalach     149.646865
exang         0.326733
oldpeak       1.039604
slope         1.399340
ca            0.729373
thal          2.313531
target        0.544554
dtype: float64
The standard deviations before using RobustScaler for heart is: 
 age          9.082101
sex          0.466011
cp           1.032052
trestbps    17.538143
chol        51.830751
fbs          0.356198
restecg      0.525860
thalach     22.905161
exang        0.469794
oldpeak      1.161075
slope        0.616226
ca           1.022606
thal         0.612277
target       0.498835
dtype: float64


# Side-by-side mean / standard deviation of every column before and after
# RobustScaler.
#
# The figures are computed from the data instead of being pasted in from
# earlier printouts, so the table cannot drift out of date when the data or
# the scaler settings change.  Both standard deviations use the pandas default
# (ddof=1): mixing pandas std (ddof=1) with NumPy std (ddof=0) made columns
# whose interquartile range is 1 (e.g. 'sex') look as if scaling had changed
# their spread when it had not.
scaled_frame = pd.DataFrame(Rscaled_heart, columns=heart.columns, index=heart.index)
compare = {
    'Old Mean': heart.mean(),
    'New Mean': scaled_frame.mean(),
    'Old Std Dev': heart.std(),
    'New Std Dev': scaled_frame.std(),
}

# Rows are labelled with the feature names, in the original column order.
compare_df = pd.DataFrame(data=compare, index=heart.columns)
compare_df


# Boxplot of the RobustScaler output, to show that every column now sits on a
# comparable scale while keeping its own spread and outliers.
plt.figure(figsize=(20, 10))
ax4 = sns.boxplot(data=Rscaled_heart, orient="h", palette="Set3", ax=plt.gca())


# Wrap the scaled array in a DataFrame that reuses the original column labels.
rscaled_heart_df = pd.DataFrame(Rscaled_heart, columns=list(heart.columns))
rscaled_heart_df.describe()  # Note below that every column now has a median (50%) of 0


# First rows after scaling ...
rscaled_heart_df.head()


# ... and the same rows before scaling, for comparison.
heart.head()


# Pairwise plot: every feature is plotted against every other feature, so each one appears on both the x-axis and the y-axis.
# Hue is set to 'target'. After scaling, 0.0 marks individuals with heart disease (blue) and -1.0 marks those without (red).
# The grid shows the relationship between each pair of features; it can be read as a more visual representation of a correlation matrix.
sns.pairplot(data=rscaled_heart_df, hue='target', palette='Set1')

<seaborn.axisgrid.PairGrid at 0x13f683e1490>


# Annotated correlation heatmap across all scaled columns (target included).
plt.figure(figsize=(20, 10))
corr_matrix = rscaled_heart_df.corr()
sns.heatmap(data=corr_matrix, annot=True, cmap="rainbow", ax=plt.gca())
plt.show()


# Correlation of every column with the target, drawn as a single-column
# (vertical) heatmap. Scaled target coding: -1 = no heart disease, 0 = heart disease.

heart_corr = rscaled_heart_df.corr()[['target']]
plt.figure(figsize=(3, 10))
sns.heatmap(data=heart_corr, annot=True, cmap='seismic')

<AxesSubplot:>


# Define X and y, split into training and test sets, then scale the features.
#
# The scaler used for modelling is fitted on the training rows only.  Fitting
# it on the full dataset before splitting (as was done for rscaled_heart_df)
# lets the test rows influence the medians and IQRs applied to the training
# data, which leaks test-set information into model fitting and evaluation.
X = rscaled_heart_df.drop(columns=['target'])  # full-data scaled features, kept for exploration
y = heart['target'].values  # original 0/1 labels

raw_features = heart.drop(columns=['target'])
# Same test_size and random_state as before, so the same rows land in each set.
raw_train, raw_test, y_train, y_test = train_test_split(
    raw_features, y, test_size=0.2, random_state=56
)

# Learn the scaling from the training rows and apply it unchanged to the test rows.
split_scaler = RobustScaler(quantile_range=(25, 75)).fit(raw_train)
X_train = pd.DataFrame(
    split_scaler.transform(raw_train), columns=raw_train.columns, index=raw_train.index
)
X_test = pd.DataFrame(
    split_scaler.transform(raw_test), columns=raw_test.columns, index=raw_test.index
)


# Fit a decision tree capped at 13 leaf nodes and score it on the test set.
# random_state is fixed because scikit-learn randomly permutes the features at
# each split; without a seed the fitted tree (and therefore the accuracy) can
# differ from run to run.
tree_mod = DecisionTreeClassifier(max_leaf_nodes=13, random_state=56)
tree_mod.fit(X_train, y_train)
# Make predictions on the held-out rows
pred = tree_mod.predict(X_test)
# Fraction of test rows classified correctly
score1 = accuracy_score(y_test, pred)
print(score1)
# Show the fitted tree, labelling each split with its feature name
plt.figure(figsize=(20, 10))
tree.plot_tree(tree_mod, filled=True, feature_names=list(X_train.columns))
plt.show()

0.7868852459016393


from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils


# Feed-forward binary classifier: two hidden ReLU layers of 11 units each and
# a single sigmoid output unit.
# The input width is read from the training data instead of being hard-coded
# to 13, so the network keeps matching X_train if the feature set changes.
n_features = X_train.shape[1]

classifier = Sequential()

# Adding the input layer and the first hidden layer
classifier.add(Dense(11, kernel_initializer='uniform', activation='relu', input_dim=n_features))

# Adding the second hidden layer
classifier.add(Dense(11, kernel_initializer='uniform', activation='relu'))

# Adding the output layer
classifier.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))

# Compiling the ANN: Adam optimiser, binary cross-entropy loss, accuracy metric
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


# Train for 100 epochs in mini-batches of 10 samples.
classifier.fit(X_train, y_train, batch_size=10, epochs=100)

Epoch 1/100
25/25 [==============================] - 1s 1ms/step - loss: 0.6928 - accuracy: 0.5248
Epoch 2/100
25/25 [==============================] - 0s 1ms/step - loss: 0.6912 - accuracy: 0.6529
Epoch 3/100
25/25 [==============================] - 0s 1ms/step - loss: 0.6849 - accuracy: 0.8099
Epoch 4/100
25/25 [==============================] - 0s 1ms/step - loss: 0.6670 - accuracy: 0.7934
Epoch 5/100
25/25 [==============================] - 0s 1ms/step - loss: 0.6329 - accuracy: 0.8099
Epoch 6/100
25/25 [==============================] - 0s 1ms/step - loss: 0.5858 - accuracy: 0.7934
Epoch 7/100
25/25 [==============================] - 0s 1ms/step - loss: 0.5398 - accuracy: 0.8017
Epoch 8/100
25/25 [==============================] - 0s 1ms/step - loss: 0.4953 - accuracy: 0.8306
Epoch 9/100
25/25 [==============================] - 0s 1ms/step - loss: 0.4579 - accuracy: 0.8430
Epoch 10/100
25/25 [==============================] - 0s 1ms/step - loss: 0.4273 - accuracy: 0.8512
Epoch 11/100
25/25 [==============================] - 0s 1ms/step - loss: 0.4061 - accuracy: 0.8430
Epoch 12/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3907 - accuracy: 0.8595
Epoch 13/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3803 - accuracy: 0.8678
Epoch 14/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3716 - accuracy: 0.8719
Epoch 15/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3678 - accuracy: 0.8678
Epoch 16/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3636 - accuracy: 0.8678
Epoch 17/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3633 - accuracy: 0.8636
Epoch 18/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3603 - accuracy: 0.8678
Epoch 19/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3592 - accuracy: 0.8678
Epoch 20/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3573 - accuracy: 0.8595
Epoch 21/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3573 - accuracy: 0.8636
Epoch 22/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3551 - accuracy: 0.8595
Epoch 23/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3538 - accuracy: 0.8554
Epoch 24/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3531 - accuracy: 0.8595
Epoch 25/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3516 - accuracy: 0.8595
Epoch 26/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3513 - accuracy: 0.8678
Epoch 27/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3507 - accuracy: 0.8678
Epoch 28/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3489 - accuracy: 0.8636
Epoch 29/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3479 - accuracy: 0.8554
Epoch 30/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3484 - accuracy: 0.8595
Epoch 31/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3517 - accuracy: 0.8678
Epoch 32/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3468 - accuracy: 0.8636
Epoch 33/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3458 - accuracy: 0.8636
Epoch 34/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3455 - accuracy: 0.8636
Epoch 35/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3439 - accuracy: 0.8678
Epoch 36/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3436 - accuracy: 0.8595
Epoch 37/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3426 - accuracy: 0.8636
Epoch 38/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3427 - accuracy: 0.8595
Epoch 39/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3432 - accuracy: 0.8512
Epoch 40/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3411 - accuracy: 0.8636
Epoch 41/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3415 - accuracy: 0.8636
Epoch 42/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3413 - accuracy: 0.8719
Epoch 43/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3415 - accuracy: 0.8678
Epoch 44/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3395 - accuracy: 0.8719
Epoch 45/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3386 - accuracy: 0.8636
Epoch 46/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3395 - accuracy: 0.8595
Epoch 47/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3385 - accuracy: 0.8678
Epoch 48/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3381 - accuracy: 0.8719
Epoch 49/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3367 - accuracy: 0.8595
Epoch 50/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3366 - accuracy: 0.8595
Epoch 51/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3370 - accuracy: 0.8595
Epoch 52/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3362 - accuracy: 0.8595
Epoch 53/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3353 - accuracy: 0.8719
Epoch 54/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3350 - accuracy: 0.8719
Epoch 55/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3351 - accuracy: 0.8678
Epoch 56/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3336 - accuracy: 0.8636
Epoch 57/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3327 - accuracy: 0.8719
Epoch 58/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3311 - accuracy: 0.8719
Epoch 59/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3329 - accuracy: 0.8636
Epoch 60/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3329 - accuracy: 0.8719
Epoch 61/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3313 - accuracy: 0.8760
Epoch 62/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3305 - accuracy: 0.8760
Epoch 63/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3301 - accuracy: 0.8719
Epoch 64/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3296 - accuracy: 0.8843
Epoch 65/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3286 - accuracy: 0.8719
Epoch 66/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3314 - accuracy: 0.8595
Epoch 67/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3291 - accuracy: 0.8636
Epoch 68/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3275 - accuracy: 0.8719
Epoch 69/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3268 - accuracy: 0.8719
Epoch 70/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3265 - accuracy: 0.8719
Epoch 71/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3261 - accuracy: 0.8719
Epoch 72/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3256 - accuracy: 0.8595
Epoch 73/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3270 - accuracy: 0.8719
Epoch 74/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3245 - accuracy: 0.8719
Epoch 75/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3253 - accuracy: 0.8636
Epoch 76/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3238 - accuracy: 0.8719
Epoch 77/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3233 - accuracy: 0.8760
Epoch 78/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3237 - accuracy: 0.8760
Epoch 79/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3240 - accuracy: 0.8678
Epoch 80/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3224 - accuracy: 0.8719
Epoch 81/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3226 - accuracy: 0.8719
Epoch 82/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3223 - accuracy: 0.8760
Epoch 83/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3220 - accuracy: 0.8760
Epoch 84/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3215 - accuracy: 0.8636
Epoch 85/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3196 - accuracy: 0.8760
Epoch 86/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3198 - accuracy: 0.8760
Epoch 87/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3197 - accuracy: 0.8802
Epoch 88/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3182 - accuracy: 0.8802
Epoch 89/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3178 - accuracy: 0.8802
Epoch 90/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3182 - accuracy: 0.8802
Epoch 91/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3172 - accuracy: 0.8802
Epoch 92/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3168 - accuracy: 0.8802
Epoch 93/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3164 - accuracy: 0.8802
Epoch 94/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3163 - accuracy: 0.8802
Epoch 95/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3159 - accuracy: 0.8802
Epoch 96/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3154 - accuracy: 0.8802
Epoch 97/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3149 - accuracy: 0.8802
Epoch 98/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3152 - accuracy: 0.8760
Epoch 99/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3152 - accuracy: 0.8719
Epoch 100/100
25/25 [==============================] - 0s 1ms/step - loss: 0.3137 - accuracy: 0.8760

<keras.callbacks.History at 0x13f7a581a60>


# Predict on the test set; the sigmoid output layer yields values between 0 and 1.
y_pred = classifier.predict(X_test)


# Round the network outputs to hard 0/1 labels once and reuse them below.
y_pred_labels = y_pred.round()

# Confusion matrix, drawn as an annotated heatmap of integer counts.
cm1 = confusion_matrix(y_test, y_pred_labels)
sns.heatmap(data=cm1, annot=True, cmap="Reds", fmt="d", cbar=False)

# Accuracy on the test set
ac1 = accuracy_score(y_test, y_pred_labels)
print('accuracy of the model: ',ac1)

accuracy of the model:  0.8032786885245902


# Bar chart of target counts by sex, using the scaled codes of both columns.
sex_by_target = pd.crosstab(rscaled_heart_df['sex'], rscaled_heart_df['target'])
sex_by_target.plot(kind="bar", figsize=(10, 5), color=['purple', 'orange'])
plt.xlabel('Sex (-1 = Female, 0 = Male)')
plt.ylabel('Frequency')
plt.xticks(rotation=0)  # keep the category labels horizontal
plt.legend(["No Heart Disease", "Heart Disease"])
plt.show()

	age	sex	cp	trestbps	chol	fbs	restecg	thalach	exang	oldpeak	slope	ca	thal	target
count	303.000000	303.000000	303.000000	303.000000	303.000000	303.000000	303.000000	303.000000	303.000000	303.000000	303.000000	303.000000	303.000000	303.000000
mean	54.366337	0.683168	0.966997	131.623762	246.264026	0.148515	0.528053	149.646865	0.326733	1.039604	1.399340	0.729373	2.313531	0.544554
std	9.082101	0.466011	1.032052	17.538143	51.830751	0.356198	0.525860	22.905161	0.469794	1.161075	0.616226	1.022606	0.612277	0.498835
min	29.000000	0.000000	0.000000	94.000000	126.000000	0.000000	0.000000	71.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
25%	47.500000	0.000000	0.000000	120.000000	211.000000	0.000000	0.000000	133.500000	0.000000	0.000000	1.000000	0.000000	2.000000	0.000000
50%	55.000000	1.000000	1.000000	130.000000	240.000000	0.000000	1.000000	153.000000	0.000000	0.800000	1.000000	0.000000	2.000000	1.000000
75%	61.000000	1.000000	2.000000	140.000000	274.500000	0.000000	1.000000	166.000000	1.000000	1.600000	2.000000	1.000000	3.000000	1.000000
max	77.000000	1.000000	3.000000	200.000000	564.000000	1.000000	2.000000	202.000000	1.000000	6.200000	2.000000	4.000000	3.000000	1.000000

	age	trestbps	chol	thalach
count	303.000000	303.000000	303.000000	303.000000
mean	54.366337	131.623762	246.264026	149.646865
std	9.082101	17.538143	51.830751	22.905161
min	29.000000	94.000000	126.000000	71.000000
25%	47.500000	120.000000	211.000000	133.500000
50%	55.000000	130.000000	240.000000	153.000000
75%	61.000000	140.000000	274.500000	166.000000
max	77.000000	200.000000	564.000000	202.000000

	cp	restecg	oldpeak	slope	ca	thal
count	303.000000	303.000000	303.000000	303.000000	303.000000	303.000000
mean	0.966997	0.528053	1.039604	1.399340	0.729373	2.313531
std	1.032052	0.525860	1.161075	0.616226	1.022606	0.612277
min	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
25%	0.000000	0.000000	0.000000	1.000000	0.000000	2.000000
50%	1.000000	1.000000	0.800000	1.000000	0.000000	2.000000
75%	2.000000	1.000000	1.600000	2.000000	1.000000	3.000000
max	3.000000	2.000000	6.200000	2.000000	4.000000	3.000000

	sex	fbs	exang	target
count	303.000000	303.000000	303.000000	303.000000
mean	0.683168	0.148515	0.326733	0.544554
std	0.466011	0.356198	0.469794	0.498835
min	0.000000	0.000000	0.000000	0.000000
25%	0.000000	0.000000	0.000000	0.000000
50%	1.000000	0.000000	0.000000	1.000000
75%	1.000000	0.000000	1.000000	1.000000
max	1.000000	1.000000	1.000000	1.000000

	Outliers Before	Outliers After	Skew Before	Skew After
age	NO	NO	NO	NO
sex	NO	NO	NO	NO
cp	NO	NO	LEFT	LEFT
trestbps	YES	YES	NO	NO
chol	YES	YES	NO	NO
fbs	YES	YES	NO	NO
restecg	NO	NO	LEFT	LEFT
thalach	YES	YES	NO	NO
exang	NO	NO	NO	NO
oldpeak	YES	YES	LEFT	LEFT
slope	NO	NO	RIGHT	RIGHT
ca	YES	YES	LEFT	LEFT
thal	YES	YES	RIGHT	RIGHT
target	NO	NO	NO	NO

Neural Networks Assignment: Heart Disease Classification

Introduction and Task Details

Importing Python Packages and the Dataset

Column Names Explained

Data Quality Assessment

Visual Data Quality Assessment

Data Preprocessing

Correlations

Split dataset for models

Models (currently unfinished)

	age	sex	cp	trestbps	chol	fbs	restecg	thalach	exang	oldpeak	slope	thal	target
0	63	1	3	145	233	1	0	150	0	2.3	0	1	1
1	37	1	2	130	250	0	1	187	0	3.5	0	2	1
2	41	0	1	130	204	0	0	172	0	1.4	2	2	1
3	56	1	1	120	236	0	1	178	0	0.8	2	2	1
4	57	0	0	120	354	0	1	163	1	0.6	2	2	1
5	57	1	0	140	192	0	1	148	0	0.4	1	1	1
6	56	0	1	140	294	0	0	153	0	1.3	1	2	1
7	44	1	1	120	263	0	1	173	0	0.0	2	3	1
8	52	1	2	172	199	1	1	162	0	0.5	2	3	1
9	57	1	2	150	168	0	1	174	0	1.6	2	2	1

	Old Mean	New Mean	Old Std Dev	New Std Dev
0	54.366337	-0.046938	9.082101	0.671637
1	0.683168	-0.316832	0.466011	0.465241
2	0.966997	-0.016502	1.032052	0.515174
3	131.623762	0.081188	17.538143	0.875459
4	246.264026	0.098646	51.830751	0.814884
5	0.148515	0.148515	0.356198	0.355610
6	0.528053	-0.471947	0.525860	0.524991
7	149.646865	-0.103173	22.905161	0.703610
8	0.326733	0.326733	0.469794	0.469019
9	1.039604	0.149752	1.161075	0.724473
10	1.399340	0.399340	0.616226	0.615208
11	0.729373	0.729373	1.022606	1.020917
12	2.313531	0.313531	0.612277	0.611265
13	0.544554	-0.455446	0.498835	0.498011

	age	sex	cp	trestbps	chol	fbs	restecg	thalach	exang	oldpeak	slope	thal
0	0.592593	0.0	1.0	0.75	-0.110236	1.0	-1.0	-0.092308	0.0	0.9375	-1.0	-1.0
1	-1.333333	0.0	0.5	0.00	0.157480	0.0	0.0	1.046154	0.0	1.6875	-1.0	0.0
2	-1.037037	-1.0	0.0	0.00	-0.566929	0.0	-1.0	0.584615	0.0	0.3750	1.0	0.0
3	0.074074	0.0	0.0	-0.50	-0.062992	0.0	0.0	0.769231	0.0	0.0000	1.0	0.0
4	0.148148	-1.0	-0.5	-0.50	1.795276	0.0	0.0	0.307692	1.0	-0.1250	1.0	0.0

	age	sex	cp	trestbps	chol	fbs	restecg	thalach	exang	oldpeak	slope	thal	target
0	63	1	3	145	233	1	0	150	0	2.3	0	1	1
1	37	1	2	130	250	0	1	187	0	3.5	0	2	1
2	41	0	1	130	204	0	0	172	0	1.4	2	2	1
3	56	1	1	120	236	0	1	178	0	0.8	2	2	1
4	57	0	0	120	354	0	1	163	1	0.6	2	2	1

	age	sex	cp	trestbps	chol	fbs	restecg	thalach	exang	oldpeak	slope	thal	target
0	63	1	3	145	233	1	0	150	0	2.3	0	1	1
1	37	1	2	130	250	0	1	187	0	3.5	0	2	1
2	41	0	1	130	204	0	0	172	0	1.4	2	2	1
3	56	1	1	120	236	0	1	178	0	0.8	2	2	1
4	57	0	0	120	354	0	1	163	1	0.6	2	2	1
5	57	1	0	140	192	0	1	148	0	0.4	1	1	1
6	56	0	1	140	294	0	0	153	0	1.3	1	2	1
7	44	1	1	120	263	0	1	173	0	0.0	2	3	1
8	52	1	2	172	199	1	1	162	0	0.5	2	3	1
9	57	1	2	150	168	0	1	174	0	1.6	2	2	1

	age	sex	cp	trestbps	chol	fbs	restecg	thalach	exang	oldpeak	slope	thal	target
0	63	1	3	145	233	1	0	150	0	2.3	0	1	1
1	37	1	2	130	250	0	1	187	0	3.5	0	2	1
2	41	0	1	130	204	0	0	172	0	1.4	2	2	1
3	56	1	1	120	236	0	1	178	0	0.8	2	2	1
4	57	0	0	120	354	0	1	163	1	0.6	2	2	1

	age	sex	cp	trestbps	chol	fbs	restecg	thalach	exang	oldpeak	slope	thal	target
0	63	1	3	145	233	1	0	150	0	2.3	0	1	1
1	37	1	2	130	250	0	1	187	0	3.5	0	2	1
2	41	0	1	130	204	0	0	172	0	1.4	2	2	1
3	56	1	1	120	236	0	1	178	0	0.8	2	2	1
4	57	0	0	120	354	0	1	163	1	0.6	2	2	1
5	57	1	0	140	192	0	1	148	0	0.4	1	1	1
6	56	0	1	140	294	0	0	153	0	1.3	1	2	1
7	44	1	1	120	263	0	1	173	0	0.0	2	3	1
8	52	1	2	172	199	1	1	162	0	0.5	2	3	1
9	57	1	2	150	168	0	1	174	0	1.6	2	2	1

	age	sex	cp	trestbps	chol	fbs	restecg	thalach	exang	oldpeak	slope	thal	target
0	63	1	3	145	233	1	0	150	0	2.3	0	1	1
1	37	1	2	130	250	0	1	187	0	3.5	0	2	1
2	41	0	1	130	204	0	0	172	0	1.4	2	2	1
3	56	1	1	120	236	0	1	178	0	0.8	2	2	1
4	57	0	0	120	354	0	1	163	1	0.6	2	2	1