Mock Part1.ipynb - Colab
from google.colab import files
uploaded = files.upload()
Saving T124OPPE2 Preprocessing V1.csv to T124OPPE2 Preprocessing V1 (2).csv
import numpy as np
import pandas as pd
df = pd.read_csv('T124OPPE2_Preprocessing_V1.csv')
df
     Gender   Age HasTension AnyHeartDisease NeverMarried     Occupation  LivesIn GlucoseLevel   BMI   SmokingStatus HeartAttack
0    Female  75.0        Yes              No          Yes  Self-employed     City         54.6  35.1    never smoked          No
1    Female  49.0         No              No          Yes        Private  Village        108.8  26.7          smokes          No
2      Male  32.0         No              No          Yes        Private     City         64.1  23.4          smokes          No
3      Male  78.0         No              No          Yes  Self-employed     City        219.2  27.4         Unknown         Yes
4      Male  39.0         No              No          Yes        Private     City         55.4  41.6 formerly smoked          No
...     ...   ...        ...             ...          ...            ...      ...          ...   ...             ...         ...
3995 Female  40.0         No              No          Yes        Private     City         88.4  36.5          smokes          No
3996 Female  18.0         No              No           No        Private  Village        168.5  48.2    never smoked          No
3997   Male  27.0         No              No          Yes        Private     City         76.5  21.0    never smoked          No
3998 Female  28.0         No              No           No        Private     City         80.0  27.1          smokes          No
4000 rows × 11 columns
df.Gender.value_counts()
count
Gender
Female 2366
Male 1627
Unknown 7
dtype: int64
print(df['Gender'].unique())
['Female' 'Male' 'Unknown']
df.Age.value_counts()
count
Age
78.00 79
45.00 71
57.00 71
53.00 69
54.00 69
... ...
1.72 3
1.16 2
0.40 2
1.40 1
0.08 1
105 rows × 1 columns
dtype: int64
(df.Age<1).sum()
np.int64(40)
print(df['Age'].unique())
[ 7.50e+01 4.90e+01 3.20e+01 7.80e+01 3.90e+01 -3.00e+00 6.30e+01
4.00e+00 4.50e+01 5.20e+01 3.10e+01 5.70e+01 5.60e+01 2.00e+01
2.40e-01 3.80e+01 8.20e+01 3.40e+01 2.90e+01 1.60e+01 7.00e+00
3.70e+01 5.10e+01 2.60e+01 5.30e+01 5.00e+01 2.00e+00 5.40e+01
7.90e+01 6.00e+00 5.80e+01 6.50e+01 1.30e+01 4.70e+01 1.90e+01
7.40e+01 7.30e+01 1.20e+01 9.00e+00 2.70e+01 3.30e+01 8.10e+01
3.60e+01 7.60e+01 7.10e+01 4.60e+01 2.50e+01 1.70e+01 2.20e+01
1.10e+01 5.50e+01 6.10e+01 6.90e+01 7.70e+01 4.20e+01 2.40e+01
7.20e+01 3.50e+01 8.00e+01 1.64e+00 5.90e+01 6.00e+01 4.10e+01
4.30e+01 6.20e+01 6.40e+01 5.00e+00 7.00e+01 2.10e+01 1.00e+01
1.80e+01 1.40e+01 4.00e+01 1.50e+01 3.00e+00 4.80e+01 8.00e+00
1.32e+00 6.70e+01 4.40e+01 6.80e+01 1.56e+00 2.30e+01 6.60e+01
8.00e-01 3.00e+01 1.88e+00 2.80e+01 5.60e-01 6.40e-01 3.20e-01
1.80e+00 1.60e-01 7.20e-01 1.40e+00 8.80e-01 1.08e+00 1.72e+00
1.24e+00 4.00e-01 1.00e+00 1.48e+00 4.80e-01 8.00e-02 1.16e+00]
(df.Age==-3.00e+00).sum()
np.int64(8)
df[df.GlucoseLevel < 0]['GlucoseLevel']
GlucoseLevel
98 -2.0
697 -2.0
829 -2.0
1460 -2.0
2428 -2.0
2543 -2.0
3095 -2.0
3370 -2.0
3986 -2.0
dtype: float64
(df.GlucoseLevel<0).sum()
np.int64(9)
df.LivesIn.value_counts()
count
LivesIn
City 2030
Village 1965
Unknown 5
dtype: int64
df.BMI.unique()
array([35.1, 26.7, 23.4, 27.4, 41.6, 29.3, 37.1, 16.1, 40.5, 15.8, 29.9,
29.5, 40.7, 36.6, 31.5, nan, 12.1, 25.5, 22.3, 27.1, 44.7, 26.1,
18.8, 18.7, 24.8, 17. , 37.6, 20.6, 29. , 56.2, 30.7, 25.3, 23. ,
27.2, 19.2, 31.6, 24.6, 27. , 24.5, 18.2, 52. , 32.3, 42.7, 30. ,
24.3, 24.2, 25.7, 36.7, 46.4, 48.3, 20.9, 24.7, 23.6, 26.5, 39.4,
18.4, 25.6, 25.9, 54.6, 31.9, 14.6, 38.7, 23.7, 27.3, 29.2, 39.7,
30.1, 28.1, 35.7, 14.3, 30.4, 22.2, 35. , 44.5, 36.3, 25.2, 26.6,
31.4, 36.8, 25.8, 38.4, 43.2, 20.4, 30.6, 33.8, 34. , 26.2, 29.6,
30.2, 22.9, 38.9, 16.3, 23.3, 25.1, 34.1, 45.7, 37.3, 26.4, 40.9,
31.1, 17.7, 27.5, 19.9, 32. , 35.9, 32.1, 24.9, 23.8, 18. , 20.7,
27.7, 22.6, 13.1, 19.4, 28.5, 28.8, 21.7, 19.6, 27.8, 41. , 41.8,
35.2, 44.4, 42.6, 15.7, 52.8, 23.1, 38.5, 22.7, 18.3, 42.3, 43.4,
51.5, 24. , 28.7, 23.9, 37.9, 32.6, 35.6, 34.7, 28.3, 33.2, 32.5,
44.1, 34.2, 22. , 33.3, 16.8, 35.4, 20.1, 26.3, 37.5, 33.1, 21.2,
33. , 33.6, 30.3, 26. , 34.8, 31.8, 42.4, 25.4, 23.2, 19.3, 27.9,
36.1, 43. , 16.9, 20.5, 33.9, 28.2, 24.1, 41.1, 32.2, 26.8, 30.8,
22.5, 29.7, 40. , 34.5, 28. , 37.7, 19.8, 28.4, 34.3, 20.8, 16.2,
17.5, 36.9, 19.5, 31.7, 34.4, 29.1, 39.3, 35.5, 21. , 31. , 25. ,
20.3, 17.9, 36.5, 47.5, 19. , 23.5, 38.8, 39.5, 22.1, 30.5, 29.4,
32.4, 16.7, 22.8, 16.4, 24.4, 54.8, 19.1, 39.2, 18.5, 28.6, 48.2,
41.2, 20. , 34.6, 36.4, 29.8, 59.7, 14.4, 28.9, 27.6, 19.7, 32.8,
44.8, 21.1, 16.6, 14.9, 18.9, 17.1, 20.2, 33.7, 38.2, 55.6, 21.5,
32.9, 40.3, 18.1, 38.6, 15.1, 41.5, 21.9, 39.6, 42. , 13.7, 38. ,
41.3, 35.3, 48.6, 41.7, 21.3, 47.6, 30.9, 61.1, 31.3, 38.3, 37.2,
35.8, 49.1, 33.5, 18.6, 44.3, 26.9, 17.2, 31.2, 44. , 49.8, 39.1,
39.8, 39.9, 17.3, 22.4, 21.4, 40.2, 33.4, 56.5, 37.4, 17.4, 16.5,
21.6, 41.4, 37. , 36. , 43.7, 13.3, 17.8, 14.8, 39. , 40.8, 48.4,
43.8, 63.6, 36.2, 42.5, 40.1, 43.9, 15.4, 13.2, 43.5, 58.7, 14. ,
46.8, 43.6, 37.8, 46.3, 45.4, 15.2, 17.6, 32.7, 46.1, 42.1, 58.5,
45.9, 41.9, 50.7, 54. , 21.8, 47. , 66.1, 45.3, 34.9, 42.2, 55.7,
55.1, 45.1, 52.7, 16. , 54.2, 40.6, 49.9, 13.8, 53.4, 46.9, 55.8,
45.6, 43.3, 15.3, 77.9, 47.3, 38.1, 57.9, 53.6, 9.6, 15.5, 49.2,
45. , 49.5, 12.8, 62.6, 43.1, 12.2, 56.8, 45.2, 51.9, 52.1, 62.5,
47.8, 51.3, 45.5, 10.9, 47.4, 49.7, 49.4, 48. , 50.8, 52.4, 45.8,
97. , 13.6, 48.9, 57.3, 14.5, 40.4, 47.9, 50.5, 12.6, 14.1, 57.4,
42.8, 49.3, 46.2, 48.7, 58.4, 53.3, 55. , 46.5, 53.1, 51.8, 50.3,
53.9, 58.1, 13. , 52.2, 15.9, 13.9, 51. , 51.4, 57.6, 46.7, 53. ,
15. , 14.7])
df.BMI.isnull().sum()
np.int64(149)
df.SmokingStatus.value_counts()
count
SmokingStatus
never smoked 1502
Unknown 1204
formerly smoked 697
smokes 597
dtype: int64
df.BMI.mean()
np.float64(28.857958971695663)
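The checks above turn up several placeholder codes rather than true NaNs. A small sketch that collects them in one place, assuming the sentinels identified so far (-3 in Age, -2 in GlucoseLevel, 'Unknown' in the categorical columns, NaN in BMI):
# Sketch: tally the placeholder values spotted during exploration.
sentinel_counts = {
    'Age == -3': (df.Age == -3).sum(),
    'GlucoseLevel == -2': (df.GlucoseLevel == -2).sum(),
    "Gender == 'Unknown'": (df.Gender == 'Unknown').sum(),
    "LivesIn == 'Unknown'": (df.LivesIn == 'Unknown').sum(),
    "SmokingStatus == 'Unknown'": (df.SmokingStatus == 'Unknown').sum(),
    'BMI is NaN': df.BMI.isnull().sum(),
}
for name, count in sentinel_counts.items():
    print(name, int(count))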
df[(df.LivesIn=='City')&(df.SmokingStatus.isin(['formerly smoked','smokes'])) &(df.HeartAttack=='Yes')].shape[0]
52
df.head(5)
Gender Age HasTension AnyHeartDisease NeverMarried Occupation LivesIn GlucoseLevel BMI SmokingStatus HeartAttack
0 Female 75.0 Yes No Yes Self-employed City 54.6 35.1 never smoked No
1 Female 49.0 No No Yes Private Village 108.8 26.7 smokes No
2 Male 32.0 No No Yes Private City 64.1 23.4 smokes No
3 Male 78.0 No No Yes Self-employed City 219.2 27.4 Unknown Yes
4 Male 39.0 No No Yes Private City 55.4 41.6 formerly smoked No
df.NeverMarried.value_counts()
count
NeverMarried
Yes 2626
No 1374
dtype: int64
#Which of the following categories has the highest frequency? Ignore rows with missing values.
#female patients without tension, without any heart disease and never married
#female patients without tension, without any heart disease and either currently married or married before
#male patients without tension, without any heart disease and never married
#male patients with tension, with a heart disease and never married
#There is a tie between 2 or more options.
#(A compact groupby cross-check is sketched after the four counts below.)
df[(df.Gender=='Female')&(df.HasTension=='No')&(df.AnyHeartDisease=='No')&(df.NeverMarried=='Yes')].shape[0]
1335
df[(df.Gender=='Female')&(df.HasTension=='No')&(df.AnyHeartDisease=='No')&(df.NeverMarried=='No')].shape[0]
754
df[(df.Gender=='Male')&(df.HasTension=='No')&(df.AnyHeartDisease=='No')&(df.NeverMarried=='Yes')].shape[0]
795
df[(df.Gender=='Male')&(df.HasTension=='Yes')&(df.AnyHeartDisease=='Yes')&(df.NeverMarried=='Yes')].shape[0]
24
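The four boolean filters above can be cross-checked in one call; a sketch, assuming 'Unknown' labels are simply left as their own category for this quick count:
# Sketch: frequency of every Gender/HasTension/AnyHeartDisease/NeverMarried combination.
(df.groupby(['Gender', 'HasTension', 'AnyHeartDisease', 'NeverMarried'])
   .size()
   .sort_values(ascending=False)
   .head())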
df.select_dtypes(include=['object'])
Gender HasTension AnyHeartDisease NeverMarried Occupation LivesIn SmokingStatus HeartAttack
0 Female Yes No Yes Self-employed City never smoked No
1 Female No No Yes Private Village smokes No
2 Male No No Yes Private City smokes No
3 Male No No Yes Self-employed City Unknown Yes
4 Male No No Yes Private City formerly smoked No
... ... ... ... ... ... ... ... ...
3995 Female No No Yes Private City smokes No
3996 Female No No No Private Village never smoked No
3997 Male No No Yes Private City never smoked No
3998 Female No No No Private City smokes No
3999 Female No No Yes Private Village formerly smoked Yes
4000 rows × 8 columns
df.HeartAttack.value_counts()
count
HeartAttack
No 3806
Yes 194
dtype: int64
from sklearn.model_selection import train_test_split
df.HeartAttack=df.HeartAttack.map({'Yes':1,'No':0})
df.HeartAttack
HeartAttack
0 0
1 0
2 0
3 1
4 0
... ...
3995 0
3996 0
3997 0
3998 0
3999 1
4000 rows × 1 columns
dtype: int64
X= df.drop(columns='HeartAttack')
y = df.HeartAttack
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=0,stratify=y)
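A quick sanity check, sketched here, that stratify=y kept the class ratio (194/4000 ≈ 4.85% positives, per the HeartAttack counts above) in both splits:
# Sketch: class proportions should match in the train and test splits.
print(y_train.value_counts(normalize=True))
print(y_test.value_counts(normalize=True))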
from sklearn.preprocessing import OneHotEncoder,OrdinalEncoder,StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
gender_pipe = Pipeline([('imputer',SimpleImputer(strategy='most_frequent',missing_values='Unknown')),('ordinal',OrdinalEncoder())])
age_pipe = Pipeline([('imputer',SimpleImputer(strategy='mean',missing_values=-3)),('scaler',StandardScaler())])
tension_pipe = Pipeline([('ordinal',OrdinalEncoder())])
any_pipe = Pipeline([('ordinal',OrdinalEncoder())])
never_pipe = Pipeline([('ordinal',OrdinalEncoder())])
occ_pipe = Pipeline([('onehot',OneHotEncoder(sparse_output=False))])
livesin_pipe = Pipeline([('imputer',SimpleImputer(strategy='most_frequent',missing_values='Unknown')),('ordinal',OrdinalEncoder())])
from sklearn.preprocessing import MinMaxScaler
glucose_pipe = Pipeline([('imputer',SimpleImputer(strategy='mean',missing_values=-2)),('minmax',MinMaxScaler())])
bmi_pipe = Pipeline([('imputer',SimpleImputer(strategy='mean',missing_values=np.nan)),('scaler',StandardScaler())])
status_pipe = Pipeline([('imputer',SimpleImputer(strategy='most_frequent',missing_values='Unknown')),('onehot',OneHotEncoder(sparse_output=False))])
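A small check, only as a sketch, that one of the sentinel-aware imputers behaves as intended: 'Unknown' genders should be replaced by the most frequent category before ordinal encoding, leaving just two codes.
# Sketch: fit one column pipeline on its own for inspection.
# (The real fit happens inside the ColumnTransformer on X_train below.)
encoded_gender = gender_pipe.fit_transform(df[['Gender']])
print(np.unique(encoded_gender))   # expected [0. 1.] once 'Unknown' is imputed away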
df.columns
Index(['Gender', 'Age', 'HasTension', 'AnyHeartDisease', 'NeverMarried',
'Occupation', 'LivesIn', 'GlucoseLevel', 'BMI', 'SmokingStatus',
'HeartAttack'],
dtype='object')
pre = ColumnTransformer([('gender',gender_pipe,['Gender']),('age',age_pipe,['Age']),('tension',tension_pipe,['HasTension']),
                         ('any',any_pipe,['AnyHeartDisease']),('never',never_pipe,['NeverMarried']),
                         ('occ',occ_pipe,['Occupation']),('lives',livesin_pipe,['LivesIn']),('glucose',glucose_pipe,['GlucoseLevel']),
                         ('bmi',bmi_pipe,['BMI']),('status',status_pipe,['SmokingStatus'])],
                        verbose_feature_names_out=False,remainder='drop').set_output(transform='pandas')
pre
X_train.columns
Index(['Gender', 'Age', 'HasTension', 'AnyHeartDisease', 'NeverMarried',
'Occupation_Govt_job', 'Occupation_Never_worked', 'Occupation_Private',
'Occupation_Self-employed', 'Occupation_children', 'LivesIn',
'GlucoseLevel', 'BMI', 'SmokingStatus_formerly smoked',
'SmokingStatus_never smoked', 'SmokingStatus_smokes'],
dtype='object')
X_train= pre.fit_transform(X_train)
X_test = pre.transform(X_test)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/usr/local/lib/python3.11/dist-packages/pandas/core/indexes/base.py in get_loc(self, key)
3804 try:
-> 3805 return self._engine.get_loc(casted_key)
3806 except KeyError as err:
index.pyx in pandas._libs.index.IndexEngine.get_loc()
index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Occupation'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
7 frames
KeyError: 'Occupation'
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
/usr/local/lib/python3.11/dist-packages/sklearn/utils/_indexing.py in _get_column_indices(X, key)
370
371 except KeyError as e:
--> 372 raise ValueError("A given column is not a column of the dataframe") from e
373
374 return column_indices
ValueError: A given column is not a column of the dataframe
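The ValueError most likely reflects notebook state: this X_train already carries the one-hot column names shown above, so the raw 'Occupation' column the preprocessor expects is gone. A sketch of a clean rerun, assuming the raw dataframe df is still loaded (not executed in this notebook):
# Sketch: rebuild the split from the raw columns, then fit the preprocessor once.
from sklearn.model_selection import train_test_split
X = df.drop(columns='HeartAttack')
y = df.HeartAttack
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0, stratify=y)
X_train = pre.fit_transform(X_train)
X_test = pre.transform(X_test)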
X_train.shape
(2800, 16)
X_train.columns
Index(['Gender', 'Age', 'HasTension', 'AnyHeartDisease', 'NeverMarried',
'Occupation', 'LivesIn', 'GlucoseLevel', 'BMI', 'SmokingStatus',
'HeartAttack'],
dtype='object')
X_test.mean().mean()
np.float64(0.24624313115075144)
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(random_state=1729)
rfe = RFE(estimator= model , n_features_to_select= X_train.shape[1]-1)
rfe.fit(X_train,y_train)
index = list(rfe.support_).index(False)
print(index)
15
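The printed index alone does not say which feature was eliminated. A sketch that maps the support mask back to names, assuming X_train is still a dataframe whose columns line up with rfe.support_ (as the pandas-output preprocessor would give):
# Sketch: list kept and eliminated features by name.
support = pd.Series(rfe.support_, index=X_train.columns)
print("eliminated:", list(support[~support].index))
print("kept:", list(support[support].index))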
from google.colab import files
uploaded = files.upload()
df = pd.read_csv('T124OPPE2_ModelBuilding_V1.csv')
df
Gender Age HasTension AnyHeartDisease NeverMarried Occupation_Govt_job Occupation_Never_worked Occupation_Private
0 0.0 0.433901 0.0 0.0 1.0 0.0 0.0 0.0
1 1.0 -1.840435 0.0 0.0 0.0 0.0 0.0 0.0
2 1.0 -1.160260 0.0 0.0 0.0 1.0 0.0 0.0
3 1.0 -0.806002 0.0 0.0 1.0 0.0 0.0 1.0
4 0.0 0.743876 0.0 0.0 1.0 0.0 0.0 1.0
... ... ... ... ... ... ... ... ...
3995 0.0 0.389618 0.0 0.0 1.0 0.0 0.0 1.0
3996 1.0 1.452392 1.0 0.0 1.0 0.0 0.0 0.0
3997 0.0 0.433901 1.0 0.0 1.0 0.0 0.0 1.0
3998 1.0 0.921005 0.0 1.0 1.0 0.0 0.0 1.0
3999 0.0 -1.558800 0.0 0.0 0.0 0.0 0.0 1.0
4000 rows × 17 columns
X = df.drop(columns='HeartAttack')
y = df.HeartAttack
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=0,shuffle=False)
from sklearn.linear_model import Perceptron
from sklearn.metrics import precision_score
model = Perceptron(
random_state=1729,
eta0=1,
max_iter=1,
shuffle=False,
validation_fraction=0.1,
alpha=0
)
for i in range(5):
    model.partial_fit(X_train, y_train, [0, 1])
    y_pred = model.predict(X_train)
    print(precision_score(y_train, y_pred))
    print(model.intercept_)
0.3333333333333333
[-4.]
0.13333333333333333
[-3.]
0.15384615384615385
[-3.]
0.0
[-4.]
0.6666666666666666
[-3.]
/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
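The warning fires on the epoch where the perceptron predicts no positives at all. A sketch that makes that case explicit via the zero_division argument instead of relying on the warning:
# Sketch: report precision as 0.0 explicitly when there are no positive predictions.
from sklearn.metrics import precision_score
y_pred = model.predict(X_train)
print(precision_score(y_train, y_pred, zero_division=0))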
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import log_loss
clf = SGDClassifier(
    loss="log_loss",
    penalty="l2",
    eta0=0.001,
    alpha=0,
    learning_rate="constant",
    random_state=1729,
    warm_start=True,
    max_iter=1
)
for i in range(5):
    clf.fit(X_train, y_train)
    y_pred = clf.predict_proba(X_train)
    print(log_loss(y_train, y_pred))
0.2529904609012919
0.20828682141739835
0.19406901833322654
0.18699850891012404
0.18255077295024025
/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_stochastic_gradient.py:738: ConvergenceWarning: Maximum number of iteration reached before convergence. Consider increasing max_iter to improve the fit.
  warnings.warn(
(the same ConvergenceWarning is emitted once per fit call, five times in total)
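The warning is expected because each fit call is capped at max_iter=1. A sketch of the same five-pass schedule using partial_fit, which performs one pass per call and does not raise the convergence warning; the losses may differ slightly from the warm_start run:
# Sketch: five single passes via partial_fit instead of warm_start + max_iter=1.
clf2 = SGDClassifier(loss="log_loss", penalty="l2", eta0=0.001, alpha=0,
                     learning_rate="constant", random_state=1729)
for i in range(5):
    clf2.partial_fit(X_train, y_train, classes=[0, 1])
    print(log_loss(y_train, clf2.predict_proba(X_train)))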
sgd = SGDClassifier(loss='log_loss',learning_rate='constant',random_state=1729)
from sklearn.model_selection import GridSearchCV
params = {
    'alpha': [0.0001, 0.0005, 0.001, 0.005],
    'eta0':  [0.01, 0.05, 0.1, 0.5]
}
grid = GridSearchCV(estimator= sgd ,param_grid=params)
grid.fit(X_train,y_train)
grid.best_params_
{'alpha': 0.0001, 'eta0': 0.01}
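To put the winning combination in context, a sketch that inspects the search beyond best_params_: best_score_ is the mean cross-validated score (accuracy, the default classifier scorer), and cv_results_ holds every combination tried.
# Sketch: mean CV score of the best combination, plus the top few candidates.
print(grid.best_score_)
results = pd.DataFrame(grid.cv_results_)
print(results[['param_alpha', 'param_eta0', 'mean_test_score']]
      .sort_values('mean_test_score', ascending=False)
      .head())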
sgd = SGDClassifier(learning_rate='constant',
random_state=1729,
loss='log_loss',
alpha=0.0001,
eta0=0.01,
class_weight={0: 0.1, 1: 2})
sgd.fit(X_train,y_train)
y_pred = sgd.predict(X_test)
correct = ((y_test==1)&(y_pred==1)).sum()
print(correct)
47
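The count of 47 is only the true positives. A sketch of the full confusion matrix for the class-weighted model shows the false positives that the reweighting costs:
# Sketch: full confusion matrix for the class-weighted SGDClassifier.
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, y_pred))   # rows: true 0/1, columns: predicted 0/1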
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
model = SVC( kernel='rbf',
decision_function_shape='ovr',
random_state=1729,
C=1)
model.fit(X_train,y_train)
y_pred = model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print (cm)
[[1142 0]
[ 58 0]]
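The matrix shows the RBF SVC predicting only the majority class. One common adjustment, shown purely as a sketch and not part of the original run, is class_weight='balanced':
# Sketch: reweight classes so the minority class is not ignored.
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
svc_bal = SVC(kernel='rbf', decision_function_shape='ovr',
              random_state=1729, C=1, class_weight='balanced')
svc_bal.fit(X_train, y_train)
print(confusion_matrix(y_test, svc_bal.predict(X_test)))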
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier(criterion='entropy',
                               splitter='random',
                               min_samples_split=4,
                               min_impurity_decrease=0.0001,
                               random_state=1729)
model.fit(X_train, y_train)
DecisionTreeClassifier(criterion='entropy', min_impurity_decrease=0.0001,
                       min_samples_split=4, random_state=1729,
                       splitter='random')
model.tree_.max_depth
20
model.tree_.node_count
515
model.tree_.impurity[1]
np.float64(0.024564134553940277)
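The same depth and size information is also exposed through public accessors, sketched below; tree_.impurity[1] above is the entropy at node 1, the left child of the root.
# Sketch: public counterparts of the tree_ attributes used above.
print(model.get_depth())      # same value as model.tree_.max_depth
print(model.get_n_leaves())   # number of leaf nodes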
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
dt = DecisionTreeClassifier(random_state=1729)
kn = KNeighborsClassifier()
lg = LogisticRegression(random_state=1729)
bagging = BaggingClassifier(estimator=dt,
n_estimators=20,random_state=1729)
bagging.fit(X_train,y_train)
y_pred = bagging.predict(X_test)
print(accuracy_score(y_test,y_pred))
0.9441666666666667
bagging = BaggingClassifier(estimator=kn,
n_estimators=20,random_state=1729)
bagging.fit(X_train,y_train)
y_pred = bagging.predict(X_test)
print(accuracy_score(y_test,y_pred))
0.9508333333333333
bagging = BaggingClassifier(estimator=lg,
n_estimators=20,random_state=1729)
bagging.fit(X_train,y_train)
y_pred = bagging.predict(X_test)
print(accuracy_score(y_test,y_pred))
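The three bagging runs follow the same pattern, so they can be collapsed into one loop; a sketch, assuming the same 20 estimators and seed for every base model:
# Sketch: compare the three base estimators under identical bagging settings.
for name, base in [('tree', dt), ('knn', kn), ('logreg', lg)]:
    bag = BaggingClassifier(estimator=base, n_estimators=20, random_state=1729)
    bag.fit(X_train, y_train)
    print(name, accuracy_score(y_test, bag.predict(X_test)))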