In [1]: import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
In [2]: data = pd.read_csv("D:\\Course\\Python\\Datasets\\pima-indians-diabetes.csv")
In [3]: data
...
In [5]: # Divide Data into X and Y
array = data.values
X = array[:,0:8]
Y = array[:,8]
Hold-Out Validation
Split the data once into a training set and a test set: the model is fit on the training portion and scored on the held-out portion.
In [6]: from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
In [7]: # Split the data; random_state=7 is an assumed seed (the original value was truncated)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=7)
In [8]: model = LogisticRegression()
In [9]: model.fit(X_train, Y_train)
result = model.score(X_test, Y_test)
C:\Users\rgandyala\Anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
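The warning points to its own fix: raise max_iter or standardize the features before fitting. A minimal sketch of the scaling approach using a scikit-learn Pipeline (the pipeline and scaler are additions, not part of the original session):

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Standardize features to zero mean / unit variance before the fit;
# lbfgs converges far more reliably on scaled inputs.
pipe = make_pipeline(StandardScaler(), LogisticRegression())
pipe.fit(X_train, Y_train)
print(pipe.score(X_test, Y_test))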
In [10]: # Predicting the Test set results
y_pred = model.predict(X_test)
In [11]: # Checking accuracy score
from sklearn.metrics import accuracy_score
accuracy_score(Y_test, y_pred)
Out[11]: 0.7716535433070866
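Accuracy alone can hide class-wise behaviour on a dataset like this. As an optional follow-up (not part of the original session), the confusion matrix and per-class metrics give a fuller picture:

from sklearn.metrics import confusion_matrix, classification_report

# Rows of the confusion matrix are true classes, columns are predictions.
print(confusion_matrix(Y_test, y_pred))
print(classification_report(Y_test, y_pred))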
K-Fold Cross-Validation
Partition the data into k folds; each fold serves once as the test set while the remaining folds train the model, and the k scores are averaged.
In [12]: from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
In [14]: # Initialize parameters
num_folds = 10
kfold = KFold(n_splits=num_folds)
model1 = LogisticRegression()
In [15]: # Fitting the model and Extracting the results
results1 = cross_val_score(model1, X, Y, cv=kfold)
...
In [16]: results1
...
In [17]: print(results1.mean()*100.0, results1.std()*100.0)
...
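By default KFold keeps rows in their original order, so the folds are fixed by the file layout; shuffling before splitting is often preferable. A small variant (the shuffle arguments are an addition to the original session):

# Shuffle rows before splitting so folds are independent of row order;
# random_state pins the shuffle for reproducibility.
kfold_shuffled = KFold(n_splits=num_folds, shuffle=True, random_state=7)
results_shuffled = cross_val_score(model1, X, Y, cv=kfold_shuffled)
print(results_shuffled.mean()*100.0, results_shuffled.std()*100.0)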
Leave-One-Out Cross-Validation
The extreme case of k-fold where k equals the number of samples: each observation is held out once as a single-row test set, so one model is trained per row.
In [18]: from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
In [19]: # Initialize parameters
loocv = LeaveOneOut()
model2 = LogisticRegression()
In [20]: # Fitting the model and Extracting the results
results2 = cross_val_score(model2, X, Y, cv=loocv)
...
In [21]: print(results2.mean()*100.0, results2.std()*100.0)
77.05345501955672 42.04890690023727
The large standard deviation is expected here: each LOOCV fold scores a single held-out sample, so every per-fold score is exactly 0 or 1 and the spread across folds is necessarily wide even when the mean accuracy is stable.
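A quick check (again an addition, not part of the original session) makes this concrete: the per-fold scores returned by cross_val_score contain nothing but zeros and ones.

# Each fold scores exactly one held-out sample, so every entry
# of results2 is either 0.0 (misclassified) or 1.0 (correct).
print(np.unique(results2))  # expected: [0. 1.]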
Repeated K-Fold Cross-Validation
Runs k-fold several times with a fresh shuffle on each repeat, yielding more score samples and a steadier estimate of the mean accuracy.
In [22]: from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
In [23]: # Initialize parameters
n_splits = 10
kfold3 = RepeatedKFold(n_splits=n_splits, n_repeats=2)
model3 = LogisticRegression()
In [24]: # Fitting the model and Extracting the results
results3 = cross_val_score(model3, X, Y, cv=kfold3)
...
In [26]: # Check the Accuracy
print("Accuracy: ", results3*100.0)
Accuracy:  [75.32467532 72.72727273 72.72727273 76.62337662 80.51948052 79.22077922
 75.32467532 86.84210526 67.10526316 80.26315789 81.81818182 77.92207792
 80.51948052 76.62337662 76.62337662 80.51948052 75.32467532 67.10526316
 85.52631579 77.63157895]
In [27]: print(results3.mean()*100.0, results3.std()*100.0)
77.31459330143541 4.929298671034974
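The diabetes labels are imbalanced (roughly two negatives for every positive), so a stratified variant, which preserves the class ratio in every fold, is a common refinement. A sketch, once more as an addition to the original session:

from sklearn.model_selection import RepeatedStratifiedKFold

# Stratification keeps the 0/1 class proportions identical in every fold.
rskf = RepeatedStratifiedKFold(n_splits=10, n_repeats=2, random_state=7)
results4 = cross_val_score(LogisticRegression(), X, Y, cv=rskf)
print(results4.mean()*100.0, results4.std()*100.0)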