القسم العاشر: مكتبة SKlearn
A. Data Preparation 12. Naïve Bayes
1. Data files from SKlearn 13. LDA , QDA
2. Data cleaning 14. Hierarchical Clusters
3. Metrics module 15. DbScan
4. Feature Selection 16. NLP
5. Data Scaling 17. Apriori
6. Data Split
C. Algorithm Evaluation :
B. ML Algorithms 1. Model Check
1. Linear Regression 2. Grid Search
2. Logistic Regression 3. Pipeline
3. Neural Network 4. Model Save
4. SVR
5. SVC D. Time Series
6. K-means
7. PCA
8. Decision Tree
9. Ensemble Regression
10. Ensemble Classifier
11. K Nearest Neighbors
1
3.1) Model Check
وهي عملية فحص لكفاءة الموديل، ومقارنته بعدد من الموديلز الأخرى
يتم استخدام فكرة KFolds فيها
يتم استخدامها من الموديول model_selection
لها أكثر من أداة مثل :
3.1.1 model_selection.cross_validate
3.1.2 model_selection.cross_val_predict
3.1.3 model_selection.cross_val_score
2
3.1.1) cross_validate
و هي تقوم بحساب العديد من الأرقام الهامة لأي موديل بعد عمل ال fitting مثل (fit time , test r2 , train r2) :
يتم استخدامها عبر الموديول model_selection.cross_validate
3
الصيغة العامة
#Import Libraries
from sklearn.model_selection import cross_validate
#----------------------------------------------------
#Applying Cross Validate :
'''
model_selection.cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=’warn’, n_jobs=None,
verbose=0,fit_params=None, pre_dispatch=‘2*n_jobs’, return_train_score=’warn’,
return_estimator=False,error_score=’raise-deprecating’)
'''
# Template: SelectedModel, X and y are placeholders that must be defined
# before this snippet runs -- don't forget to define the model first !!!
# Default scorer; train scores are requested explicitly so that the
# 'train_score' key is present in the returned dict.
CrossValidateValues1 = cross_validate(SelectedModel, X, y, cv=3, return_train_score=True)
# Two named scorers: result keys are 'test_<name>' / 'train_<name>'.
# return_train_score=True is needed here as well; without it the
# 'train_r2' and 'train_neg_mean_squared_error' entries are missing on
# scikit-learn releases whose default is False, and reading them raises
# KeyError.
CrossValidateValues2 = cross_validate(SelectedModel, X, y, cv=3,
                                      scoring=('r2', 'neg_mean_squared_error'),
                                      return_train_score=True)
# Showing Results
print('Train Score Value : ', CrossValidateValues1['train_score'])
print('Test Score Value : ', CrossValidateValues1['test_score'])
print('Fit Time : ', CrossValidateValues1['fit_time'])
print('Score Time : ', CrossValidateValues1['score_time'])
4
# Report the per-fold values of both scorers stored in CrossValidateValues2.
# The 'neg_mean_squared_error' scorer yields negated MSE values.
for label, key in (
    ('Train MSE Value : ', 'train_neg_mean_squared_error'),
    ('Test MSE Value : ', 'test_neg_mean_squared_error'),
    ('Train R2 Value : ', 'train_r2'),
    ('Test R2 Value : ', 'test_r2'),
):
    print(label, CrossValidateValues2[key])
5
مثال
#Import Libraries
# NOTE(review): load_boston is no longer shipped in recent scikit-learn
# releases (removed in 1.2) -- confirm the installed version provides it.
from sklearn.datasets import load_boston
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import cross_validate
#----------------------------------------------------
# Load the Boston data and pull out the feature matrix (X) and target (y)
BostonData = load_boston()
X, y = BostonData.data, BostonData.target
#----------------------------------------------------
# Build the SGDRegressor model: Huber loss, L2 penalty, fixed random seed
SGDRegressionModel = SGDRegressor(
    loss='huber',
    penalty='l2',
    alpha=0.1,
    random_state=33,
)
6
#----------------------------------------------------
#Applying Cross Validate :
'''
model_selection.cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=’warn’, n_jobs=None,
verbose=0,fit_params=None, pre_dispatch=‘2*n_jobs’, return_train_score=’warn’,
return_estimator=False,error_score=’raise-deprecating’)
'''
# don't forget to define the model first !!!
# Default scorer, 3 folds; train scores requested explicitly so that the
# 'train_score' key is present.
CrossValidateValues1 = cross_validate(SGDRegressionModel, X, y, cv=3, return_train_score=True)
# Two named scorers, 3 folds. return_train_score=True is needed here too:
# the 'train_r2' / 'train_neg_mean_squared_error' entries printed below are
# missing on scikit-learn releases whose default is False, which would
# raise KeyError.
CrossValidateValues2 = cross_validate(SGDRegressionModel, X, y, cv=3,
                                      scoring=('r2', 'neg_mean_squared_error'),
                                      return_train_score=True)
# Showing Results
print('Train Score Value : ', CrossValidateValues1['train_score'])
print('Test Score Value : ', CrossValidateValues1['test_score'])
print('Fit Time : ', CrossValidateValues1['fit_time'])
print('Score Time : ', CrossValidateValues1['score_time'])
# The 'neg_mean_squared_error' scorer yields negated MSE values
print('Train MSE Value : ', CrossValidateValues2['train_neg_mean_squared_error'])
print('Test MSE Value : ', CrossValidateValues2['test_neg_mean_squared_error'])
print('Train R2 Value : ', CrossValidateValues2['train_r2'])
print('Test R2 Value : ', CrossValidateValues2['test_r2'])
7
مثال
from sklearn import datasets, linear_model
from sklearn.model_selection import cross_validate

# Work on the first 150 samples of the diabetes dataset only
diabetes = datasets.load_diabetes()
X = diabetes.data[:150]
y = diabetes.target[:150]
reg = linear_model.LinearRegression()

# 3-fold cross-validation with the estimator's default scorer, test scores only
cv_results = cross_validate(reg, X, y, cv=3, return_train_score=False)
# The loop body must be indented; print every entry of the result dict
for key, value in cv_results.items():
    print('value of ' , key , ' is ' , value)

# 5-fold cross-validation with two named scorers, train scores included
scores = cross_validate(reg, X, y, cv=5,
                        scoring=('r2', 'neg_mean_squared_error'),
                        return_train_score=True)
print('details are : \n' , scores)
8
3.1.2) cross_val_predict
و هي تقوم بحساب نتيجة تطبيق الموديل علي البيانات ,اي قيمة y_pred
يتم استخدامها عبر الموديول model_selection.cross_val_predict
9
الصيغة العامة
#Import Libraries
from sklearn.model_selection import cross_val_predict
#----------------------------------------------------
#Applying Cross Validate Predict :
'''
model_selection.cross_val_predict(estimator, X, y=None, groups=None,cv=’warn’, n_jobs=None,verbose=0,
fit_params=None, pre_dispatch=‘2*n_jobs’,method=’predict’)
'''
# Template: SelectedModel and the X_train / y_train / X_test / y_test splits
# are placeholders -- don't forget to define the model first !!!
# Each call runs its own 3-fold cross-validation on the data it is given.
CrossValidatePredictionTrain = cross_val_predict(
    SelectedModel, X_train, y_train, cv=3
)
CrossValidatePredictionTest = cross_val_predict(
    SelectedModel, X_test, y_test, cv=3
)
# Showing Results (first 10 predictions of each set)
print(
    'Cross Validate Prediction for Training Set: \n',
    CrossValidatePredictionTrain[:10],
)
print(
    'Cross Validate Prediction for Testing Set: \n',
    CrossValidatePredictionTest[:10],
)
10
مثال
#Import Libraries
# NOTE(review): load_boston is no longer shipped in recent scikit-learn
# releases (removed in 1.2) -- confirm the installed version provides it.
from sklearn.datasets import load_boston
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import train_test_split
#----------------------------------------------------
# Load the Boston data and pull out the feature matrix (X) and target (y)
BostonData = load_boston()
X, y = BostonData.data, BostonData.target
#----------------------------------------------------
#Splitting data
11
# Shuffled split with a fixed seed; 33% of the samples go to the test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=44, shuffle=True
)
#----------------------------------------------------
#Applying Ridge Regression Model
RidgeRegressionModel = Ridge(alpha=1.0, random_state=33)
#----------------------------------------------------
#Applying Cross Validate Predict :
'''
model_selection.cross_val_predict(estimator, X, y=None, groups=None,cv=’warn’, n_jobs=None,verbose=0,
fit_params=None, pre_dispatch=‘2*n_jobs’,method=’predict’)
'''
# don't forget to define the model first !!!
# Each call runs its own 3-fold cross-validation on the data it is given.
CrossValidatePredictionTrain = cross_val_predict(
    RidgeRegressionModel, X_train, y_train, cv=3
)
CrossValidatePredictionTest = cross_val_predict(
    RidgeRegressionModel, X_test, y_test, cv=3
)
# Showing Results (first 10 predictions of each set)
print(
    'Cross Validate Prediction for Training Set: \n',
    CrossValidatePredictionTrain[:10],
)
print(
    'Cross Validate Prediction for Testing Set: \n',
    CrossValidatePredictionTest[:10],
)
12
مثال
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_predict

# Work on the first 150 samples of the diabetes dataset only
diabetes = datasets.load_diabetes()
X, y = diabetes.data[:150], diabetes.target[:150]

# Four regressors to be compared
model1 = LinearRegression()
model2 = SVR(gamma='auto')
model3 = DecisionTreeRegressor()
model4 = RandomForestRegressor(n_estimators=20)
13
models = [model1, model2, model3, model4]
# Print the cross-validated predictions of every model for cv = 2, 3 and 4.
# x is the 1-based model number shown in the output.
# NOTE(review): the loop nesting below was reconstructed after the original
# indentation was lost -- confirm the separator placement is as intended.
for x, m in enumerate(models, start=1):
    for n in range(2, 5):
        print('result of model number : ' , x ,' for cv value ',n,' is \n' , cross_val_predict(m, X, y, cv=n))
        print('-----------------------------------')
    print('=====================================')
    print('=====================================')
14
3.1.3) cross_val_score
و هي تقوم بحساب قيمة score لكل موديل لكل تطبيق KFold
يتم استخدامها عبر الموديول model_selection.cross_val_score
15
الصيغة العامة
#Import Libraries
from sklearn.model_selection import cross_val_score
#----------------------------------------------------
#Applying Cross Validate Score :
'''
model_selection.cross_val_score(estimator,X,y=None,groups=None,scoring=None,cv=’warn’,n_jobs=None,verbose=0,
fit_params=None,pre_dispatch=‘2*n_jobs’,error_score=’raise-deprecating’)
'''
# Template: SelectedModel and the X_train / y_train / X_test / y_test splits
# are placeholders -- don't forget to define the model first !!!
# Each call runs its own 3-fold cross-validation on the data it is given.
CrossValidateScoreTrain = cross_val_score(
    SelectedModel, X_train, y_train, cv=3
)
CrossValidateScoreTest = cross_val_score(
    SelectedModel, X_test, y_test, cv=3
)
# Showing Results
print(
    'Cross Validate Score for Training Set: \n',
    CrossValidateScoreTrain,
)
print(
    'Cross Validate Score for Testing Set: \n',
    CrossValidateScoreTest,
)
16
مثال
#Import Libraries
# NOTE(review): load_boston is no longer shipped in recent scikit-learn
# releases (removed in 1.2) -- confirm the installed version provides it.
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score
#----------------------------------------------------
# Load the Boston data and pull out the feature matrix (X) and target (y)
BostonData = load_boston()
X, y = BostonData.data, BostonData.target
#----------------------------------------------------
#Splitting data
# Shuffled split with a fixed seed; 33% of the samples go to the test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=44, shuffle=True
)
17
#----------------------------------------------------
#Applying DecisionTreeRegressor Model
DecisionTreeRegressorModel = DecisionTreeRegressor(
    random_state=33,
    max_depth=3,
)
#----------------------------------------------------
#Applying Cross Validate Score :
'''
model_selection.cross_val_score(estimator,X,y=None,groups=None,scoring=None,cv=’warn’,n_jobs=None,verbose=0,
fit_params=None,pre_dispatch=‘2*n_jobs’,error_score=’raise-deprecating’)
'''
# don't forget to define the model first !!!
# Each call runs its own 3-fold cross-validation on the data it is given.
CrossValidateScoreTrain = cross_val_score(
    DecisionTreeRegressorModel, X_train, y_train, cv=3
)
CrossValidateScoreTest = cross_val_score(
    DecisionTreeRegressorModel, X_test, y_test, cv=3
)
# Showing Results
print(
    'Cross Validate Score for Training Set: \n',
    CrossValidateScoreTrain,
)
print(
    'Cross Validate Score for Testing Set: \n',
    CrossValidateScoreTest,
)
18
مثال
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score

# Work on the first 150 samples of the diabetes dataset only
diabetes = datasets.load_diabetes()
X, y = diabetes.data[:150], diabetes.target[:150]

# Four regressors to be compared
model1 = LinearRegression()
model2 = SVR(gamma='auto')
model3 = DecisionTreeRegressor()
model4 = RandomForestRegressor(n_estimators=100)
19
models = [model1, model2, model3, model4]
# Print the cross-validation scores of every model for cv = 2 .. 10.
# x is the 1-based model number shown in the output.
# NOTE(review): the loop nesting below was reconstructed after the original
# indentation was lost -- confirm the separator placement is as intended.
for x, m in enumerate(models, start=1):
    for n in range(2, 11):
        print('result of model number : ' , x ,' for cv value ',n,' is ' , cross_val_score(m, X, y, cv=n))
        print('-----------------------------------')
    print('=====================================')
    print('=====================================')
20