@@ -929,6 +929,13 @@ def fit(self, X, y, sample_weight=None, monitor=None):
             computing held-out estimates, early stopping, model introspection,
             and snapshotting.

+        presort : bool, optional (default=True)
+
+            Whether to presort the data to speed up the finding of best splits
+            in fitting. By default gradient boosting uses presorting, but this
+            may slow down the training process on large datasets or with deep
+            trees. This option is not available for sparse data.
+
         Returns
         -------
         self : object
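
For reference, a minimal usage sketch of the new parameter (not part of this
diff; the dataset and hyperparameter values are illustrative, and it assumes a
build with this patch applied):

    from sklearn.datasets import make_classification
    from sklearn.ensemble import GradientBoostingClassifier

    # Toy dense dataset; the docstring above notes that presorting is not
    # available for sparse input.
    X, y = make_classification(n_samples=1000, n_features=20, random_state=0)

    # presort defaults to True after this change; pass False to skip
    # presorting, e.g. on very large datasets or with deep trees.
    clf = GradientBoostingClassifier(n_estimators=100, max_depth=3,
                                     presort=False)
    clf.fit(X, y)
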
@@ -1285,6 +1292,14 @@ class GradientBoostingClassifier(BaseGradientBoosting, ClassifierMixin):
         If None, the random number generator is the RandomState instance used
         by `np.random`.

+    presort : bool, optional (default=True)
+
+        Whether to presort the data to speed up the finding of best splits in
+        fitting. By default this is turned on for gradient boosting. However,
+        if the dataset is very large, or the trees being built are deep,
+        presorting may slow down training. This option is not available for
+        sparse data.
+
     Attributes
     ----------
     feature_importances_ : array, shape = [n_features]
@@ -1336,7 +1351,8 @@ def __init__(self, loss='deviance', learning_rate=0.1, n_estimators=100,
                  min_samples_leaf=1, min_weight_fraction_leaf=0.,
                  max_depth=3, init=None, random_state=None,
                  max_features=None, verbose=0,
-                 max_leaf_nodes=None, warm_start=False):
+                 max_leaf_nodes=None, warm_start=False,
+                 presort=True):

         super(GradientBoostingClassifier, self).__init__(
             loss=loss, learning_rate=learning_rate, n_estimators=n_estimators,
@@ -1346,7 +1362,8 @@ def __init__(self, loss='deviance', learning_rate=0.1, n_estimators=100,
             max_depth=max_depth, init=init, subsample=subsample,
             max_features=max_features,
             random_state=random_state, verbose=verbose,
-            max_leaf_nodes=max_leaf_nodes, warm_start=warm_start)
+            max_leaf_nodes=max_leaf_nodes, warm_start=warm_start,
+            presort=presort)

     def _validate_y(self, y):
         self.classes_, y = np.unique(y, return_inverse=True)
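
The docstring claims a speed trade-off, so a rough way to check it on a given
dataset is to time both settings (a sketch, not part of this diff; timings
will vary with data size and tree depth):

    import time

    from sklearn.datasets import make_classification
    from sklearn.ensemble import GradientBoostingClassifier

    X, y = make_classification(n_samples=5000, n_features=50, random_state=0)

    for presort in (True, False):
        clf = GradientBoostingClassifier(n_estimators=50, max_depth=8,
                                         presort=presort, random_state=0)
        start = time.time()
        clf.fit(X, y)
        print("presort=%s: %.2fs" % (presort, time.time() - start))
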
@@ -1611,6 +1628,13 @@ class GradientBoostingRegressor(BaseGradientBoosting, RegressorMixin):
         If None, the random number generator is the RandomState instance used
         by `np.random`.

+    presort : bool, optional (default=True)
+
+        Whether to presort the data to speed up the finding of best splits in
+        fitting. By default this is turned on for gradient boosting. However,
+        if the dataset is very large, or the trees being built are deep,
+        presorting may slow down training. This option is not available for
+        sparse data.

     Attributes
     ----------
@@ -1660,7 +1684,7 @@ def __init__(self, loss='ls', learning_rate=0.1, n_estimators=100,
                  min_samples_leaf=1, min_weight_fraction_leaf=0.,
                  max_depth=3, init=None, random_state=None,
                  max_features=None, alpha=0.9, verbose=0, max_leaf_nodes=None,
-                 warm_start=False):
+                 warm_start=False, presort=True):

         super(GradientBoostingRegressor, self).__init__(
             loss=loss, learning_rate=learning_rate, n_estimators=n_estimators,
@@ -1670,7 +1694,8 @@ def __init__(self, loss='ls', learning_rate=0.1, n_estimators=100,
             max_depth=max_depth, init=init, subsample=subsample,
             max_features=max_features,
             random_state=random_state, alpha=alpha, verbose=verbose,
-            max_leaf_nodes=max_leaf_nodes, warm_start=warm_start)
+            max_leaf_nodes=max_leaf_nodes, warm_start=warm_start,
+            presort=presort)

     def predict(self, X):
         """Predict regression target for X.