45
45
46
46
from ..tree .tree import DecisionTreeRegressor
47
47
from ..tree ._tree import DTYPE , TREE_LEAF
48
- from ..tree ._tree import PresortBestSplitter
49
- from ..tree ._tree import FriedmanMSE
50
48
51
49
from ._gradient_boosting import predict_stages
52
50
from ._gradient_boosting import predict_stage
@@ -731,8 +729,8 @@ def __init__(self, loss, learning_rate, n_estimators, min_samples_split,
731
729
732
730
self .estimators_ = np .empty ((0 , 0 ), dtype = np .object )
733
731
734
- def _fit_stage (self , i , X , y , y_pred , sample_weight , sample_mask ,
735
- criterion , splitter , random_state ):
732
+ def _fit_stage (self , i , X , X_idx_sorted , y , y_pred , sample_weight , sample_mask ,
733
+ random_state ):
736
734
"""Fit another stage of ``n_classes_`` trees to the boosting model. """
737
735
738
736
assert sample_mask .dtype == np .bool
@@ -748,8 +746,8 @@ def _fit_stage(self, i, X, y, y_pred, sample_weight, sample_mask,
748
746
749
747
# induce regression tree on residuals
750
748
tree = DecisionTreeRegressor (
751
- criterion = criterion ,
752
- splitter = splitter ,
749
+ criterion = 'mse' ,
750
+ splitter = 'best' ,
753
751
max_depth = self .max_depth ,
754
752
min_samples_split = self .min_samples_split ,
755
753
min_samples_leaf = self .min_samples_leaf ,
@@ -763,7 +761,7 @@ def _fit_stage(self, i, X, y, y_pred, sample_weight, sample_mask,
763
761
sample_weight = sample_weight * sample_mask .astype (np .float64 )
764
762
765
763
tree .fit (X , residual , sample_weight = sample_weight ,
766
- check_input = False )
764
+ check_input = False , presort = True , X_idx_sorted = X_idx_sorted )
767
765
768
766
# update tree leaves
769
767
loss .update_terminal_regions (tree .tree_ , X , y , residual , y_pred ,
@@ -975,9 +973,12 @@ def fit(self, X, y, sample_weight=None, monitor=None):
975
973
y_pred = self ._decision_function (X )
976
974
self ._resize_state ()
977
975
976
+ X_idx_sorted = np .asfortranarray (np .argsort (X , axis = 0 ),
977
+ dtype = np .int32 )
978
+
978
979
# fit the boosting stages
979
- n_stages = self ._fit_stages (X , y , y_pred , sample_weight , random_state ,
980
- begin_at_stage , monitor )
980
+ n_stages = self ._fit_stages (X , X_idx_sorted , y , y_pred , sample_weight ,
981
+ random_state , begin_at_stage , monitor )
981
982
# change shape of arrays after fit (early-stopping or additional ests)
982
983
if n_stages != self .estimators_ .shape [0 ]:
983
984
self .estimators_ = self .estimators_ [:n_stages ]
@@ -987,7 +988,7 @@ def fit(self, X, y, sample_weight=None, monitor=None):
987
988
988
989
return self
989
990
990
- def _fit_stages (self , X , y , y_pred , sample_weight , random_state ,
991
+ def _fit_stages (self , X , X_idx_sorted , y , y_pred , sample_weight , random_state ,
991
992
begin_at_stage = 0 , monitor = None ):
992
993
"""Iteratively fits the stages.
993
994
@@ -1009,14 +1010,6 @@ def _fit_stages(self, X, y, y_pred, sample_weight, random_state,
1009
1010
else :
1010
1011
min_weight_leaf = 0.
1011
1012
1012
- # init criterion and splitter
1013
- criterion = FriedmanMSE (1 )
1014
- splitter = PresortBestSplitter (criterion ,
1015
- self .max_features_ ,
1016
- self .min_samples_leaf ,
1017
- min_weight_leaf ,
1018
- random_state )
1019
-
1020
1013
if self .verbose :
1021
1014
verbose_reporter = VerboseReporter (self .verbose )
1022
1015
verbose_reporter .init (self , begin_at_stage )
@@ -1035,9 +1028,8 @@ def _fit_stages(self, X, y, y_pred, sample_weight, random_state,
1035
1028
sample_weight [~ sample_mask ])
1036
1029
1037
1030
# fit next stage of trees
1038
- y_pred = self ._fit_stage (i , X , y , y_pred , sample_weight ,
1039
- sample_mask , criterion , splitter ,
1040
- random_state )
1031
+ y_pred = self ._fit_stage (i , X , X_idx_sorted , y , y_pred , sample_weight ,
1032
+ sample_mask , random_state )
1041
1033
1042
1034
# track deviance (= loss)
1043
1035
if do_oob :
0 commit comments