Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion benchmarks/bench_hist_gradient_boosting.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,9 @@ def one_run(n_samples):
lightgbm_score_duration = None
if args.lightgbm:
print("Fitting a LightGBM model...")
lightgbm_est = get_equivalent_estimator(est, lib="lightgbm")
lightgbm_est = get_equivalent_estimator(
est, lib="lightgbm", n_classes=args.n_classes
)

tic = time()
lightgbm_est.fit(X_train, y_train, sample_weight=sample_weight_train)
Expand Down
8 changes: 6 additions & 2 deletions benchmarks/bench_hist_gradient_boosting_threading.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,9 @@ def get_estimator_and_data():
for libname in ["lightgbm", "xgboost", "catboost"]:
if getattr(args, libname):
print(libname)
est = get_equivalent_estimator(sklearn_est, lib=libname)
est = get_equivalent_estimator(
sklearn_est, lib=libname, n_classes=args.n_classes
)
pprint(est.get_params())


Expand Down Expand Up @@ -169,7 +171,9 @@ def one_run(n_threads, n_samples):
lightgbm_score_duration = None
if args.lightgbm:
print("Fitting a LightGBM model...")
lightgbm_est = get_equivalent_estimator(est, lib="lightgbm")
lightgbm_est = get_equivalent_estimator(
est, lib="lightgbm", n_classes=args.n_classes
)
lightgbm_est.set_params(num_threads=n_threads)

tic = time()
Expand Down
5 changes: 2 additions & 3 deletions build_tools/azure/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,8 @@ elif [[ "$DISTRIB" == "conda-pip-latest" ]]; then
python -m pip install --only-binary :all: scikit-image || true

python -m pip install pandas matplotlib pyamg
# do not install dependencies for lightgbm since it requires scikit-learn
# and install a version less than 3.0.0 until the issue #18316 is solved.
python -m pip install "lightgbm<3.0.0" --no-deps
# do not install dependencies for lightgbm since it requires scikit-learn.
python -m pip install "lightgbm>=3.0.0" --no-deps
elif [[ "$DISTRIB" == "conda-pip-scipy-dev" ]]; then
# FIXME: temporary fix to link against system libraries on linux
export LDFLAGS="$LDFLAGS -Wl,--sysroot=/"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,12 @@ def test_same_predictions_classification(

rng = np.random.RandomState(seed=seed)
max_iter = 1
n_classes = 2
max_bins = 255

X, y = make_classification(
n_samples=n_samples,
n_classes=2,
n_classes=n_classes,
n_features=5,
n_informative=5,
n_redundant=0,
Expand Down Expand Up @@ -174,13 +175,14 @@ def test_same_predictions_multiclass_classification(
pytest.importorskip("lightgbm")

rng = np.random.RandomState(seed=seed)
n_classes = 3
max_iter = 1
max_bins = 255
lr = 1

X, y = make_classification(
n_samples=n_samples,
n_classes=3,
n_classes=n_classes,
n_features=5,
n_informative=5,
n_redundant=0,
Expand All @@ -204,7 +206,9 @@ def test_same_predictions_multiclass_classification(
min_samples_leaf=min_samples_leaf,
max_leaf_nodes=max_leaf_nodes,
)
est_lightgbm = get_equivalent_estimator(est_sklearn, lib="lightgbm")
est_lightgbm = get_equivalent_estimator(
est_sklearn, lib="lightgbm", n_classes=n_classes
)

est_lightgbm.fit(X_train, y_train)
est_sklearn.fit(X_train, y_train)
Expand All @@ -224,7 +228,8 @@ def test_same_predictions_multiclass_classification(

acc_lightgbm = accuracy_score(y_train, pred_lightgbm)
acc_sklearn = accuracy_score(y_train, pred_sklearn)
np.testing.assert_almost_equal(acc_lightgbm, acc_sklearn, decimal=2)

np.testing.assert_allclose(acc_lightgbm, acc_sklearn, rtol=0, atol=5e-2)

if max_leaf_nodes < 10 and n_samples >= 1000:

Expand Down
8 changes: 6 additions & 2 deletions sklearn/ensemble/_hist_gradient_boosting/utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ from .common cimport G_H_DTYPE_C
from .common cimport Y_DTYPE_C


def get_equivalent_estimator(estimator, lib='lightgbm'):
def get_equivalent_estimator(estimator, lib='lightgbm', n_classes=None):
"""Return an unfitted estimator from another lib with matching hyperparams.

This utility function takes care of renaming the sklearn parameters into
Expand Down Expand Up @@ -70,7 +70,11 @@ def get_equivalent_estimator(estimator, lib='lightgbm'):
if sklearn_params['loss'] == 'categorical_crossentropy':
# LightGBM multiplies hessians by 2 in multiclass loss.
lightgbm_params['min_sum_hessian_in_leaf'] *= 2
lightgbm_params['learning_rate'] *= 2
# LightGBM 3.0 introduced a different scaling of the hessian for the multiclass case.
# This is equivalent to scaling the learning rate by n_classes / (n_classes - 1).
# See https://github.com/microsoft/LightGBM/pull/3256.
if n_classes is not None:
lightgbm_params['learning_rate'] *= n_classes / (n_classes - 1)

# XGB
xgboost_loss_mapping = {
Expand Down