diff --git a/benchmarks/bench_hist_gradient_boosting.py b/benchmarks/bench_hist_gradient_boosting.py
index 7f7dec004b809..58477e8894fd1 100644
--- a/benchmarks/bench_hist_gradient_boosting.py
+++ b/benchmarks/bench_hist_gradient_boosting.py
@@ -140,7 +140,9 @@ def one_run(n_samples):
     lightgbm_score_duration = None
     if args.lightgbm:
         print("Fitting a LightGBM model...")
-        lightgbm_est = get_equivalent_estimator(est, lib="lightgbm")
+        lightgbm_est = get_equivalent_estimator(
+            est, lib="lightgbm", n_classes=args.n_classes
+        )
 
         tic = time()
         lightgbm_est.fit(X_train, y_train, sample_weight=sample_weight_train)
diff --git a/benchmarks/bench_hist_gradient_boosting_threading.py b/benchmarks/bench_hist_gradient_boosting_threading.py
index 264c9f0dbd704..a43debf043dee 100644
--- a/benchmarks/bench_hist_gradient_boosting_threading.py
+++ b/benchmarks/bench_hist_gradient_boosting_threading.py
@@ -135,7 +135,9 @@ def get_estimator_and_data():
 for libname in ["lightgbm", "xgboost", "catboost"]:
     if getattr(args, libname):
         print(libname)
-        est = get_equivalent_estimator(sklearn_est, lib=libname)
+        est = get_equivalent_estimator(
+            sklearn_est, lib=libname, n_classes=args.n_classes
+        )
         pprint(est.get_params())
 
 
@@ -169,7 +171,9 @@ def one_run(n_threads, n_samples):
     lightgbm_score_duration = None
     if args.lightgbm:
         print("Fitting a LightGBM model...")
-        lightgbm_est = get_equivalent_estimator(est, lib="lightgbm")
+        lightgbm_est = get_equivalent_estimator(
+            est, lib="lightgbm", n_classes=args.n_classes
+        )
         lightgbm_est.set_params(num_threads=n_threads)
 
         tic = time()
diff --git a/build_tools/azure/install.sh b/build_tools/azure/install.sh
index f67f72836e72d..a15ce3657f6f6 100755
--- a/build_tools/azure/install.sh
+++ b/build_tools/azure/install.sh
@@ -113,9 +113,8 @@ elif [[ "$DISTRIB" == "conda-pip-latest" ]]; then
     python -m pip install --only-binary :all: scikit-image || true
 
     python -m pip install pandas matplotlib pyamg
-    # do not install dependencies for lightgbm since it requires scikit-learn
-    # and install a version less than 3.0.0 until the issue #18316 is solved.
-    python -m pip install "lightgbm<3.0.0" --no-deps
+    # do not install dependencies for lightgbm since it requires scikit-learn.
+    python -m pip install "lightgbm>=3.0.0" --no-deps
 elif [[ "$DISTRIB" == "conda-pip-scipy-dev" ]]; then
     # FIXME: temporary fix to link against system libraries on linux
     export LDFLAGS="$LDFLAGS -Wl,--sysroot=/"
diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py
index 7046f1a74fb5d..95c0d5f53d640 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py
@@ -104,11 +104,12 @@ def test_same_predictions_classification(
 
     rng = np.random.RandomState(seed=seed)
     max_iter = 1
+    n_classes = 2
     max_bins = 255
 
     X, y = make_classification(
         n_samples=n_samples,
-        n_classes=2,
+        n_classes=n_classes,
         n_features=5,
         n_informative=5,
         n_redundant=0,
@@ -174,13 +175,14 @@ def test_same_predictions_multiclass_classification(
     pytest.importorskip("lightgbm")
 
     rng = np.random.RandomState(seed=seed)
+    n_classes = 3
     max_iter = 1
     max_bins = 255
     lr = 1
 
     X, y = make_classification(
         n_samples=n_samples,
-        n_classes=3,
+        n_classes=n_classes,
         n_features=5,
         n_informative=5,
         n_redundant=0,
@@ -204,7 +206,9 @@
         min_samples_leaf=min_samples_leaf,
         max_leaf_nodes=max_leaf_nodes,
     )
-    est_lightgbm = get_equivalent_estimator(est_sklearn, lib="lightgbm")
+    est_lightgbm = get_equivalent_estimator(
+        est_sklearn, lib="lightgbm", n_classes=n_classes
+    )
 
     est_lightgbm.fit(X_train, y_train)
     est_sklearn.fit(X_train, y_train)
@@ -224,7 +228,8 @@
 
     acc_lightgbm = accuracy_score(y_train, pred_lightgbm)
     acc_sklearn = accuracy_score(y_train, pred_sklearn)
-    np.testing.assert_almost_equal(acc_lightgbm, acc_sklearn, decimal=2)
+
+    np.testing.assert_allclose(acc_lightgbm, acc_sklearn, rtol=0, atol=5e-2)
 
     if max_leaf_nodes < 10 and n_samples >= 1000:
diff --git a/sklearn/ensemble/_hist_gradient_boosting/utils.pyx b/sklearn/ensemble/_hist_gradient_boosting/utils.pyx
index 9f1c579658c3b..53aaa450c90ce 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/utils.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/utils.pyx
@@ -13,7 +13,7 @@
 from .common cimport G_H_DTYPE_C
 from .common cimport Y_DTYPE_C
 
-def get_equivalent_estimator(estimator, lib='lightgbm'):
+def get_equivalent_estimator(estimator, lib='lightgbm', n_classes=None):
     """Return an unfitted estimator from another lib with matching hyperparams.
 
     This utility function takes care of renaming the sklearn parameters into
@@ -70,7 +70,11 @@
     if sklearn_params['loss'] == 'categorical_crossentropy':
         # LightGBM multiplies hessians by 2 in multiclass loss.
         lightgbm_params['min_sum_hessian_in_leaf'] *= 2
-        lightgbm_params['learning_rate'] *= 2
+        # LightGBM 3.0 introduced a different scaling of the hessian for the
+        # multiclass case, which is equivalent to scaling the learning rate.
+        # See https://github.com/microsoft/LightGBM/pull/3256.
+        if n_classes is not None:
+            lightgbm_params['learning_rate'] *= n_classes / (n_classes - 1)
 
     # XGB
     xgboost_loss_mapping = {
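
For context, here is a minimal sketch (not part of the patch) of why the `n_classes / (n_classes - 1)` factor in `utils.pyx` compensates for a constant scaling of the hessians. It assumes the simplified, unregularized Newton leaf value `-learning_rate * sum(g) / sum(h)`: scaling the hessians and the learning rate by the same constant leaves the leaf value unchanged. All names below are illustrative.

```python
# Sketch of the learning-rate / hessian-scaling equivalence used above.
# Assumes the unregularized Newton leaf value -lr * sum(g) / sum(h).
import numpy as np

rng = np.random.RandomState(0)
g = rng.normal(size=100)             # gradients of the samples in a leaf
h = rng.uniform(0.1, 1.0, size=100)  # hessians of the samples in a leaf

n_classes = 3
c = n_classes / (n_classes - 1)      # constant hessian scaling to compensate
lr = 0.1

leaf_plain = -lr * g.sum() / h.sum()               # unscaled hessians
leaf_scaled = -(lr * c) * g.sum() / (c * h).sum()  # scaled hessians, rescaled lr

np.testing.assert_allclose(leaf_plain, leaf_scaled)
```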