-
-
Notifications
You must be signed in to change notification settings - Fork 26.6k
Closed
Description
Describe the bug
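Between versions 1.3.2 and 1.4.0, `LogisticRegression` became less accurate: with identical data and settings, the gradient of the objective at the fitted coefficients is no longer close to zero.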
Steps/Code to Reproduce
import numpy as np
import pandas as pd
import sklearn.pipeline
import sklearn.preprocessing
import sklearn.linear_model
# Toy dataset for the reproduction: an integer `age` column, used as the only
# feature, and a boolean `exiting` column, used as the target when the
# pipeline is fitted.
df = pd.DataFrame({
'age': [0, 1, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 1, 2, 0, 1, 0, 1, 2, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 0, 1, 2, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
'exiting': [False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, True, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False]
})
# One category per integer age from 0 up to the largest age in the data, so
# the encoder always emits the same gap-free set of indicator columns.
data_timeline = np.arange(df["age"].max() + 1)

# One-hot encode `age`, then fit an intercept-free logistic regression with
# very weak regularization (C=1e6) and a tight tolerance using newton-cg.
estimator = sklearn.pipeline.Pipeline(
    steps=[
        (
            "onehot",
            sklearn.preprocessing.OneHotEncoder(
                sparse_output=False,
                categories=[data_timeline],
            ),
        ),
        (
            "logistic",
            sklearn.linear_model.LogisticRegression(
                solver="newton-cg",
                C=1e6,
                fit_intercept=False,
                tol=1e-7,
                max_iter=1000,
            ),
        ),
    ]
)
estimator.fit(df[["age"]], df["exiting"])
def logistic_regression_gradient(X, y, y_hat, coef, C):
    """Return the gradient of the L2-penalized logistic-regression objective.

    For an intercept-free model with objective
    ``0.5 * ||coef||**2 + C * sum(log_loss(y, y_hat))``, the gradient with
    respect to ``coef`` is ``coef + C * (y_hat - y) @ X``. At a well-converged
    solution every entry should be close to zero, so the result can be used
    as a convergence check.

    Parameters
    ----------
    X : design matrix with one row per sample and one column per coefficient.
    y : observed targets encoded as 0.0 / 1.0, one per row of ``X``.
    y_hat : predicted probability of the positive class, one per row of ``X``.
    coef : fitted coefficients; must broadcast against a vector with one
        entry per column of ``X``.
    C : inverse regularization strength multiplying the data term.

    Returns
    -------
    Array with the broadcast shape of ``coef`` and ``(y_hat - y) @ X``.
    """
    # Per-sample derivative of the log-loss with respect to the linear score.
    residual = y_hat - y
    # `*` and `@` share precedence and associate left to right, so the
    # original expression scaled the residual before projecting it onto the
    # feature columns; the parentheses keep that exact evaluation order.
    return coef + (C * residual) @ X
# Evaluate the objective's gradient at the fitted coefficients, on the same
# rows the pipeline was trained on. Column 1 of `predict_proba` is the
# probability of the second class in `classes_` (presumably `True` here).
gradient = logistic_regression_gradient(
    X=estimator.named_steps["onehot"].transform(df[["age"]]),
    y=df["exiting"].astype(float).to_numpy(),
    y_hat=estimator.predict_proba(df[["age"]])[:, 1],
    coef=estimator.named_steps["logistic"].coef_,
    C=estimator.named_steps["logistic"].C,
)
# Report the library version so the output can be matched to a release.
print(sklearn.__version__)
print(gradient)
Expected Results
1.3.2
[[ 6.38057971e-06 -1.47032373e-02 3.23092176e-06 -1.86882858e-02
-1.85588401e-02 -7.10504243e-05 8.47822321e-05 -1.58804369e-02
-1.58804369e-02 -1.58804369e-02 -1.58804369e-02 -1.18021221e-02
-2.76625890e-03 -2.76625890e-03 -2.76625890e-03 -2.76625890e-03
-2.76625890e-03 -2.76625890e-03 -2.76625890e-03 0.00000000e+00
2.22974349e-02 2.22974349e-02 2.22974349e-02 2.22974349e-02
2.22974349e-02 2.22974349e-02 2.22974349e-02 2.22974349e-02
2.22974349e-02 2.22974349e-02 2.22974349e-02 2.22974349e-02
2.22974349e-02]]
Actual Results
1.4.0
[[ 2.68892818e-03 3.64025037e+00 6.76589138e-04 3.77020823e+00
3.80372303e+00 -1.41805879e-04 1.51207953e-02 3.89408378e+00
3.89408378e+00 3.89408378e+00 3.89408378e+00 3.96014198e+00
4.05600402e+00 4.05600402e+00 4.05600402e+00 4.05600402e+00
4.05600402e+00 4.05600402e+00 4.05600402e+00 0.00000000e+00
4.22741992e+00 4.22741992e+00 4.22741992e+00 4.22741992e+00
4.22741992e+00 4.22741992e+00 4.22741992e+00 4.22741992e+00
4.22741992e+00 4.22741992e+00 4.22741992e+00 4.22741992e+00
4.22741992e+00]]
Versions
System:
python: 3.12.2 | packaged by conda-forge | (main, Feb 16 2024, 20:50:58) [GCC 12.3.0]
executable: ~/miniforge3/envs/sklearn-env/bin/python
machine: Linux-6.8.1-arch1-1-x86_64-with-glibc2.39
Python dependencies:
sklearn: 1.4.0
pip: 24.0
setuptools: 69.2.0
numpy: 1.26.4
scipy: 1.12.0
Cython: 3.0.9
pandas: 2.2.1
matplotlib: None
joblib: 1.3.2
threadpoolctl: 3.4.0
Built with OpenMP: True
threadpoolctl info:
user_api: blas
internal_api: openblas
num_threads: 8
prefix: libopenblas
filepath: ~/miniforge3/envs/sklearn-env/lib/libopenblasp-r0.3.26.so
version: 0.3.26
threading_layer: pthreads
architecture: Zen
user_api: openmp
internal_api: openmp
num_threads: 8
prefix: libgomp
filepath: ~/miniforge3/envs/sklearn-env/lib/libgomp.so.1.0.0
version: None