Description
Describe the bug
Hi,
I have been trying to build a `ColumnTransformer` with different values for the `n_jobs` parameter, but calling `fit_transform` throws the error `ValueError: cannot set WRITEABLE flag to True of this array`. I am fitting directly on a pandas DataFrame, so I am not sure whether that could be the problem.
Thanks
Best
Steps/Code to Reproduce
# Reproducer: a ColumnTransformer that runs two scaling pipelines with
# n_jobs=-1 and is fitted directly on a pandas DataFrame.
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import (
    MinMaxScaler,
    PowerTransformer,
    QuantileTransformer,
)

# NOTE: `df_train` is not defined in this snippet; it must be a pandas
# DataFrame that contains at least the columns selected by name below
# ("Length", "Diameter", "Whole weight", "Whole weight.1", "Whole weight.2",
# "Shell weight" and "Height").

pow_scaler = PowerTransformer()
quant_scaler = QuantileTransformer(output_distribution="normal")
# The same MinMaxScaler instance is passed as the final step of both pipelines.
minmax_scaler = MinMaxScaler()

pip_pow_max = Pipeline(steps=[("pow", pow_scaler), ("max", minmax_scaler)])
pip_quant_max = Pipeline(steps=[("quant", quant_scaler), ("max", minmax_scaler)])

preprocessor = ColumnTransformer(
    transformers=[
        (
            "pip_quant_max",
            pip_quant_max,
            [
                "Length",
                "Diameter",
                "Whole weight",
                "Whole weight.1",
                "Whole weight.2",
                "Shell weight",
            ],
        ),
        ("pip_power_max", pip_pow_max, ["Height"]),
    ],
    # Columns not listed above are passed through untransformed.
    remainder="passthrough",
    verbose_feature_names_out=False,
    # The reported ValueError is raised from this parallel fit_transform call.
    n_jobs=-1,
)

# fit_transform is evaluated first (keyword arguments are evaluated in order),
# so the transformer is fitted before get_feature_names_out() is called.
check = pd.DataFrame(
    data=preprocessor.fit_transform(df_train),
    columns=preprocessor.get_feature_names_out(),
)
Expected Results
No error is thrown.
Actual Results
{
"name": "ValueError",
"message": "cannot set WRITEABLE flag to True of this array",
"stack": "---------------------------------------------------------------------------
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
\"\"\"
Traceback (most recent call last):
File \"/Users/xxxx/kaggle_2/new_env/lib/python3.11/site-packages/joblib/externals/loky/process_executor.py\", line 463, in _process_worker
r = call_item()
^^^^^^^^^^^
File \"/Users/xxxx/kaggle_2/new_env/lib/python3.11/site-packages/joblib/externals/loky/process_executor.py\", line 291, in __call__
return self.fn(*self.args, **self.kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File \"/Users/xxxx/kaggle_2/new_env/lib/python3.11/site-packages/joblib/parallel.py\", line 589, in __call__
return [func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^
File \"/Users/xxxx/kaggle_2/new_env/lib/python3.11/site-packages/joblib/parallel.py\", line 589, in <listcomp>
return [func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File \"/Users/xxxx/kaggle_2/new_env/lib/python3.11/site-packages/sklearn/utils/parallel.py\", line 129, in __call__
return self.function(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File \"/Users/xxxx/kaggle_2/new_env/lib/python3.11/site-packages/sklearn/pipeline.py\", line 1303, in _fit_transform_one
res = transformer.fit_transform(X, y, **params.get(\"fit_transform\", {}))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File \"/Users/xxxx/kaggle_2/new_env/lib/python3.11/site-packages/sklearn/base.py\", line 1474, in wrapper
return fit_method(estimator, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File \"/Users/xxxx/kaggle_2/new_env/lib/python3.11/site-packages/sklearn/pipeline.py\", line 535, in fit_transform
Xt = self._fit(X, y, routed_params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File \"/Users/xxxx/kaggle_2/new_env/lib/python3.11/site-packages/sklearn/pipeline.py\", line 408, in _fit
X, fitted_transformer = fit_transform_one_cached(
^^^^^^^^^^^^^^^^^^^^^^^^^
File \"/Users/xxxx/kaggle_2/new_env/lib/python3.11/site-packages/joblib/memory.py\", line 353, in __call__
return self.func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File \"/Users/xxxx/kaggle_2/new_env/lib/python3.11/site-packages/sklearn/pipeline.py\", line 1303, in _fit_transform_one
res = transformer.fit_transform(X, y, **params.get(\"fit_transform\", {}))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File \"/Users/xxxx/kaggle_2/new_env/lib/python3.11/site-packages/sklearn/utils/_set_output.py\", line 295, in wrapped
data_to_wrap = f(self, X, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File \"/Users/xxxx/kaggle_2/new_env/lib/python3.11/site-packages/sklearn/base.py\", line 1098, in fit_transform
return self.fit(X, **fit_params).transform(X)
^^^^^^^^^^^^^^^^^^^^^^^^^
File \"/Users/xxxx/kaggle_2/new_env/lib/python3.11/site-packages/sklearn/base.py\", line 1474, in wrapper
return fit_method(estimator, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File \"/Users/xxxx/kaggle_2/new_env/lib/python3.11/site-packages/sklearn/preprocessing/_data.py\", line 2758, in fit
X = self._check_inputs(X, in_fit=True, copy=False)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File \"/Users/xxxx/kaggle_2/new_env/lib/python3.11/site-packages/sklearn/preprocessing/_data.py\", line 2847, in _check_inputs
X = self._validate_data(
^^^^^^^^^^^^^^^^^^^^
File \"/Users/xxxx/kaggle_2/new_env/lib/python3.11/site-packages/sklearn/base.py\", line 633, in _validate_data
out = check_array(X, input_name=\"X\", **check_params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File \"/Users/xxxx/kaggle_2/new_env/lib/python3.11/site-packages/sklearn/utils/validation.py\", line 1097, in check_array
array.flags.writeable = True
^^^^^^^^^^^^^^^^^^^^^
ValueError: cannot set WRITEABLE flag to True of this array
\"\"\"
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
Cell In[31], line 2
1 check = pd.DataFrame(
----> 2 data=preprocessor.fit_transform(df_train),
3 columns=preprocessor.get_feature_names_out(),
4 )
6 check
File ~/kaggle_2/new_env/lib/python3.11/site-packages/sklearn/utils/_set_output.py:295, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
293 @wraps(f)
294 def wrapped(self, X, *args, **kwargs):
--> 295 data_to_wrap = f(self, X, *args, **kwargs)
296 if isinstance(data_to_wrap, tuple):
297 # only wrap the first output for cross decomposition
298 return_tuple = (
299 _wrap_data_with_container(method, data_to_wrap[0], X, self),
300 *data_to_wrap[1:],
301 )
File ~/kaggle_2/new_env/lib/python3.11/site-packages/sklearn/base.py:1474, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs)
1467 estimator._validate_params()
1469 with config_context(
1470 skip_parameter_validation=(
1471 prefer_skip_nested_validation or global_skip_validation
1472 )
1473 ):
-> 1474 return fit_method(estimator, *args, **kwargs)
File ~/kaggle_2/new_env/lib/python3.11/site-packages/sklearn/compose/_column_transformer.py:914, in ColumnTransformer.fit_transform(self, X, y, **params)
911 else:
912 routed_params = self._get_empty_routing()
--> 914 result = self._call_func_on_transformers(
915 X,
916 y,
917 _fit_transform_one,
918 column_as_labels=False,
919 routed_params=routed_params,
920 )
922 if not result:
923 self._update_fitted_transformers([])
File ~/kaggle_2/new_env/lib/python3.11/site-packages/sklearn/compose/_column_transformer.py:823, in ColumnTransformer._call_func_on_transformers(self, X, y, func, column_as_labels, routed_params)
811 extra_args = {}
812 jobs.append(
813 delayed(func)(
814 transformer=clone(trans) if not fitted else trans,
(...)
820 )
821 )
--> 823 return Parallel(n_jobs=self.n_jobs)(jobs)
825 except ValueError as e:
826 if \"Expected 2D array, got 1D array instead\" in str(e):
File ~/kaggle_2/new_env/lib/python3.11/site-packages/sklearn/utils/parallel.py:67, in Parallel.__call__(self, iterable)
62 config = get_config()
63 iterable_with_config = (
64 (_with_config(delayed_func, config), args, kwargs)
65 for delayed_func, args, kwargs in iterable
66 )
---> 67 return super().__call__(iterable_with_config)
File ~/kaggle_2/new_env/lib/python3.11/site-packages/joblib/parallel.py:1952, in Parallel.__call__(self, iterable)
1946 # The first item from the output is blank, but it makes the interpreter
1947 # progress until it enters the Try/Except block of the generator and
1948 # reach the first `yield` statement. This starts the aynchronous
1949 # dispatch of the tasks to the workers.
1950 next(output)
-> 1952 return output if self.return_generator else list(output)
File ~/kaggle_2/new_env/lib/python3.11/site-packages/joblib/parallel.py:1595, in Parallel._get_outputs(self, iterator, pre_dispatch)
1592 yield
1594 with self._backend.retrieval_context():
-> 1595 yield from self._retrieve()
1597 except GeneratorExit:
1598 # The generator has been garbage collected before being fully
1599 # consumed. This aborts the remaining tasks if possible and warn
1600 # the user if necessary.
1601 self._exception = True
File ~/kaggle_2/new_env/lib/python3.11/site-packages/joblib/parallel.py:1699, in Parallel._retrieve(self)
1692 while self._wait_retrieval():
1693
1694 # If the callback thread of a worker has signaled that its task
1695 # triggered an exception, or if the retrieval loop has raised an
1696 # exception (e.g. `GeneratorExit`), exit the loop and surface the
1697 # worker traceback.
1698 if self._aborting:
-> 1699 self._raise_error_fast()
1700 break
1702 # If the next job is not ready for retrieval yet, we just wait for
1703 # async callbacks to progress.
File ~/kaggle_2/new_env/lib/python3.11/site-packages/joblib/parallel.py:1734, in Parallel._raise_error_fast(self)
1730 # If this error job exists, immediatly raise the error by
1731 # calling get_result. This job might not exists if abort has been
1732 # called directly or if the generator is gc'ed.
1733 if error_job is not None:
-> 1734 error_job.get_result(self.timeout)
File ~/kaggle_2/new_env/lib/python3.11/site-packages/joblib/parallel.py:736, in BatchCompletionCallBack.get_result(self, timeout)
730 backend = self.parallel._backend
732 if backend.supports_retrieve_callback:
733 # We assume that the result has already been retrieved by the
734 # callback thread, and is stored internally. It's just waiting to
735 # be returned.
--> 736 return self._return_or_raise()
738 # For other backends, the main thread needs to run the retrieval step.
739 try:
File ~/kaggle_2/new_env/lib/python3.11/site-packages/joblib/parallel.py:754, in BatchCompletionCallBack._return_or_raise(self)
752 try:
753 if self.status == TASK_ERROR:
--> 754 raise self._result
755 return self._result
756 finally:
ValueError: cannot set WRITEABLE flag to True of this array"
}
Versions
System:
python: 3.11.6 | packaged by conda-forge | (main, Oct 3 2023, 10:37:07) [Clang 15.0.7 ]
executable: /Users/xxxx/kaggle_2/new_env/bin/python
machine: macOS-14.4.1-arm64-arm-64bit
Python dependencies:
sklearn: 1.4.1.post1
pip: 24.0
setuptools: 69.2.0
numpy: 1.23.5
scipy: 1.12.0
Cython: 3.0.9
pandas: 2.1.4
matplotlib: 3.8.3
joblib: 1.3.2
threadpoolctl: 3.4.0
Built with OpenMP: True
threadpoolctl info:
user_api: blas
internal_api: openblas
num_threads: 10
prefix: libopenblas
filepath: /Users/xxxx/kaggle_2/new_env/lib/libopenblas.0.dylib
version: 0.3.26
threading_layer: openmp
architecture: VORTEX
user_api: openmp
internal_api: openmp
num_threads: 10
prefix: libomp
filepath: /Users/xxxx/kaggle_2/new_env/lib/libomp.dylib
version: None