From 2fc789a9d11bdc17ebfe84db6b8dd4621ea2941e Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Wed, 18 Jan 2023 10:54:48 +0100 Subject: [PATCH 1/3] CI Adapt handling of discarded fused typed memoryview As of Cython>=3.0, memory views can't be None, otherwise the runtime would not know which concrete implementation to dispatch the Python call to. This simply adapts a call to pass an empty numpy array to resolve the concrete implementation of Cython functions using fused-types. --- sklearn/datasets/_svmlight_format_fast.pyx | 5 +++-- sklearn/datasets/_svmlight_format_io.py | 6 ++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/sklearn/datasets/_svmlight_format_fast.pyx b/sklearn/datasets/_svmlight_format_fast.pyx index cb0a3885ed6cb..b578584e5ac47 100644 --- a/sklearn/datasets/_svmlight_format_fast.pyx +++ b/sklearn/datasets/_svmlight_format_fast.pyx @@ -187,6 +187,7 @@ def _dump_svmlight_file( bint y_is_sp, ): cdef bint X_is_integral + cdef bint query_id_is_not_empty = query_id.size > 0 X_is_integral = X.dtype.kind == "i" if X_is_integral: value_pattern = "%d:%d" @@ -198,7 +199,7 @@ def _dump_svmlight_file( label_pattern = "%.16g" line_pattern = "%s" - if query_id is not None: + if query_id_is_not_empty: line_pattern += " qid:%d" line_pattern += " %s\n" @@ -246,7 +247,7 @@ def _dump_svmlight_file( else: labels_str = label_pattern % y[i,0] - if query_id is not None: + if query_id_is_not_empty: feat = (labels_str, query_id[i], s) else: feat = (labels_str, s) diff --git a/sklearn/datasets/_svmlight_format_io.py b/sklearn/datasets/_svmlight_format_io.py index 16aae0de4f2b0..8c3bcdf53d152 100644 --- a/sklearn/datasets/_svmlight_format_io.py +++ b/sklearn/datasets/_svmlight_format_io.py @@ -512,6 +512,12 @@ def dump_svmlight_file( raise ValueError( "expected query_id of shape (n_samples,), got %r" % (query_id.shape,) ) + else: + # NOTE: query_id is passed to Cython functions using fuse type. 
+ # Yet as of Cython>=3.0, memory views can't be None otherwise the runtime + # would not know which concrete implementation to dispatch the Python call to. + # TODO: simplify interfaces and implementations in _svmlight_format_fast.pyx. + query_id = np.array([], dtype=np.int32) one_based = not zero_based From 6921da804316709eab460389c19dcfd73b7bf395 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Wed, 18 Jan 2023 11:02:13 +0100 Subject: [PATCH 2/3] [scipy-dev] Trigger CI From 55517d7255cee66aa4b2c073b8b177261f9db98f Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Wed, 18 Jan 2023 11:31:12 +0100 Subject: [PATCH 3/3] [scipy-dev] Make the handling more readable Co-authored-by: Olivier Grisel --- sklearn/datasets/_svmlight_format_io.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sklearn/datasets/_svmlight_format_io.py b/sklearn/datasets/_svmlight_format_io.py index 8c3bcdf53d152..2a141e1732ff7 100644 --- a/sklearn/datasets/_svmlight_format_io.py +++ b/sklearn/datasets/_svmlight_format_io.py @@ -506,18 +506,18 @@ def dump_svmlight_file( if hasattr(X, "sort_indices"): X.sort_indices() - if query_id is not None: + if query_id is None: + # NOTE: query_id is passed to Cython functions using a fused type on query_id. + # Yet as of Cython>=3.0, memory views can't be None otherwise the runtime + # would not know which concrete implementation to dispatch the Python call to. + # TODO: simplify interfaces and implementations in _svmlight_format_fast.pyx. + query_id = np.array([], dtype=np.int32) + else: query_id = np.asarray(query_id) if query_id.shape[0] != y.shape[0]: raise ValueError( "expected query_id of shape (n_samples,), got %r" % (query_id.shape,) ) - else: - # NOTE: query_id is passed to Cython functions using fuse type. - # Yet as of Cython>=3.0, memory views can't be None otherwise the runtime - # would not know which concrete implementation to dispatch the Python call to. 
- # TODO: simplify interfaces and implementations in _svmlight_format_fast.pyx. - query_id = np.array([], dtype=np.int32) one_based = not zero_based