From 73a25b8f7e26a5c0a32abdc6e6043d66b19cb55c Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Tue, 29 Nov 2022 14:02:14 +0500 Subject: [PATCH 01/20] MAINT Remove -Wcpp warnings when compiling sklearn.svm._libsvm --- sklearn/svm/_libsvm.pyx | 123 ++++++++++++++++++++++++++-------------- 1 file changed, 80 insertions(+), 43 deletions(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index 6c9fce2e785a7..9b458ea4af152 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -50,27 +50,33 @@ LIBSVM_KERNEL_TYPES = ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed'] # Wrapper functions def fit( - cnp.ndarray[cnp.float64_t, ndim=2, mode='c'] X, - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] Y, - int svm_type=0, kernel='rbf', int degree=3, - double gamma=0.1, double coef0=0., double tol=1e-3, - double C=1., double nu=0.5, double epsilon=0.1, - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] - class_weight=np.empty(0), - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] - sample_weight=np.empty(0), - int shrinking=1, int probability=0, + const cnp.float64_t[:, ::1] X, + const cnp.float64_t[::1] Y, + int svm_type=0, + kernel='rbf', + int degree=3, + double gamma=0.1, + double coef0=0., + double tol=1e-3, + double C=1., + double nu=0.5, + double epsilon=0.1, + const cnp.float64_t[::1] class_weight=np.empty(0), + const cnp.float64_t[::1] sample_weight=np.empty(0), + int shrinking=1, + int probability=0, double cache_size=100., int max_iter=-1, - int random_seed=0): + int random_seed=0 +): """ Train the model using libsvm (low-level method) Parameters ---------- - X : array-like, dtype=float64 of shape (n_samples, n_features) + X : 2d memory view on array, dtype=float64 of shape (n_samples, n_features) - Y : array, dtype=float64 of shape (n_samples,) + Y : memory view on array, dtype=float64 of shape (n_samples,) target vector svm_type : {0, 1, 2, 3, 4}, default=0 @@ -105,13 +111,13 @@ def fit( epsilon : double, default=0.1 Epsilon parameter in the 
epsilon-insensitive loss function. - class_weight : array, dtype=float64, shape (n_classes,), \ + class_weight : memory view on array, dtype=float64, shape (n_classes,), \ default=np.empty(0) Set the parameter C of class i to class_weight[i]*C for SVC. If not given, all classes are supposed to have weight one. - sample_weight : array, dtype=float64, shape (n_samples,), \ + sample_weight : memory view on array, dtype=float64, shape (n_samples,), \ default=np.empty(0) Weights assigned to each sample. @@ -167,22 +173,44 @@ def fit( if len(sample_weight) == 0: sample_weight = np.ones(X.shape[0], dtype=np.float64) else: - assert sample_weight.shape[0] == X.shape[0], \ - "sample_weight and X have incompatible shapes: " + \ - "sample_weight has %s samples while X has %s" % \ - (sample_weight.shape[0], X.shape[0]) + assert ( + sample_weight.shape[0] == X.shape[0], + "sample_weight and X have incompatible shapes: " + + "sample_weight has %s samples while X has %s" % (sample_weight.shape[0], X.shape[0]) + ) kernel_index = LIBSVM_KERNEL_TYPES.index(kernel) set_problem( - &problem, X.data, Y.data, sample_weight.data, X.shape, kernel_index) + &problem, + &X[0, 0], + &Y[0], + &sample_weight[0], + X.shape, + kernel_index, + ) if problem.x == NULL: raise MemoryError("Seems we've run out of memory") - cdef cnp.ndarray[cnp.int32_t, ndim=1, mode='c'] \ - class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) + cdef cnp.int32_t[::1] class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) set_parameter( - &param, svm_type, kernel_index, degree, gamma, coef0, nu, cache_size, - C, tol, epsilon, shrinking, probability, class_weight.shape[0], - class_weight_label.data, class_weight.data, max_iter, random_seed) + &param, + svm_type, + kernel_index, + degree, + gamma, + coef0, + nu, + cache_size, + C, + tol, + epsilon, + shrinking, + probability, + class_weight.shape[0], + &class_weight_label[0], + &class_weight[0], + max_iter, + random_seed + ) error_msg =
svm_check_parameter(&problem, &param) if error_msg: @@ -201,51 +229,51 @@ def fit( SV_len = get_l(model) n_class = get_nr(model) - cdef cnp.ndarray[int, ndim=1, mode='c'] n_iter + cdef int[::1] n_iter n_iter = np.empty(max(1, n_class * (n_class - 1) // 2), dtype=np.intc) - copy_n_iter(n_iter.data, model) + copy_n_iter( &n_iter[0], model) - cdef cnp.ndarray[cnp.float64_t, ndim=2, mode='c'] sv_coef + cdef cnp.float64_t[:, ::1] sv_coef sv_coef = np.empty((n_class-1, SV_len), dtype=np.float64) - copy_sv_coef (sv_coef.data, model) + copy_sv_coef ( &sv_coef[0, 0], model) # the intercept is just model.rho but with sign changed - cdef cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] intercept + cdef cnp.float64_t[::1] intercept intercept = np.empty(int((n_class*(n_class-1))/2), dtype=np.float64) - copy_intercept (intercept.data, model, intercept.shape) + copy_intercept ( &intercept[0], model, intercept.shape) - cdef cnp.ndarray[cnp.int32_t, ndim=1, mode='c'] support + cdef cnp.int32_t[::1] support support = np.empty (SV_len, dtype=np.int32) - copy_support (support.data, model) + copy_support ( &support[0], model) # copy model.SV - cdef cnp.ndarray[cnp.float64_t, ndim=2, mode='c'] support_vectors + cdef cnp.float64_t[:, ::1] support_vectors if kernel_index == 4: # precomputed kernel support_vectors = np.empty((0, 0), dtype=np.float64) else: support_vectors = np.empty((SV_len, X.shape[1]), dtype=np.float64) - copy_SV(support_vectors.data, model, support_vectors.shape) + copy_SV( &support_vectors[0, 0], model, support_vectors.shape) - cdef cnp.ndarray[cnp.int32_t, ndim=1, mode='c'] n_class_SV + cdef cnp.int32_t[::1] n_class_SV if svm_type == 0 or svm_type == 1: n_class_SV = np.empty(n_class, dtype=np.int32) - copy_nSV(n_class_SV.data, model) + copy_nSV( &n_class_SV[0], model) else: # OneClass and SVR are considered to have 2 classes n_class_SV = np.array([SV_len, SV_len], dtype=np.int32) - cdef cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] probA - cdef cnp.ndarray[cnp.float64_t,
ndim=1, mode='c'] probB + cdef cnp.float64_t[::1] probA + cdef cnp.float64_t[::1] probB if probability != 0: if svm_type < 2: # SVC and NuSVC probA = np.empty(int(n_class*(n_class-1)/2), dtype=np.float64) probB = np.empty(int(n_class*(n_class-1)/2), dtype=np.float64) - copy_probB(probB.data, model, probB.shape) + copy_probB( &probB[0], model, probB.shape) else: probA = np.empty(1, dtype=np.float64) probB = np.empty(0, dtype=np.float64) - copy_probA(probA.data, model, probA.shape) + copy_probA( &probA[0], model, probA.shape) else: probA = np.empty(0, dtype=np.float64) probB = np.empty(0, dtype=np.float64) @@ -253,8 +281,17 @@ def fit( svm_free_and_destroy_model(&model) free(problem.x) - return (support, support_vectors, n_class_SV, sv_coef, intercept, - probA, probB, fit_status, n_iter) + return ( + support.base, + support_vectors.base, + n_class_SV.base, + sv_coef.base, + intercept.base, + probA.base, + probB.base, + fit_status, + n_iter.base, + ) cdef void set_predict_params( From 061fe55e6a03da92ee0497b4db001ef8445c4a82 Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Tue, 29 Nov 2022 23:24:26 +0500 Subject: [PATCH 02/20] Use NULL when class_weight_label is an empty array --- sklearn/svm/_libsvm.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index 9b458ea4af152..d660a6677b240 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -206,7 +206,7 @@ def fit( shrinking, probability, class_weight.shape[0], - &class_weight_label[0], + &class_weight_label[0] if class_weight_label.size > 0 else NULL, &class_weight[0], max_iter, random_seed From 275e7b8444ac3b641fed887c3806d39f1e112bcc Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Wed, 30 Nov 2022 11:33:38 +0500 Subject: [PATCH 03/20] Use NULL for class_weight pointer if it is empty --- sklearn/svm/_libsvm.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index 
d660a6677b240..cd463b7416d10 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -207,7 +207,7 @@ def fit( probability, class_weight.shape[0], &class_weight_label[0] if class_weight_label.size > 0 else NULL, - &class_weight[0], + &class_weight[0] if class_weight.size > 0 else NULL, max_iter, random_seed ) From ff4f524c20264bdff7b26401e88ab88b9e9df414 Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Wed, 30 Nov 2022 17:04:46 +0500 Subject: [PATCH 04/20] Use the NULL pointer for sv_coef if it is empty --- sklearn/svm/_libsvm.pyx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index cd463b7416d10..cdfd114920ff3 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -235,7 +235,10 @@ def fit( cdef cnp.float64_t[:, ::1] sv_coef sv_coef = np.empty((n_class-1, SV_len), dtype=np.float64) - copy_sv_coef ( &sv_coef[0, 0], model) + copy_sv_coef ( + &sv_coef[0, 0] if sv_coef.size > 0 else NULL, + model + ) # the intercept is just model.rho but with sign changed cdef cnp.float64_t[::1] intercept From afe1a9349698a63596676a3ec7ece6c8d3aac058 Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Wed, 30 Nov 2022 18:04:05 +0500 Subject: [PATCH 05/20] Use the NULL pointer for support if it is empty --- sklearn/svm/_libsvm.pyx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index cdfd114920ff3..b55f9212d0c93 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -247,7 +247,10 @@ def fit( cdef cnp.int32_t[::1] support support = np.empty (SV_len, dtype=np.int32) - copy_support ( &support[0], model) + copy_support ( + &support[0] if support.size > 0 else NULL, + model + ) # copy model.SV cdef cnp.float64_t[:, ::1] support_vectors From f9f965b2d7ba93f3ae201ceb4c1acf1feabfd2d4 Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Wed, 30 Nov 2022 18:59:58 +0500 Subject: [PATCH 06/20] Use the NULL pointer for
support vectors and n class SV if they are empty --- sklearn/svm/_libsvm.pyx | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index b55f9212d0c93..b63aada7123ff 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -259,12 +259,19 @@ def fit( support_vectors = np.empty((0, 0), dtype=np.float64) else: support_vectors = np.empty((SV_len, X.shape[1]), dtype=np.float64) - copy_SV( &support_vectors[0, 0], model, support_vectors.shape) + copy_SV( + &support_vectors[0, 0] if support_vectors.size > 0 else NULL, + model, + support_vectors.shape + ) cdef cnp.int32_t[::1] n_class_SV if svm_type == 0 or svm_type == 1: n_class_SV = np.empty(n_class, dtype=np.int32) - copy_nSV( &n_class_SV[0], model) + copy_nSV( + &n_class_SV[0] if n_class_SV.size > 0 else NULL, + model + ) else: # OneClass and SVR are considered to have 2 classes n_class_SV = np.array([SV_len, SV_len], dtype=np.int32) From d4c3ab96dee624b8a703c63908f93955fee41570 Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Thu, 1 Dec 2022 14:35:26 +0500 Subject: [PATCH 07/20] Applied some formatting according to PR comment --- sklearn/svm/_libsvm.pyx | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index b63aada7123ff..682eb7f349367 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -74,7 +74,7 @@ def fit( Parameters ---------- - X : 2d memory view on array, dtype=float64 of shape (n_samples, n_features) + X : memory view on array, dtype=float64 of shape (n_samples, n_features) Y : memory view on array, dtype=float64 of shape (n_samples,) target vector @@ -229,24 +229,27 @@ def fit( SV_len = get_l(model) n_class = get_nr(model) - cdef int[::1] n_iter - n_iter = np.empty(max(1, n_class * (n_class - 1) // 2), dtype=np.intc) - copy_n_iter( &n_iter[0], model) + cdef int[::1] n_iter = np.empty(max(1, n_class * (n_class - 
1) // 2), dtype=np.intc) + copy_n_iter( + &n_iter[0], + model + ) - cdef cnp.float64_t[:, ::1] sv_coef - sv_coef = np.empty((n_class-1, SV_len), dtype=np.float64) + cdef cnp.float64_t[:, ::1] sv_coef = np.empty((n_class-1, SV_len), dtype=np.float64) copy_sv_coef ( &sv_coef[0, 0] if sv_coef.size > 0 else NULL, model ) # the intercept is just model.rho but with sign changed - cdef cnp.float64_t[::1] intercept - intercept = np.empty(int((n_class*(n_class-1))/2), dtype=np.float64) - copy_intercept ( &intercept[0], model, intercept.shape) + cdef cnp.float64_t[::1] intercept = np.empty(int((n_class*(n_class-1))/2), dtype=np.float64) + copy_intercept ( + &intercept[0], + model, + intercept.shape + ) - cdef cnp.int32_t[::1] support - support = np.empty (SV_len, dtype=np.int32) + cdef cnp.int32_t[::1] support = np.empty (SV_len, dtype=np.int32) copy_support ( &support[0] if support.size > 0 else NULL, model From c90fe0f7ddc17155886917f0a36efd378a3288ef Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Thu, 1 Dec 2022 15:38:45 +0500 Subject: [PATCH 08/20] Apply black formatting to set_predict_params --- sklearn/svm/_libsvm.pyx | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index 682eb7f349367..704a6245a6fab 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -311,9 +311,18 @@ def fit( cdef void set_predict_params( - svm_parameter *param, int svm_type, kernel, int degree, double gamma, - double coef0, double cache_size, int probability, int nr_weight, - char *weight_label, char *weight) except *: + svm_parameter *param, + int svm_type, + kernel, + int degree, + double gamma, + double coef0, + double cache_size, + int probability, + int nr_weight, + char *weight_label, + char *weight +) except *: """Fill param with prediction time-only parameters.""" # training-time only parameters @@ -327,9 +336,26 @@ cdef void set_predict_params( kernel_index = 
LIBSVM_KERNEL_TYPES.index(kernel) - set_parameter(param, svm_type, kernel_index, degree, gamma, coef0, nu, - cache_size, C, tol, epsilon, shrinking, probability, - nr_weight, weight_label, weight, max_iter, random_seed) + set_parameter( + param, + svm_type, + kernel_index, + degree, + gamma, + coef0, + nu, + cache_size, + C, + tol, + epsilon, + shrinking, + probability, + nr_weight, + weight_label, + weight, + max_iter, + random_seed + ) def predict(cnp.ndarray[cnp.float64_t, ndim=2, mode='c'] X, From 20e3d4b069edcb909248cc0036d6e39a30f1ac5d Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Thu, 1 Dec 2022 16:50:31 +0500 Subject: [PATCH 09/20] Replace cnp.ndarray with memory views in the predict function --- sklearn/svm/_libsvm.pyx | 95 +++++++++++++++++++++++++++-------------- 1 file changed, 62 insertions(+), 33 deletions(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index 704a6245a6fab..39782e5d52e09 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -358,44 +358,47 @@ cdef void set_predict_params( ) -def predict(cnp.ndarray[cnp.float64_t, ndim=2, mode='c'] X, - cnp.ndarray[cnp.int32_t, ndim=1, mode='c'] support, - cnp.ndarray[cnp.float64_t, ndim=2, mode='c'] SV, - cnp.ndarray[cnp.int32_t, ndim=1, mode='c'] nSV, - cnp.ndarray[cnp.float64_t, ndim=2, mode='c'] sv_coef, - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] intercept, - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] probA=np.empty(0), - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] probB=np.empty(0), - int svm_type=0, kernel='rbf', int degree=3, - double gamma=0.1, double coef0=0., - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] - class_weight=np.empty(0), - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] - sample_weight=np.empty(0), - double cache_size=100.): +def predict( + const cnp.float64_t[:, ::1] X, + const cnp.int32_t[::1] support, + const cnp.float64_t[:, ::1] SV, + const cnp.int32_t[::1] nSV, + const cnp.float64_t[:, ::1] sv_coef, + const cnp.float64_t[::1] 
intercept, + const cnp.float64_t[::1] probA=np.empty(0), + const cnp.float64_t[::1] probB=np.empty(0), + int svm_type=0, + kernel='rbf', + int degree=3, + double gamma=0.1, + double coef0=0., + const cnp.float64_t[::1] class_weight=np.empty(0), + const cnp.float64_t[::1] sample_weight=np.empty(0), + double cache_size=100. +): """ Predict target values of X given a model (low-level method) Parameters ---------- - X : array-like, dtype=float of shape (n_samples, n_features) + X : memory view on array, dtype=float of shape (n_samples, n_features) - support : array of shape (n_support,) + support : memory view on array of shape (n_support,) Index of support vectors in training set. - SV : array of shape (n_support, n_features) + SV : memory view on array of shape (n_support, n_features) Support vectors. - nSV : array of shape (n_class,) + nSV : memory view on array of shape (n_class,) Number of support vectors in each class. - sv_coef : array of shape (n_class-1, n_support) + sv_coef : memory view on array of shape (n_class-1, n_support) Coefficients of support vectors in decision function. - intercept : array of shape (n_class*(n_class-1)/2) + intercept : memory view on array of shape (n_class*(n_class-1)/2) Intercept in decision function. - probA, probB : array of shape (n_class*(n_class-1)/2,) + probA, probB : memory view on array of shape (n_class*(n_class-1)/2,) Probability estimates. svm_type : {0, 1, 2, 3, 4}, default=0 @@ -422,33 +425,59 @@ def predict(cnp.ndarray[cnp.float64_t, ndim=2, mode='c'] X, dec_values : array Predicted values.
""" - cdef cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] dec_values + cdef cnp.float64_t[::1] dec_values cdef svm_parameter param cdef svm_model *model cdef int rv - cdef cnp.ndarray[cnp.int32_t, ndim=1, mode='c'] \ - class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) + cdef cnp.int32_t[::1] class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) - set_predict_params(&param, svm_type, kernel, degree, gamma, coef0, - cache_size, 0, class_weight.shape[0], - class_weight_label.data, class_weight.data) - model = set_model(&param, nSV.shape[0], SV.data, SV.shape, - support.data, support.shape, sv_coef.strides, - sv_coef.data, intercept.data, nSV.data, probA.data, probB.data) + set_predict_params( + &param, + svm_type, + kernel, + degree, + gamma, + coef0, + cache_size, + 0, + class_weight.shape[0], + &class_weight_label[0], + &class_weight[0], + ) + model = set_model( + &param, + nSV.shape[0], + &SV[0, 0], + SV.shape, + &support[0], + support.shape, + sv_coef.strides, + &sv_coef[0, 0], + &intercept[0], + &nSV[0], + &probA[0], + &probB[0], + ) cdef BlasFunctions blas_functions blas_functions.dot = _dot[double] #TODO: use check_model try: dec_values = np.empty(X.shape[0]) with nogil: - rv = copy_predict(X.data, model, X.shape, dec_values.data, &blas_functions) + rv = copy_predict( + &X[0, 0], + model, + X.shape, + &dec_values[0], + &blas_functions, + ) if rv < 0: raise MemoryError("We've run out of memory") finally: free_model(model) - return dec_values + return dec_values.base def predict_proba( From 125a378fd5d7d6fc348acd2317ca6030ae751788 Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Thu, 1 Dec 2022 17:48:08 +0500 Subject: [PATCH 10/20] Handle empty memory views in predict function --- sklearn/svm/_libsvm.pyx | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index 39782e5d52e09..793dd45bde152 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@
-285,11 +285,19 @@ def fit( if svm_type < 2: # SVC and NuSVC probA = np.empty(int(n_class*(n_class-1)/2), dtype=np.float64) probB = np.empty(int(n_class*(n_class-1)/2), dtype=np.float64) - copy_probB( &probB[0], model, probB.shape) + copy_probB( + &probB[0], + model, + probB.shape + ) else: probA = np.empty(1, dtype=np.float64) probB = np.empty(0, dtype=np.float64) - copy_probA( &probA[0], model, probA.shape) + copy_probA( + &probA[0], + model, + probA.shape + ) else: probA = np.empty(0, dtype=np.float64) probB = np.empty(0, dtype=np.float64) @@ -442,18 +450,18 @@ def predict( cache_size, 0, class_weight.shape[0], - &class_weight_label[0], - &class_weight[0], + &class_weight_label[0] if class_weight_label.size > 0 else NULL, + &class_weight[0] if class_weight.size > 0 else NULL, ) model = set_model( &param, nSV.shape[0], - &SV[0, 0], + &SV[0, 0] if SV.size > 0 else NULL, SV.shape, - &support[0], + &support[0] if support.size > 0 else NULL, support.shape, sv_coef.strides, - &sv_coef[0, 0], + &sv_coef[0, 0] if sv_coef.size > 0 else NULL, &intercept[0], &nSV[0], &probA[0], From f258c57beffd4b2003b885be863c61805f5e6d46 Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Thu, 1 Dec 2022 18:28:16 +0500 Subject: [PATCH 11/20] Add NULL handling for probA and probB --- sklearn/svm/_libsvm.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index 793dd45bde152..d7e3293012f23 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -464,8 +464,8 @@ def predict( &sv_coef[0, 0] if sv_coef.size > 0 else NULL, &intercept[0], &nSV[0], - &probA[0], - &probB[0], + &probA[0] if probA.size > 0 else NULL, + &probB[0] if probB.size > 0 else NULL, ) cdef BlasFunctions blas_functions blas_functions.dot = _dot[double] From e8c9815158c452c518716467c222bb2591e214d9 Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Fri, 2 Dec 2022 14:10:57 +0500 Subject: [PATCH 12/20] Replace cnp.ndarray with memory views in the
predict_proba function --- sklearn/svm/_libsvm.pyx | 81 +++++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index d7e3293012f23..5a6570a38ae49 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -489,21 +489,23 @@ def predict( def predict_proba( - cnp.ndarray[cnp.float64_t, ndim=2, mode='c'] X, - cnp.ndarray[cnp.int32_t, ndim=1, mode='c'] support, - cnp.ndarray[cnp.float64_t, ndim=2, mode='c'] SV, - cnp.ndarray[cnp.int32_t, ndim=1, mode='c'] nSV, - cnp.ndarray[cnp.float64_t, ndim=2, mode='c'] sv_coef, - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] intercept, - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] probA=np.empty(0), - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] probB=np.empty(0), - int svm_type=0, kernel='rbf', int degree=3, - double gamma=0.1, double coef0=0., - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] - class_weight=np.empty(0), - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] - sample_weight=np.empty(0), - double cache_size=100.): + const cnp.float64_t[:, ::1] X, + const cnp.int32_t[::1] support, + const cnp.float64_t[:, ::1] SV, + const cnp.int32_t[::1] nSV, + cnp.float64_t[:, ::1] sv_coef, + cnp.float64_t[::1] intercept, + cnp.float64_t[::1] probA=np.empty(0), + cnp.float64_t[::1] probB=np.empty(0), + int svm_type=0, + kernel='rbf', + int degree=3, + double gamma=0.1, + double coef0=0., + cnp.float64_t[::1] class_weight=np.empty(0), + cnp.float64_t[::1] sample_weight=np.empty(0), + double cache_size=100. +): """ Predict probabilities @@ -563,20 +565,39 @@ def predict_proba( dec_values : array Predicted values.
""" - cdef cnp.ndarray[cnp.float64_t, ndim=2, mode='c'] dec_values + cdef cnp.float64_t[:, ::1] dec_values cdef svm_parameter param cdef svm_model *model - cdef cnp.ndarray[cnp.int32_t, ndim=1, mode='c'] \ - class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) + cdef cnp.int32_t[::1] class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) cdef int rv - set_predict_params(&param, svm_type, kernel, degree, gamma, coef0, - cache_size, 1, class_weight.shape[0], - class_weight_label.data, class_weight.data) - model = set_model(&param, nSV.shape[0], SV.data, SV.shape, - support.data, support.shape, sv_coef.strides, - sv_coef.data, intercept.data, nSV.data, - probA.data, probB.data) + set_predict_params( + &param, + svm_type, + kernel, + degree, + gamma, + coef0, + cache_size, + 1, + class_weight.shape[0], + &class_weight_label[0] if class_weight_label.size > 0 else NULL, + &class_weight[0] if class_weight.size > 0 else NULL, + ) + model = set_model( + &param, + nSV.shape[0], + &SV[0, 0], + SV.shape, + &support[0], + support.shape, + sv_coef.strides, + &sv_coef[0, 0], + &intercept[0], + &nSV[0], + &probA[0] if probA.size > 0 else NULL, + &probB[0] if probB.size > 0 else NULL, + ) cdef cnp.npy_intp n_class = get_nr(model) cdef BlasFunctions blas_functions @@ -584,13 +605,19 @@ def predict_proba( try: dec_values = np.empty((X.shape[0], n_class), dtype=np.float64) with nogil: - rv = copy_predict_proba(X.data, model, X.shape, dec_values.data, &blas_functions) + rv = copy_predict_proba( + &X[0, 0], + model, + X.shape, + &dec_values[0, 0], + &blas_functions + ) if rv < 0: raise MemoryError("We've run out of memory") finally: free_model(model) - return dec_values + return dec_values.base def decision_function( From df123fbbf0c8ef9179ac21b239f2fe4bee02aa26 Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Fri, 2 Dec 2022 14:52:59 +0500 Subject: [PATCH 13/20] Add empty check for SV in predict_proba --- sklearn/svm/_libsvm.pyx | 2 +- 1 file changed, 1 insertion(+), 1
deletion(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index 5a6570a38ae49..76b28c2a6d5cf 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -587,7 +587,7 @@ def predict_proba( model = set_model( &param, nSV.shape[0], - &SV[0, 0], + &SV[0, 0] if SV.size > 0 else NULL, SV.shape, &support[0], support.shape, From 32e21607772dd9f2e45d9172791925696717fe02 Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Fri, 2 Dec 2022 16:44:15 +0500 Subject: [PATCH 14/20] Replace cnp.ndarray with memory views in the decision_function --- sklearn/svm/_libsvm.pyx | 110 +++++++++++++++++++++++++--------------- 1 file changed, 69 insertions(+), 41 deletions(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index 76b28c2a6d5cf..10bd70a4c271a 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -521,24 +521,24 @@ def predict_proba( Parameters ---------- - X : array-like, dtype=float of shape (n_samples, n_features) + X : memory view on array, dtype=float of shape (n_samples, n_features) - support : array of shape (n_support,) + support : memory view on array of shape (n_support,) Index of support vectors in training set. - SV : array of shape (n_support, n_features) + SV : memory view on array of shape (n_support, n_features) Support vectors. - nSV : array of shape (n_class,) + nSV : memory view on array of shape (n_class,) Number of support vectors in each class. - sv_coef : array of shape (n_class-1, n_support) + sv_coef : memory view on array of shape (n_class-1, n_support) Coefficients of support vectors in decision function. - intercept : array of shape (n_class*(n_class-1)/2,) + intercept : memory view on array of shape (n_class*(n_class-1)/2,) Intercept in decision function. - probA, probB : array of shape (n_class*(n_class-1)/2,) + probA, probB : memory view on array of shape (n_class*(n_class-1)/2,) Probability estimates.
svm_type : {0, 1, 2, 3, 4}, default=0 @@ -621,21 +621,23 @@ def predict_proba( def decision_function( - cnp.ndarray[cnp.float64_t, ndim=2, mode='c'] X, - cnp.ndarray[cnp.int32_t, ndim=1, mode='c'] support, - cnp.ndarray[cnp.float64_t, ndim=2, mode='c'] SV, - cnp.ndarray[cnp.int32_t, ndim=1, mode='c'] nSV, - cnp.ndarray[cnp.float64_t, ndim=2, mode='c'] sv_coef, - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] intercept, - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] probA=np.empty(0), - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] probB=np.empty(0), - int svm_type=0, kernel='rbf', int degree=3, - double gamma=0.1, double coef0=0., - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] - class_weight=np.empty(0), - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] - sample_weight=np.empty(0), - double cache_size=100.): + const cnp.float64_t[:,::1] X, + const cnp.int32_t[::1] support, + const cnp.float64_t[:, ::1] SV, + const cnp.int32_t[::1] nSV, + const cnp.float64_t[:, ::1] sv_coef, + const cnp.float64_t[::1] intercept, + const cnp.float64_t[::1] probA=np.empty(0), + const cnp.float64_t[::1] probB=np.empty(0), + int svm_type=0, + kernel='rbf', + int degree=3, + double gamma=0.1, + double coef0=0., + const cnp.float64_t[::1] class_weight=np.empty(0), + const cnp.float64_t[::1] sample_weight=np.empty(0), + double cache_size=100. +): """ Predict margin (libsvm name for this is predict_values) @@ -644,24 +646,24 @@ def decision_function( Parameters ---------- - X : array-like, dtype=float, size=[n_samples, n_features] + X : memory view on array, dtype=float, size=[n_samples, n_features] - support : array, shape=[n_support] + support : memory view on array, shape=[n_support] Index of support vectors in training set. - SV : array, shape=[n_support, n_features] + SV : memory view on array, shape=[n_support, n_features] Support vectors. - nSV : array, shape=[n_class] + nSV : memory view on array, shape=[n_class] Number of support vectors in each class. 
- sv_coef : array, shape=[n_class-1, n_support] + sv_coef : memory view on array, shape=[n_class-1, n_support] Coefficients of support vectors in decision function. - intercept : array, shape=[n_class*(n_class-1)/2] + intercept : memory view on array, shape=[n_class*(n_class-1)/2] Intercept in decision function. - probA, probB : array, shape=[n_class*(n_class-1)/2] + probA, probB : memory view on array, shape=[n_class*(n_class-1)/2] Probability estimates. svm_type : {0, 1, 2, 3, 4}, optional @@ -688,24 +690,43 @@ def decision_function( dec_values : array Predicted values. """ - cdef cnp.ndarray[cnp.float64_t, ndim=2, mode='c'] dec_values + cdef cnp.float64_t[:, ::1] dec_values cdef svm_parameter param cdef svm_model *model cdef cnp.npy_intp n_class - cdef cnp.ndarray[cnp.int32_t, ndim=1, mode='c'] \ - class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) + cdef cnp.int32_t[::1] class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) cdef int rv - set_predict_params(&param, svm_type, kernel, degree, gamma, coef0, - cache_size, 0, class_weight.shape[0], - class_weight_label.data, class_weight.data) + set_predict_params( + &param, + svm_type, + kernel, + degree, + gamma, + coef0, + cache_size, + 0, + class_weight.shape[0], + &class_weight_label[0] if class_weight_label.size > 0 else NULL, + &class_weight[0] if class_weight.size > 0 else NULL, + ) - model = set_model(&param, nSV.shape[0], SV.data, SV.shape, - support.data, support.shape, sv_coef.strides, - sv_coef.data, intercept.data, nSV.data, - probA.data, probB.data) + model = set_model( + &param, + nSV.shape[0], + &SV[0, 0], + SV.shape, + &support[0], + support.shape, + sv_coef.strides, + &sv_coef[0, 0], + &intercept[0], + &nSV[0], + &probA[0] if probA.size > 0 else NULL, + &probB[0] if probB.size > 0 else NULL, + ) if svm_type > 1: n_class = 1 @@ -717,13 +738,20 @@ def decision_function( try: dec_values = np.empty((X.shape[0], n_class), dtype=np.float64) with nogil: - rv =
copy_predict_values(X.data, model, X.shape, dec_values.data, n_class, &blas_functions) + rv = copy_predict_values( + &X[0, 0], + model, + X.shape, + &dec_values[0, 0], + n_class, + &blas_functions, + ) if rv < 0: raise MemoryError("We've run out of memory") finally: free_model(model) - return dec_values + return dec_values.base def cross_validation( From ac97f1ece3c2457f84a4d195d982e9095d379e0a Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Fri, 2 Dec 2022 17:39:09 +0500 Subject: [PATCH 15/20] Add empty check for SV in decision_function --- sklearn/svm/_libsvm.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index 10bd70a4c271a..ddab91ae99ac4 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -716,7 +716,7 @@ def decision_function( model = set_model( &param, nSV.shape[0], - &SV[0, 0], + &SV[0, 0] if SV.size > 0 else NULL, SV.shape, &support[0], support.shape, From 1a2dfc7f26dfab61519987ca0e5456cf286c6895 Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Fri, 2 Dec 2022 18:25:17 +0500 Subject: [PATCH 16/20] Replace cnp.ndarray with memory views in the cross_validation function --- setup.py | 1 + sklearn/svm/_libsvm.pyx | 93 ++++++++++++++++++++++++++++------------- 2 files changed, 65 insertions(+), 29 deletions(-) diff --git a/setup.py b/setup.py index 13f44982dcfe1..14e42321dde12 100755 --- a/setup.py +++ b/setup.py @@ -99,6 +99,7 @@ "sklearn.metrics._pairwise_distances_reduction._radius_neighbors", "sklearn.metrics._pairwise_fast", "sklearn.neighbors._partition_nodes", + "sklearn.svm._libsvm", "sklearn.tree._splitter", "sklearn.tree._utils", "sklearn.utils._cython_blas", diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index ddab91ae99ac4..87e5707717e7c 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -755,27 +755,35 @@ def decision_function( def cross_validation( - cnp.ndarray[cnp.float64_t, ndim=2, mode='c'] X, - cnp.ndarray[cnp.float64_t,
ndim=1, mode='c'] Y, - int n_fold, svm_type=0, kernel='rbf', int degree=3, - double gamma=0.1, double coef0=0., double tol=1e-3, - double C=1., double nu=0.5, double epsilon=0.1, - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] - class_weight=np.empty(0), - cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] - sample_weight=np.empty(0), - int shrinking=0, int probability=0, double cache_size=100., + const cnp.float64_t[:, ::1] X, + const cnp.float64_t[::1] Y, + int n_fold, + svm_type=0, + kernel='rbf', + int degree=3, + double gamma=0.1, + double coef0=0., + double tol=1e-3, + double C=1., + double nu=0.5, + double epsilon=0.1, + cnp.float64_t[::1] class_weight=np.empty(0), + cnp.float64_t[::1] sample_weight=np.empty(0), + int shrinking=0, + int probability=0, + double cache_size=100., int max_iter=-1, - int random_seed=0): + int random_seed=0 +): """ Binding of the cross-validation routine (low-level routine) Parameters ---------- - X : array-like, dtype=float of shape (n_samples, n_features) + X : memory view on array, dtype=float of shape (n_samples, n_features) - Y : array, dtype=float of shape (n_samples,) + Y : memory view on array, dtype=float of shape (n_samples,) target vector n_fold : int32 @@ -813,13 +821,13 @@ def cross_validation( epsilon : double, default=0.1 Epsilon parameter in the epsilon-insensitive loss function. - class_weight : array, dtype=float64, shape (n_classes,), \ + class_weight : memory view on array, dtype=float64, shape (n_classes,), \ default=np.empty(0) Set the parameter C of class i to class_weight[i]*C for SVC. If not given, all classes are supposed to have weight one. - sample_weight : array, dtype=float64, shape (n_samples,), \ + sample_weight : memory view on array, dtype=float64, shape (n_samples,), \ default=np.empty(0) Weights assigned to each sample. 
@@ -855,10 +863,11 @@ def cross_validation( if len(sample_weight) == 0: sample_weight = np.ones(X.shape[0], dtype=np.float64) else: - assert sample_weight.shape[0] == X.shape[0], \ - "sample_weight and X have incompatible shapes: " + \ - "sample_weight has %s samples while X has %s" % \ - (sample_weight.shape[0], X.shape[0]) + assert ( + sample_weight.shape[0] == X.shape[0], + "sample_weight and X have incompatible shapes: " + + "sample_weight has %s samples while X has %s" % (sample_weight.shape[0], X.shape[0]) + ) if X.shape[0] < n_fold: raise ValueError("Number of samples is less than number of folds") @@ -866,34 +875,60 @@ def cross_validation( # set problem kernel_index = LIBSVM_KERNEL_TYPES.index(kernel) set_problem( - &problem, X.data, Y.data, sample_weight.data, X.shape, kernel_index) + &problem, + &X[0, 0], + &Y[0], + &sample_weight[0] if sample_weight.size > 0 else NULL, + X.shape, + kernel_index + ) if problem.x == NULL: raise MemoryError("Seems we've run out of memory") - cdef cnp.ndarray[cnp.int32_t, ndim=1, mode='c'] \ - class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) + cdef cnp.int32_t[::1] class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) # set parameters set_parameter( - &param, svm_type, kernel_index, degree, gamma, coef0, nu, cache_size, - C, tol, tol, shrinking, probability, - class_weight.shape[0], class_weight_label.data, - class_weight.data, max_iter, random_seed) + &param, + svm_type, + kernel_index, + degree, + gamma, + coef0, + nu, + cache_size, + C, + tol, + tol, + shrinking, + probability, + class_weight.shape[0], + &class_weight_label[0] if class_weight_label.size > 0 else NULL, + &class_weight[0] if class_weight.size > 0 else NULL, + max_iter, + random_seed + ) error_msg = svm_check_parameter(&problem, &param); if error_msg: raise ValueError(error_msg) - cdef cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] target + cdef cnp.float64_t[::1] target cdef BlasFunctions blas_functions blas_functions.dot = 
_dot[double] try: target = np.empty((X.shape[0]), dtype=np.float64) with nogil: - svm_cross_validation(&problem, &param, n_fold, target.data, &blas_functions) + svm_cross_validation( + &problem, + &param, + n_fold, + &target[0], + &blas_functions + ) finally: free(problem.x) - return target + return target.base def set_verbosity_wrap(int verbosity): From 94870127ba0c431ade6f840d22e0fb9349ce0176 Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Mon, 5 Dec 2022 11:47:43 +0500 Subject: [PATCH 17/20] Revert additions in doc strings --- sklearn/svm/_libsvm.pyx | 58 ++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index 87e5707717e7c..1dc6714d5e796 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -74,9 +74,9 @@ def fit( Parameters ---------- - X : memory view on array, dtype=float64 of shape (n_samples, n_features) + X : array-like, dtype=float64 of shape (n_samples, n_features) - Y : memory view on array, dtype=float64 of shape (n_samples,) + Y : array, dtype=float64 of shape (n_samples,) target vector svm_type : {0, 1, 2, 3, 4}, default=0 @@ -111,13 +111,13 @@ def fit( epsilon : double, default=0.1 Epsilon parameter in the epsilon-insensitive loss function. - class_weight : memory view on array, dtype=float64, shape (n_classes,), \ + class_weight : array, dtype=float64, shape (n_classes,), \ default=np.empty(0) Set the parameter C of class i to class_weight[i]*C for SVC. If not given, all classes are supposed to have weight one. - sample_weight : memory view on array, dtype=float64, shape (n_samples,), \ + sample_weight : array, dtype=float64, shape (n_samples,), \ default=np.empty(0) Weights assigned to each sample. 
@@ -389,24 +389,24 @@ def predict( Parameters ---------- - X : memory view on array, dtype=float of shape (n_samples, n_features) + X : array-like, dtype=float of shape (n_samples, n_features) - support : memory view on array of shape (n_support,) + support : array of shape (n_support,) Index of support vectors in training set. - SV : memory view on array of shape (n_support, n_features) + SV : array of shape (n_support, n_features) Support vectors. - nSV : memory view on array of shape (n_class,) + nSV : array of shape (n_class,) Number of support vectors in each class. - sv_coef : memory view on array of shape (n_class-1, n_support) + sv_coef : array of shape (n_class-1, n_support) Coefficients of support vectors in decision function. - intercept : memory view on array of shape (n_class*(n_class-1)/2) + intercept : array of shape (n_class*(n_class-1)/2) Intercept in decision function. - probA, probB : memory view on array of shape (n_class*(n_class-1)/2,) + probA, probB : array of shape (n_class*(n_class-1)/2,) Probability estimates. svm_type : {0, 1, 2, 3, 4}, default=0 @@ -521,24 +521,24 @@ def predict_proba( Parameters ---------- - X : memory view on array, dtype=float of shape (n_samples, n_features) + X : array-like, dtype=float of shape (n_samples, n_features) - support : memory view on array of shape (n_support,) + support : array of shape (n_support,) Index of support vectors in training set. - SV : memory view on array of shape (n_support, n_features) + SV : array of shape (n_support, n_features) Support vectors. - nSV : memory view on array of shape (n_class,) + nSV : array of shape (n_class,) Number of support vectors in each class. - sv_coef : memory view on array of shape (n_class-1, n_support) + sv_coef : array of shape (n_class-1, n_support) Coefficients of support vectors in decision function. - intercept : memory view on array of shape (n_class*(n_class-1)/2,) + intercept : array of shape (n_class*(n_class-1)/2,) Intercept in decision function. 
- probA, probB : memory view on array of shape (n_class*(n_class-1)/2,) + probA, probB : array of shape (n_class*(n_class-1)/2,) Probability estimates. svm_type : {0, 1, 2, 3, 4}, default=0 @@ -646,24 +646,24 @@ def decision_function( Parameters ---------- - X : memory view on array, dtype=float, size=[n_samples, n_features] + X : array-like, dtype=float, size=[n_samples, n_features] - support : memory view on array, shape=[n_support] + support : array, shape=[n_support] Index of support vectors in training set. - SV : memory view on array, shape=[n_support, n_features] + SV : array, shape=[n_support, n_features] Support vectors. - nSV : memory view on array, shape=[n_class] + nSV : array, shape=[n_class] Number of support vectors in each class. - sv_coef : memory view on array, shape=[n_class-1, n_support] + sv_coef : array, shape=[n_class-1, n_support] Coefficients of support vectors in decision function. - intercept : memory view on array, shape=[n_class*(n_class-1)/2] + intercept : array, shape=[n_class*(n_class-1)/2] Intercept in decision function. - probA, probB : memory view on array, shape=[n_class*(n_class-1)/2] + probA, probB : array, shape=[n_class*(n_class-1)/2] Probability estimates. svm_type : {0, 1, 2, 3, 4}, optional @@ -781,9 +781,9 @@ def cross_validation( Parameters ---------- - X : memory view on array, dtype=float of shape (n_samples, n_features) + X : array-like, dtype=float of shape (n_samples, n_features) - Y : memory view on array, dtype=float of shape (n_samples,) + Y : array, dtype=float of shape (n_samples,) target vector n_fold : int32 @@ -821,13 +821,13 @@ def cross_validation( epsilon : double, default=0.1 Epsilon parameter in the epsilon-insensitive loss function. - class_weight : memory view on array, dtype=float64, shape (n_classes,), \ + class_weight : array, dtype=float64, shape (n_classes,), \ default=np.empty(0) Set the parameter C of class i to class_weight[i]*C for SVC. 
If not given, all classes are supposed to have weight one. - sample_weight : memory view on array, dtype=float64, shape (n_samples,), \ + sample_weight : array, dtype=float64, shape (n_samples,), \ default=np.empty(0) Weights assigned to each sample. From 334ee887400b38157dd0e28ad8d27153879d0fd2 Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Fri, 30 Dec 2022 18:26:54 +0500 Subject: [PATCH 18/20] Addressed comments on PR related to black formatting --- sklearn/svm/_libsvm.pyx | 106 +++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 57 deletions(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index 1dc6714d5e796..a791a0a8cce27 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -67,7 +67,7 @@ def fit( int probability=0, double cache_size=100., int max_iter=-1, - int random_seed=0 + int random_seed=0, ): """ Train the model using libsvm (low-level method) @@ -175,8 +175,9 @@ def fit( else: assert ( sample_weight.shape[0] == X.shape[0], - "sample_weight and X have incompatible shapes: " + - "sample_weight has %s samples while X has %s" % (sample_weight.shape[0], X.shape[0]) + f"sample_weight and X have incompatible shapes: " + f"sample_weight has {sample_weight.shape[0]} samples while " + f"X has {X.shape[0]}" ) kernel_index = LIBSVM_KERNEL_TYPES.index(kernel) @@ -185,12 +186,14 @@ def fit( &X[0, 0], &Y[0], &sample_weight[0], - X.shape, + X.shape, kernel_index, ) if problem.x == NULL: raise MemoryError("Seems we've run out of memory") - cdef cnp.int32_t[::1] class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) + cdef cnp.int32_t[::1] class_weight_label = np.arange( + class_weight.shape[0], dtype=np.int32 + ) set_parameter( &param, svm_type, @@ -236,24 +239,16 @@ def fit( ) cdef cnp.float64_t[:, ::1] sv_coef = np.empty((n_class-1, SV_len), dtype=np.float64) - copy_sv_coef ( - &sv_coef[0, 0] if sv_coef.size > 0 else NULL, - model - ) + copy_sv_coef( &sv_coef[0, 0] if sv_coef.size > 0 else NULL, 
model) # the intercept is just model.rho but with sign changed - cdef cnp.float64_t[::1] intercept = np.empty(int((n_class*(n_class-1))/2), dtype=np.float64) - copy_intercept ( - &intercept[0], - model, - intercept.shape + cdef cnp.float64_t[::1] intercept = np.empty( + int((n_class*(n_class-1))/2), dtype=np.float64 ) + copy_intercept( &intercept[0], model, intercept.shape) - cdef cnp.int32_t[::1] support = np.empty (SV_len, dtype=np.int32) - copy_support ( - &support[0] if support.size > 0 else NULL, - model - ) + cdef cnp.int32_t[::1] support = np.empty(SV_len, dtype=np.int32) + copy_support( &support[0] if support.size > 0 else NULL, model) # copy model.SV cdef cnp.float64_t[:, ::1] support_vectors @@ -265,16 +260,13 @@ def fit( copy_SV( &support_vectors[0, 0] if support_vectors.size > 0 else NULL, model, - support_vectors.shape + support_vectors.shape, ) cdef cnp.int32_t[::1] n_class_SV if svm_type == 0 or svm_type == 1: n_class_SV = np.empty(n_class, dtype=np.int32) - copy_nSV( - &n_class_SV[0] if n_class_SV.size > 0 else NULL, - model - ) + copy_nSV( &n_class_SV[0] if n_class_SV.size > 0 else NULL, model) else: # OneClass and SVR are considered to have 2 classes n_class_SV = np.array([SV_len, SV_len], dtype=np.int32) @@ -285,19 +277,11 @@ def fit( if svm_type < 2: # SVC and NuSVC probA = np.empty(int(n_class*(n_class-1)/2), dtype=np.float64) probB = np.empty(int(n_class*(n_class-1)/2), dtype=np.float64) - copy_probB( - &probB[0], - model, - probB.shape - ) + copy_probB( &probB[0], model, probB.shape) else: probA = np.empty(1, dtype=np.float64) probB = np.empty(0, dtype=np.float64) - copy_probA( - &probA[0], - model, - probA.shape - ) + copy_probA( &probA[0], model, probA.shape) else: probA = np.empty(0, dtype=np.float64) probB = np.empty(0, dtype=np.float64) @@ -329,7 +313,7 @@ cdef void set_predict_params( int probability, int nr_weight, char *weight_label, - char *weight + char *weight, ) except *: """Fill param with prediction time-only parameters.""" @@ 
-362,7 +346,7 @@ cdef void set_predict_params( weight_label, weight, max_iter, - random_seed + random_seed, ) @@ -379,10 +363,10 @@ def predict( kernel='rbf', int degree=3, double gamma=0.1, - double coef0=0., + double coef0=0.0, const cnp.float64_t[::1] class_weight=np.empty(0), const cnp.float64_t[::1] sample_weight=np.empty(0), - double cache_size=100. + double cache_size=100.0, ): """ Predict target values of X given a model (low-level method) @@ -438,7 +422,9 @@ def predict( cdef svm_model *model cdef int rv - cdef cnp.int32_t[::1] class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) + cdef cnp.int32_t[::1] class_weight_label = np.arange( + class_weight.shape[0], dtype=np.int32 + ) set_predict_params( &param, @@ -501,10 +487,10 @@ def predict_proba( kernel='rbf', int degree=3, double gamma=0.1, - double coef0=0., + double coef0=0.0, cnp.float64_t[::1] class_weight=np.empty(0), cnp.float64_t[::1] sample_weight=np.empty(0), - double cache_size=100. + double cache_size=100.0, ): """ Predict probabilities @@ -568,7 +554,9 @@ def predict_proba( cdef cnp.float64_t[:, ::1] dec_values cdef svm_parameter param cdef svm_model *model - cdef cnp.int32_t[::1] class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) + cdef cnp.int32_t[::1] class_weight_label = np.arange( + class_weight.shape[0], dtype=np.int32 + ) cdef int rv set_predict_params( @@ -580,7 +568,7 @@ def predict_proba( coef0, cache_size, 1, - class_weight.shape[0], + class_weight.shape[0], &class_weight_label[0] if class_weight_label.size > 0 else NULL, &class_weight[0] if class_weight.size > 0 else NULL, ) @@ -610,7 +598,7 @@ def predict_proba( model, X.shape, &dec_values[0, 0], - &blas_functions + &blas_functions, ) if rv < 0: raise MemoryError("We've run out of memory") @@ -633,10 +621,10 @@ def decision_function( kernel='rbf', int degree=3, double gamma=0.1, - double coef0=0., + double coef0=0.0, const cnp.float64_t[::1] class_weight=np.empty(0), const cnp.float64_t[::1] 
sample_weight=np.empty(0), - double cache_size=100. + double cache_size=100.0, ): """ Predict margin (libsvm name for this is predict_values) @@ -695,7 +683,9 @@ def decision_function( cdef svm_model *model cdef cnp.npy_intp n_class - cdef cnp.int32_t[::1] class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) + cdef cnp.int32_t[::1] class_weight_label = np.arange( + class_weight.shape[0], dtype=np.int32 + ) cdef int rv @@ -758,22 +748,22 @@ def cross_validation( const cnp.float64_t[:, ::1] X, const cnp.float64_t[::1] Y, int n_fold, - svm_type=0, + int svm_type=0, kernel='rbf', int degree=3, double gamma=0.1, double coef0=0., double tol=1e-3, - double C=1., + double C=1.0, double nu=0.5, double epsilon=0.1, cnp.float64_t[::1] class_weight=np.empty(0), cnp.float64_t[::1] sample_weight=np.empty(0), int shrinking=0, int probability=0, - double cache_size=100., + double cache_size=100.0, int max_iter=-1, - int random_seed=0 + int random_seed=0, ): """ Binding of the cross-validation routine (low-level routine) @@ -865,8 +855,8 @@ def cross_validation( else: assert ( sample_weight.shape[0] == X.shape[0], - "sample_weight and X have incompatible shapes: " + - "sample_weight has %s samples while X has %s" % (sample_weight.shape[0], X.shape[0]) + f"sample_weight and X have incompatible shapes: sample_weight has " + f"{sample_weight.shape[0]} samples while X has {X.shape[0]}" ) if X.shape[0] < n_fold: @@ -880,11 +870,13 @@ def cross_validation( &Y[0], &sample_weight[0] if sample_weight.size > 0 else NULL, X.shape, - kernel_index + kernel_index, ) if problem.x == NULL: raise MemoryError("Seems we've run out of memory") - cdef cnp.int32_t[::1] class_weight_label = np.arange(class_weight.shape[0], dtype=np.int32) + cdef cnp.int32_t[::1] class_weight_label = np.arange( + class_weight.shape[0], dtype=np.int32 + ) # set parameters set_parameter( @@ -905,7 +897,7 @@ def cross_validation( &class_weight_label[0] if class_weight_label.size > 0 else NULL, 
&class_weight[0] if class_weight.size > 0 else NULL, max_iter, - random_seed + random_seed, ) error_msg = svm_check_parameter(&problem, &param); @@ -923,7 +915,7 @@ def cross_validation( &param, n_fold, &target[0], - &blas_functions + &blas_functions, ) finally: free(problem.x) From c20b80d42b258f436fc0d30b0f69bfc54d84bcfe Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Fri, 30 Dec 2022 18:41:05 +0500 Subject: [PATCH 19/20] Some further black formatting --- sklearn/svm/_libsvm.pyx | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index a791a0a8cce27..6f37f18739826 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -56,9 +56,9 @@ def fit( kernel='rbf', int degree=3, double gamma=0.1, - double coef0=0., + double coef0=0.0, double tol=1e-3, - double C=1., + double C=1.0, double nu=0.5, double epsilon=0.1, const cnp.float64_t[::1] class_weight=np.empty(0), @@ -175,9 +175,8 @@ def fit( else: assert ( sample_weight.shape[0] == X.shape[0], - f"sample_weight and X have incompatible shapes: " - f"sample_weight has {sample_weight.shape[0]} samples while " - f"X has {X.shape[0]}" + f"sample_weight and X have incompatible shapes: sample_weight has " + f"{sample_weight.shape[0]} samples while X has {X.shape[0]}" ) kernel_index = LIBSVM_KERNEL_TYPES.index(kernel) @@ -212,7 +211,7 @@ def fit( &class_weight_label[0] if class_weight_label.size > 0 else NULL, &class_weight[0] if class_weight.size > 0 else NULL, max_iter, - random_seed + random_seed, ) error_msg = svm_check_parameter(&problem, &param) @@ -233,10 +232,7 @@ def fit( n_class = get_nr(model) cdef int[::1] n_iter = np.empty(max(1, n_class * (n_class - 1) // 2), dtype=np.intc) - copy_n_iter( - &n_iter[0], - model - ) + copy_n_iter( &n_iter[0], model) cdef cnp.float64_t[:, ::1] sv_coef = np.empty((n_class-1, SV_len), dtype=np.float64) copy_sv_coef( &sv_coef[0, 0] if sv_coef.size > 0 else NULL, model) @@ -318,12 +314,12 @@ 
cdef void set_predict_params( """Fill param with prediction time-only parameters.""" # training-time only parameters - cdef double C = .0 - cdef double epsilon = .1 + cdef double C = 0.0 + cdef double epsilon = 0.1 cdef int max_iter = 0 - cdef double nu = .5 + cdef double nu = 0.5 cdef int shrinking = 0 - cdef double tol = .1 + cdef double tol = 0.1 cdef int random_seed = -1 kernel_index = LIBSVM_KERNEL_TYPES.index(kernel) From 87229f9d80295c38da684155e2550ea77f597134 Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Fri, 30 Dec 2022 19:33:40 +0500 Subject: [PATCH 20/20] Black refactor --- sklearn/svm/_libsvm.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index 6f37f18739826..9bda1d14331d7 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -748,7 +748,7 @@ def cross_validation( kernel='rbf', int degree=3, double gamma=0.1, - double coef0=0., + double coef0=0.0, double tol=1e-3, double C=1.0, double nu=0.5,