diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 2efbd87ac3b10..f04ae4cc41eca 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -263,6 +263,12 @@ Enhancements
      generating attribute ``estimators_samples_`` only when it is needed.
      By `David Staub`_.
 
+   - :class:`linear_model.ElasticNet` and :class:`linear_model.Lasso`
+     now work with ``np.float32`` input data without converting it
+     to ``np.float64``. This reduces memory
+     consumption.
+     (`#6913 `_)
+     By `YenChen Lin`_.
 
 Bug fixes
 .........
diff --git a/sklearn/linear_model/cd_fast.pyx b/sklearn/linear_model/cd_fast.pyx
index 847ef1e98cb4e..1dcd10feb1063 100644
--- a/sklearn/linear_model/cd_fast.pyx
+++ b/sklearn/linear_model/cd_fast.pyx
@@ -13,10 +13,16 @@ import numpy.linalg as linalg
 cimport cython
 from cpython cimport bool
+from cython cimport floating
 import warnings
 
 ctypedef np.float64_t DOUBLE
 ctypedef np.uint32_t UINT32_t
+ctypedef floating (*DOT)(int N, floating *X, int incX, floating *Y,
+                         int incY) nogil
+ctypedef void (*AXPY)(int N, floating alpha, floating *X, int incX,
+                      floating *Y, int incY) nogil
+ctypedef floating (*ASUM)(int N, floating *X, int incX) nogil
 
 np.import_array()
@@ -42,13 +48,13 @@ cdef inline UINT32_t rand_int(UINT32_t end, UINT32_t* random_state) nogil:
     return our_rand_r(random_state) % end
 
 
-cdef inline double fmax(double x, double y) nogil:
+cdef inline floating fmax(floating x, floating y) nogil:
     if x > y:
         return x
     return y
 
 
-cdef inline double fsign(double f) nogil:
+cdef inline floating fsign(floating f) nogil:
     if f == 0:
         return 0
     elif f > 0:
@@ -57,11 +63,11 @@ cdef inline double fsign(double f) nogil:
         return -1.0
 
 
-cdef double abs_max(int n, double* a) nogil:
+cdef floating abs_max(int n, floating* a) nogil:
     """np.max(np.abs(a))"""
     cdef int i
-    cdef double m = fabs(a[0])
-    cdef double d
+    cdef floating m = fabs(a[0])
+    cdef floating d
     for i in range(1, n):
         d = fabs(a[i])
         if d > m:
@@ -69,11 +75,11 @@ cdef double abs_max(int n, double* a) nogil:
     return m
 
 
-cdef double max(int n, double* a) nogil:
+cdef floating max(int n, floating* a) nogil:
     """np.max(a)"""
     cdef int i
-    cdef double m = a[0]
-    cdef double d
+    cdef floating m = a[0]
+    cdef floating d
     for i in range(1, n):
         d = a[i]
         if d > m:
@@ -81,11 +87,11 @@ cdef double max(int n, double* a) nogil:
     return m
 
 
-cdef double diff_abs_max(int n, double* a, double* b) nogil:
+cdef floating diff_abs_max(int n, floating* a, floating* b) nogil:
     """np.max(np.abs(a - b))"""
     cdef int i
-    cdef double m = fabs(a[0] - b[0])
-    cdef double d
+    cdef floating m = fabs(a[0] - b[0])
+    cdef floating d
     for i in range(1, n):
         d = fabs(a[i] - b[i])
         if d > m:
@@ -105,29 +111,35 @@ cdef extern from "cblas.h":
     void daxpy "cblas_daxpy"(int N, double alpha, double *X, int incX,
                              double *Y, int incY) nogil
+    void saxpy "cblas_saxpy"(int N, float alpha, float *X, int incX,
+                             float *Y, int incY) nogil
     double ddot "cblas_ddot"(int N, double *X, int incX, double *Y, int incY
                              ) nogil
+    float sdot "cblas_sdot"(int N, float *X, int incX, float *Y,
+                            int incY) nogil
     double dasum "cblas_dasum"(int N, double *X, int incX) nogil
+    float sasum "cblas_sasum"(int N, float *X, int incX) nogil
     void dger "cblas_dger"(CBLAS_ORDER Order, int M, int N, double alpha,
-                           double *X, int incX, double *Y, int incY, double *A, int lda) nogil
-    void dgemv "cblas_dgemv"(CBLAS_ORDER Order,
-                             CBLAS_TRANSPOSE TransA, int M, int N,
-                             double alpha, double *A, int lda,
-                             double *X, int incX, double beta,
-                             double *Y, int incY) nogil
+                           double *X, int incX, double *Y, int incY,
+                           double *A, int 
lda) nogil + void dgemv "cblas_dgemv"(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, + int M, int N, double alpha, double *A, int lda, + double *X, int incX, double beta, + double *Y, int incY) nogil double dnrm2 "cblas_dnrm2"(int N, double *X, int incX) nogil - void dcopy "cblas_dcopy"(int N, double *X, int incX, double *Y, int incY) nogil + void dcopy "cblas_dcopy"(int N, double *X, int incX, double *Y, + int incY) nogil void dscal "cblas_dscal"(int N, double alpha, double *X, int incX) nogil @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) -def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w, - double alpha, double beta, - np.ndarray[DOUBLE, ndim=2, mode='fortran'] X, - np.ndarray[DOUBLE, ndim=1, mode='c'] y, - int max_iter, double tol, +def enet_coordinate_descent(np.ndarray[floating, ndim=1] w, + floating alpha, floating beta, + np.ndarray[floating, ndim=2, mode='fortran'] X, + np.ndarray[floating, ndim=1, mode='c'] y, + int max_iter, floating tol, object rng, bint random=0, bint positive=0): """Cython version of the coordinate descent algorithm for Elastic-Net regression @@ -138,31 +150,49 @@ def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w, """ + # fused types version of BLAS functions + cdef DOT dot + cdef AXPY axpy + cdef ASUM asum + + if floating is float: + dtype = np.float32 + dot = sdot + axpy = saxpy + asum = sasum + else: + dtype = np.float64 + dot = ddot + axpy = daxpy + asum = dasum + # get the data information into easy vars cdef unsigned int n_samples = X.shape[0] cdef unsigned int n_features = X.shape[1] # get the number of tasks indirectly, using strides - cdef unsigned int n_tasks = y.strides[0] / sizeof(DOUBLE) + cdef unsigned int n_tasks = y.strides[0] / sizeof(floating) # compute norms of the columns of X - cdef np.ndarray[DOUBLE, ndim=1] norm_cols_X = (X**2).sum(axis=0) + cdef np.ndarray[floating, ndim=1] norm_cols_X = (X**2).sum(axis=0) # initial value of the residuals - cdef np.ndarray[DOUBLE, ndim=1] R = np.empty(n_samples) - - cdef np.ndarray[DOUBLE, ndim=1] XtA = np.empty(n_features) - cdef double tmp - cdef double w_ii - cdef double d_w_max - cdef double w_max - cdef double d_w_ii - cdef double gap = tol + 1.0 - cdef double d_w_tol = tol - cdef double dual_norm_XtA - cdef double R_norm2 - cdef double w_norm2 - cdef double l1_norm + cdef np.ndarray[floating, ndim=1] R = np.empty(n_samples, dtype=dtype) + cdef np.ndarray[floating, ndim=1] XtA = np.empty(n_features, dtype=dtype) + + cdef floating tmp + cdef floating w_ii + cdef floating d_w_max + cdef floating w_max + cdef floating d_w_ii + cdef floating gap = tol + 1.0 + cdef floating d_w_tol = tol + cdef floating dual_norm_XtA + cdef floating R_norm2 + cdef floating w_norm2 + cdef floating l1_norm + cdef floating const + cdef floating A_norm2 cdef unsigned int ii cdef unsigned int i cdef unsigned int n_iter = 0 @@ -170,6 +200,12 @@ def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w, cdef UINT32_t rand_r_state_seed = rng.randint(0, RAND_R_MAX) cdef UINT32_t* rand_r_state = &rand_r_state_seed + cdef floating *X_data = X.data + cdef floating *y_data = y.data + cdef floating *w_data = w.data + cdef floating *R_data = R.data + cdef floating *XtA_data = XtA.data + if alpha == 0: warnings.warn("Coordinate descent with alpha=0 may lead to unexpected" " results and is discouraged.") @@ -177,13 +213,10 @@ def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w, with nogil: # R = y - np.dot(X, w) for i in range(n_samples): - R[i] = y[i] - ddot(n_features, - (X.data + i * 
sizeof(DOUBLE)), - n_samples, w.data, 1) + R[i] = y[i] - dot(n_features, &X_data[i], n_samples, w_data, 1) # tol *= np.dot(y, y) - tol *= ddot(n_samples, y.data, n_tasks, - y.data, n_tasks) + tol *= dot(n_samples, y_data, n_tasks, y_data, n_tasks) for n_iter in range(max_iter): w_max = 0.0 @@ -201,14 +234,11 @@ def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w, if w_ii != 0.0: # R += w_ii * X[:,ii] - daxpy(n_samples, w_ii, - (X.data + ii * n_samples * sizeof(DOUBLE)), - 1, R.data, 1) + axpy(n_samples, w_ii, &X_data[ii * n_samples], 1, + R_data, 1) # tmp = (X[:,ii]*R).sum() - tmp = ddot(n_samples, - (X.data + ii * n_samples * sizeof(DOUBLE)), - 1, R.data, 1) + tmp = dot(n_samples, &X_data[ii * n_samples], 1, R_data, 1) if positive and tmp < 0: w[ii] = 0.0 @@ -218,9 +248,8 @@ def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w, if w[ii] != 0.0: # R -= w[ii] * X[:,ii] # Update residual - daxpy(n_samples, -w[ii], - (X.data + ii * n_samples * sizeof(DOUBLE)), - 1, R.data, 1) + axpy(n_samples, -w[ii], &X_data[ii * n_samples], 1, + R_data, 1) # update the maximum absolute coefficient update d_w_ii = fabs(w[ii] - w_ii) @@ -230,32 +259,28 @@ def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w, if fabs(w[ii]) > w_max: w_max = fabs(w[ii]) - if (w_max == 0.0 - or d_w_max / w_max < d_w_tol - or n_iter == max_iter - 1): + if (w_max == 0.0 or + d_w_max / w_max < d_w_tol or + n_iter == max_iter - 1): # the biggest coordinate update of this iteration was smaller # than the tolerance: check the duality gap as ultimate # stopping criterion # XtA = np.dot(X.T, R) - beta * w for i in range(n_features): - XtA[i] = ddot( - n_samples, - (X.data + i * n_samples *sizeof(DOUBLE)), - 1, R.data, 1) - beta * w[i] + XtA[i] = dot(n_samples, &X_data[i * n_samples], + 1, R_data, 1) - beta * w[i] if positive: - dual_norm_XtA = max(n_features, XtA.data) + dual_norm_XtA = max(n_features, XtA_data) else: - dual_norm_XtA = abs_max(n_features, XtA.data) + dual_norm_XtA = abs_max(n_features, XtA_data) # R_norm2 = np.dot(R, R) - R_norm2 = ddot(n_samples, R.data, 1, - R.data, 1) + R_norm2 = dot(n_samples, R_data, 1, R_data, 1) # w_norm2 = np.dot(w, w) - w_norm2 = ddot(n_features, w.data, 1, - w.data, 1) + w_norm2 = dot(n_features, w_data, 1, w_data, 1) if (dual_norm_XtA > alpha): const = alpha / dual_norm_XtA @@ -265,33 +290,30 @@ def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w, const = 1.0 gap = R_norm2 - l1_norm = dasum(n_features, w.data, 1) + l1_norm = asum(n_features, w_data, 1) # np.dot(R.T, y) - gap += (alpha * l1_norm - const * ddot( - n_samples, - R.data, 1, - y.data, n_tasks) + gap += (alpha * l1_norm + - const * dot(n_samples, R_data, 1, y_data, n_tasks) + 0.5 * beta * (1 + const ** 2) * (w_norm2)) if gap < tol: # return if we reached desired tolerance break - return w, gap, tol, n_iter + 1 @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) -def sparse_enet_coordinate_descent(double[:] w, - double alpha, double beta, - np.ndarray[double, ndim=1, mode='c'] X_data, +def sparse_enet_coordinate_descent(floating [:] w, + floating alpha, floating beta, + np.ndarray[floating, ndim=1, mode='c'] X_data, np.ndarray[int, ndim=1, mode='c'] X_indices, np.ndarray[int, ndim=1, mode='c'] X_indptr, - np.ndarray[double, ndim=1] y, - double[:] X_mean, int max_iter, - double tol, object rng, bint random=0, + np.ndarray[floating, ndim=1] y, + floating[:] X_mean, int max_iter, + floating tol, object rng, bint random=0, bint positive=0): """Cython version of the coordinate descent algorithm 
for Elastic-Net @@ -307,30 +329,54 @@ def sparse_enet_coordinate_descent(double[:] w, # compute norms of the columns of X cdef unsigned int ii - cdef double[:] norm_cols_X = np.zeros(n_features, np.float64) + cdef floating[:] norm_cols_X cdef unsigned int startptr = X_indptr[0] cdef unsigned int endptr # get the number of tasks indirectly, using strides - cdef unsigned int n_tasks = y.strides[0] / sizeof(DOUBLE) + cdef unsigned int n_tasks # initial value of the residuals - cdef double[:] R = y.copy() + cdef floating[:] R = y.copy() - cdef double[:] X_T_R = np.zeros(n_features) - cdef double[:] XtA = np.zeros(n_features) + cdef floating[:] X_T_R + cdef floating[:] XtA - cdef double tmp - cdef double w_ii - cdef double d_w_max - cdef double w_max - cdef double d_w_ii - cdef double X_mean_ii - cdef double R_sum = 0.0 - cdef double normalize_sum - cdef double gap = tol + 1.0 - cdef double d_w_tol = tol + # fused types version of BLAS functions + cdef DOT dot + cdef ASUM asum + + if floating is float: + dtype = np.float32 + n_tasks = y.strides[0] / sizeof(float) + dot = sdot + asum = sasum + else: + dtype = np.float64 + n_tasks = y.strides[0] / sizeof(DOUBLE) + dot = ddot + asum = dasum + + norm_cols_X = np.zeros(n_features, dtype=dtype) + X_T_R = np.zeros(n_features, dtype=dtype) + XtA = np.zeros(n_features, dtype=dtype) + + cdef floating tmp + cdef floating w_ii + cdef floating d_w_max + cdef floating w_max + cdef floating d_w_ii + cdef floating X_mean_ii + cdef floating R_sum = 0.0 + cdef floating R_norm2 + cdef floating w_norm2 + cdef floating A_norm2 + cdef floating l1_norm + cdef floating normalize_sum + cdef floating gap = tol + 1.0 + cdef floating d_w_tol = tol + cdef floating dual_norm_XtA cdef unsigned int jj cdef unsigned int n_iter = 0 cdef unsigned int f_iter @@ -363,7 +409,7 @@ def sparse_enet_coordinate_descent(double[:] w, startptr = endptr # tol *= np.dot(y, y) - tol *= ddot(n_samples, &y[0], 1, &y[0], 1) + tol *= dot(n_samples, &y[0], 1, &y[0], 1) for n_iter in range(max_iter): @@ -451,10 +497,10 @@ def sparse_enet_coordinate_descent(double[:] w, dual_norm_XtA = abs_max(n_features, &XtA[0]) # R_norm2 = np.dot(R, R) - R_norm2 = ddot(n_samples, &R[0], 1, &R[0], 1) + R_norm2 = dot(n_samples, &R[0], 1, &R[0], 1) # w_norm2 = np.dot(w, w) - w_norm2 = ddot(n_features, &w[0], 1, &w[0], 1) + w_norm2 = dot(n_features, &w[0], 1, &w[0], 1) if (dual_norm_XtA > alpha): const = alpha / dual_norm_XtA A_norm2 = R_norm2 * const**2 @@ -463,13 +509,12 @@ def sparse_enet_coordinate_descent(double[:] w, const = 1.0 gap = R_norm2 - l1_norm = dasum(n_features, &w[0], 1) + l1_norm = asum(n_features, &w[0], 1) - # The expression inside ddot is equivalent to np.dot(R.T, y) - gap += (alpha * l1_norm - const * ddot( + gap += (alpha * l1_norm - const * dot( n_samples, - &R[0], 1, - &y[0], n_tasks + &R[0], 1, + &y[0], n_tasks ) + 0.5 * beta * (1 + const ** 2) * w_norm2) diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index 038a9830c59f3..45d8b265324d9 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -372,16 +372,18 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None, ElasticNet ElasticNetCV """ - # We expect X and y to be already float64 Fortran ordered when bypassing + # We expect X and y to be already Fortran ordered when bypassing # checks if check_input: - X = check_array(X, 'csc', dtype=np.float64, order='F', copy=copy_X) - y = check_array(y, 'csc', dtype=np.float64, 
order='F', copy=False, + X = check_array(X, 'csc', dtype=[np.float64, np.float32], + order='F', copy=copy_X) + y = check_array(y, 'csc', dtype=X.dtype.type, order='F', copy=False, ensure_2d=False) if Xy is not None: # Xy should be a 1d contiguous array or a 2D C ordered array - Xy = check_array(Xy, dtype=np.float64, order='C', copy=False, + Xy = check_array(Xy, dtype=X.dtype.type, order='C', copy=False, ensure_2d=False) + n_samples, n_features = X.shape multi_output = False @@ -395,8 +397,9 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None, # As sparse matrices are not actually centered we need this # to be passed to the CD solver. X_sparse_scaling = params['X_offset'] / params['X_scale'] + X_sparse_scaling = np.asarray(X_sparse_scaling, dtype=X.dtype) else: - X_sparse_scaling = np.zeros(n_features) + X_sparse_scaling = np.zeros(n_features, dtype=X.dtype) # X should be normalized and fit already if function is called # from ElasticNet.fit @@ -426,15 +429,15 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None, random = (selection == 'random') if not multi_output: - coefs = np.empty((n_features, n_alphas), dtype=np.float64) + coefs = np.empty((n_features, n_alphas), dtype=X.dtype) else: coefs = np.empty((n_outputs, n_features, n_alphas), - dtype=np.float64) + dtype=X.dtype) if coef_init is None: - coef_ = np.asfortranarray(np.zeros(coefs.shape[:-1])) + coef_ = np.asfortranarray(np.zeros(coefs.shape[:-1], dtype=X.dtype)) else: - coef_ = np.asfortranarray(coef_init) + coef_ = np.asfortranarray(coef_init, dtype=X.dtype) for i, alpha in enumerate(alphas): l1_reg = alpha * l1_ratio * n_samples @@ -470,7 +473,9 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None, if dual_gap_ > eps_: warnings.warn('Objective did not converge.' + ' You might want' + - ' to increase the number of iterations', + ' to increase the number of iterations.' + + ' Fitting data with very small alpha' + + ' may cause precision problems.', ConvergenceWarning) if verbose: @@ -663,16 +668,16 @@ def fit(self, X, y, check_input=True): raise ValueError('precompute should be one of True, False or' ' array-like. 
Got %r' % self.precompute) - # We expect X and y to be already float64 Fortran ordered arrays + # We expect X and y to be float64 or float32 Fortran ordered arrays # when bypassing checks if check_input: - y = np.asarray(y, dtype=np.float64) - X, y = check_X_y(X, y, accept_sparse='csc', dtype=np.float64, - order='F', + X, y = check_X_y(X, y, accept_sparse='csc', + order='F', dtype=[np.float64, np.float32], copy=self.copy_X and self.fit_intercept, multi_output=True, y_numeric=True) - y = check_array(y, dtype=np.float64, order='F', copy=False, + y = check_array(y, order='F', copy=False, dtype=X.dtype.type, ensure_2d=False) + X, y, X_offset, y_offset, X_scale, precompute, Xy = \ _pre_fit(X, y, None, self.precompute, self.normalize, self.fit_intercept, copy=False) @@ -688,14 +693,14 @@ def fit(self, X, y, check_input=True): raise ValueError("selection should be either random or cyclic.") if not self.warm_start or self.coef_ is None: - coef_ = np.zeros((n_targets, n_features), dtype=np.float64, + coef_ = np.zeros((n_targets, n_features), dtype=X.dtype, order='F') else: coef_ = self.coef_ if coef_.ndim == 1: coef_ = coef_[np.newaxis, :] - dual_gaps_ = np.zeros(n_targets, dtype=np.float64) + dual_gaps_ = np.zeros(n_targets, dtype=X.dtype) self.n_iter_ = [] for k in xrange(n_targets): @@ -725,6 +730,9 @@ def fit(self, X, y, check_input=True): self.coef_, self.dual_gap_ = map(np.squeeze, [coef_, dual_gaps_]) self._set_intercept(X_offset, y_offset, X_scale) + # workaround since _set_intercept will cast self.coef_ into float64 + self.coef_ = np.asarray(self.coef_, dtype=X.dtype) + # return self for chaining fit and predict calls return self diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 918180ce18915..9065c5b97dc4f 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -670,3 +670,32 @@ def test_lasso_non_float_y(): clf_float = model(fit_intercept=False) clf_float.fit(X, y_float) assert_array_equal(clf.coef_, clf_float.coef_) + + +def test_enet_float_precision(): + # Generate dataset + X, y, X_test, y_test = build_dataset(n_samples=20, n_features=10) + # Here we have a small number of iterations, and thus the + # ElasticNet might not converge. This is to speed up tests + + for normalize in [True, False]: + for fit_intercept in [True, False]: + coef = {} + intercept = {} + clf = ElasticNet(alpha=0.5, max_iter=100, precompute=False, + fit_intercept=fit_intercept, normalize=normalize) + for dtype in [np.float64, np.float32]: + X = dtype(X) + y = dtype(y) + ignore_warnings(clf.fit)(X, y) + + coef[dtype] = clf.coef_ + intercept[dtype] = clf.intercept_ + + assert_equal(clf.coef_.dtype, dtype) + + assert_array_almost_equal(coef[np.float32], coef[np.float64], + decimal=4) + assert_array_almost_equal(intercept[np.float32], + intercept[np.float64], + decimal=4) diff --git a/sklearn/src/cblas/ATL_dsrefdot.c b/sklearn/src/cblas/ATL_dsrefdot.c new file mode 100644 index 0000000000000..442e51a08e207 --- /dev/null +++ b/sklearn/src/cblas/ATL_dsrefdot.c @@ -0,0 +1,141 @@ +/* --------------------------------------------------------------------- + * + * -- Automatically Tuned Linear Algebra Software (ATLAS) + * (C) Copyright 2000 All Rights Reserved + * + * -- ATLAS routine -- Version 3.2 -- December 25, 2000 + * + * Author : Antoine P. 
Petitet + * Originally developed at the University of Tennessee, + * Innovative Computing Laboratory, Knoxville TN, 37996-1301, USA. + * + * --------------------------------------------------------------------- + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in + * the documentation and/or other materials provided with the distri- + * bution. + * 3. The name of the University, the ATLAS group, or the names of its + * contributors may not be used to endorse or promote products deri- + * ved from this software without specific written permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, + * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEO- + * RY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (IN- + * CLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "atlas_refmisc.h" +#include "atlas_reflevel1.h" + +double ATL_dsrefdot +( + const int N, + const float * X, + const int INCX, + const float * Y, + const int INCY +) +{ +/* + * Purpose + * ======= + * + * ATL_dsrefdot returns the dot product x^T * y of two n-vectors x and + * y. The result is internally computed using double precision arithme- + * tic. + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the vector x. N must be + * at least zero. Unchanged on exit. + * + * X (input) const float * + * On entry, X points to the first entry to be accessed of an + * incremented array of size equal to or greater than + * ( 1 + ( n - 1 ) * abs( INCX ) ) * sizeof( float ), + * that contains the vector x. Unchanged on exit. + * + * INCX (input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. Unchanged on exit. + * + * Y (input) const float * + * On entry, Y points to the first entry to be accessed of an + * incremented array of size equal to or greater than + * ( 1 + ( n - 1 ) * abs( INCY ) ) * sizeof( float ), + * that contains the vector y. Unchanged on exit. + * + * INCY (input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. Unchanged on exit. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + register double dot = ATL_dZERO, x0, x1, x2, x3, y0, y1, y2, y3; + float * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incY2 = 2 * INCY, + incX3 = 3 * INCX, incY3 = 3 * INCY, + incX4 = 4 * INCX, incY4 = 4 * INCY; +/* .. + * .. Executable Statements .. + * + */ + if( N > 0 ) + { + if( ( nu = ( N >> 2 ) << 2 ) != 0 ) + { + StX = (float *)X + nu * INCX; + + do + { + x0 = (double)(*X); y0 = (double)(*Y); + x1 = (double)(X[INCX ]); y1 = (double)(Y[INCY ]); + x2 = (double)(X[incX2]); y2 = (double)(Y[incY2]); + x3 = (double)(X[incX3]); y3 = (double)(Y[incY3]); + dot += x0 * y0; dot += x1 * y1; dot += x2 * y2; dot += x3 * y3; + X += incX4; + Y += incY4; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { + x0 = (double)(*X); y0 = (double)(*Y); dot += x0 * y0; + X += INCX; Y += INCY; + } + } + return( dot ); +/* + * End of ATL_dsrefdot + */ +} diff --git a/sklearn/src/cblas/ATL_srefasum.c b/sklearn/src/cblas/ATL_srefasum.c new file mode 100644 index 0000000000000..aec26caf011ac --- /dev/null +++ b/sklearn/src/cblas/ATL_srefasum.c @@ -0,0 +1,133 @@ +/* --------------------------------------------------------------------- + * + * -- Automatically Tuned Linear Algebra Software (ATLAS) + * (C) Copyright 2000 All Rights Reserved + * + * -- ATLAS routine -- Version 3.9.24 -- December 25, 2000 + * + * Author : Antoine P. Petitet + * Originally developed at the University of Tennessee, + * Innovative Computing Laboratory, Knoxville TN, 37996-1301, USA. + * + * --------------------------------------------------------------------- + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in + * the documentation and/or other materials provided with the distri- + * bution. + * 3. The name of the University, the ATLAS group, or the names of its + * contributors may not be used to endorse or promote products deri- + * ved from this software without specific written permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, + * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEO- + * RY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (IN- + * CLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "atlas_refmisc.h" +#include "atlas_reflevel1.h" + +float ATL_srefasum +( + const int N, + const float * X, + const int INCX +) +{ +/* + * Purpose + * ======= + * + * ATL_srefasum returns the sum of absolute values of the entries of a + * vector x. + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the vector x. N must be + * at least zero. Unchanged on exit. + * + * X (input) const float * + * On entry, X points to the first entry to be accessed of an + * incremented array of size equal to or greater than + * ( 1 + ( n - 1 ) * abs( INCX ) ) * sizeof( float ), + * that contains the vector x. Unchanged on exit. + * + * INCX (input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. Unchanged on exit. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register float sum = ATL_sZERO, x0, x1, x2, x3, + x4, x5, x6, x7; + float * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incX3 = 3 * INCX, + incX4 = 4 * INCX, incX5 = 5 * INCX, + incX6 = 6 * INCX, incX7 = 7 * INCX, + incX8 = 8 * INCX; +/* .. + * .. Executable Statements .. + * + */ + if( ( N > 0 ) && ( INCX >= 1 ) ) + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = (float *)X + nu * INCX; + + do + { + x0 = (*X); x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5]; + x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7]; + + sum += Msabs( x0 ); sum += Msabs( x4 ); + sum += Msabs( x1 ); sum += Msabs( x3 ); + sum += Msabs( x2 ); sum += Msabs( x6 ); + sum += Msabs( x5 ); sum += Msabs( x7 ); + + X += incX8; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { + x0 = (*X); + sum += Msabs( x0 ); + X += INCX; + } + } + return( sum ); +/* + * End of ATL_srefasum + */ +} diff --git a/sklearn/src/cblas/cblas_sasum.c b/sklearn/src/cblas/cblas_sasum.c new file mode 100644 index 0000000000000..439707ba021f4 --- /dev/null +++ b/sklearn/src/cblas/cblas_sasum.c @@ -0,0 +1,44 @@ +/* + * Automatically Tuned Linear Algebra Software v3.10.2 + * (C) Copyright 1999 R. Clint Whaley + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the ATLAS group or the names of its contributers may + * not be used to endorse or promote products derived from this + * software without specific written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#define SREAL +#include "atlas_misc.h" +#ifdef ATL_USEPTHREADS + #include "atlas_ptalias1.h" +#endif +#include "atlas_level1.h" +#include "cblas.h" + +float cblas_sasum(const int N, const float *X, const int incX) +{ + if (N > 0 && incX > 0) + return(ATL_sasum(N, X, incX)); + return(0.0f); +} diff --git a/sklearn/src/cblas/cblas_saxpy.c b/sklearn/src/cblas/cblas_saxpy.c new file mode 100644 index 0000000000000..19600a53a5127 --- /dev/null +++ b/sklearn/src/cblas/cblas_saxpy.c @@ -0,0 +1,156 @@ +/* --------------------------------------------------------------------- + * + * -- Automatically Tuned Linear Algebra Software (ATLAS) + * (C) Copyright 2000 All Rights Reserved + * + * -- ATLAS routine -- Version 3.9.24 -- December 25, 2000 + * + * Author : Antoine P. Petitet + * Originally developed at the University of Tennessee, + * Innovative Computing Laboratory, Knoxville TN, 37996-1301, USA. + * + * --------------------------------------------------------------------- + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in + * the documentation and/or other materials provided with the distri- + * bution. + * 3. The name of the University, the ATLAS group, or the names of its + * contributors may not be used to endorse or promote products deri- + * ved from this software without specific written permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, + * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEO- + * RY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (IN- + * CLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "atlas_refmisc.h" + +void cblas_saxpy +( + const int N, + const float ALPHA, + const float * X, + const int INCX, + float * Y, + const int INCY +) +{ +/* + * Purpose + * ======= + * + * ATL_srefaxpy performs the following operation: + * + * y := y + alpha * x, + * + * where alpha is a scalar and x and y are two n-vectors. + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the vector x. N must be + * at least zero. Unchanged on exit. + * + * ALPHA (input) const float + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero, then the entries of the incremented array X + * need not be set on input. Unchanged on exit. + * + * X (input) const float * + * On entry, X points to the first entry to be accessed of an + * incremented array of size equal to or greater than + * ( 1 + ( n - 1 ) * abs( INCX ) ) * sizeof( float ), + * that contains the vector x. Unchanged on exit. + * + * INCX (input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. Unchanged on exit. + * + * Y (input/output) float * + * On entry, Y points to the first entry to be accessed of an + * incremented array of size equal to or greater than + * ( 1 + ( n - 1 ) * abs( INCY ) ) * sizeof( float ), + * that contains the vector y. On exit, the entries of the in- + * cremented array Y are updated with the scaled entries of the + * incremented array X. + * + * INCY (input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. Unchanged on exit. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register const float alpha = ALPHA; + register float x0, x1, x2, x3, y0, y1, y2, y3; + float * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incY2 = 2 * INCY, + incX3 = 3 * INCX, incY3 = 3 * INCY, + incX4 = 4 * INCX, incY4 = 4 * INCY; +/* .. + * .. Executable Statements .. + * + */ + if( ( N > 0 ) && ( alpha != ATL_sZERO ) ) + { + if( ( nu = ( N >> 2 ) << 2 ) != 0 ) + { + StX = (float *)X + nu * INCX; + + do + { + x0 = (*X); y0 = (*Y); x1 = X[INCX ]; y1 = Y[INCY ]; + x2 = X[incX2]; y2 = Y[incY2]; x3 = X[incX3]; y3 = Y[incY3]; + + *Y = y0 + alpha * x0; Y[INCY ] = y1 + alpha * x1; + Y[incY2] = y2 + alpha * x2; Y[incY3] = y3 + alpha * x3; + + X += incX4; + Y += incY4; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { + x0 = (*X); + y0 = (*Y); + + *Y = y0 + alpha * x0; + + X += INCX; + Y += INCY; + } + } +/* + * End of ATL_srefaxpy + */ +}
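The heart of the ``cd_fast.pyx`` change is the ``if floating is float`` block: it binds the local ``dot``, ``axpy`` and ``asum`` function pointers to the single-precision CBLAS routines (``sdot``, ``saxpy``, ``sasum``) for ``float32`` data and to the double-precision ones (``ddot``, ``daxpy``, ``dasum``) otherwise, so the same coordinate-descent loop runs entirely at the input's precision. A rough Python analogue of that dtype-based dispatch, using SciPy's BLAS wrappers purely for illustration (the solver itself calls CBLAS directly and does not go through SciPy)::

    import numpy as np
    from scipy.linalg import get_blas_funcs

    def gap_terms(w, R):
        # get_blas_funcs returns sdot/sasum for float32 arrays and
        # ddot/dasum for float64 ones, mirroring the fused-type branch.
        dot, asum = get_blas_funcs(('dot', 'asum'), (w, R))
        R_norm2 = dot(R, R)   # R_norm2 = np.dot(R, R)
        w_norm2 = dot(w, w)   # w_norm2 = np.dot(w, w)
        l1_norm = asum(w)     # l1_norm = np.abs(w).sum()
        return R_norm2, w_norm2, l1_norm

    w = np.array([0.5, -0.25, 0.0], dtype=np.float32)
    R = np.array([0.1, -0.2, 0.3], dtype=np.float32)
    print(gap_terms(w, R))  # every term computed in single precision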
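On the Python side, ``enet_path`` and ``ElasticNet.fit`` now validate input with ``dtype=[np.float64, np.float32]``: ``check_array``/``check_X_y`` keep ``float64`` and ``float32`` data as-is and convert any other dtype to ``float64`` (the first entry of the list), and the work arrays (``coef_``, residuals, ``dual_gaps_``, the sparse scaling vector) are allocated with ``X.dtype``. A minimal usage sketch of the resulting behaviour, mirroring the new ``test_enet_float_precision`` test (the exact coefficients depend on the random data)::

    import numpy as np
    from sklearn.linear_model import ElasticNet

    rng = np.random.RandomState(0)
    X = rng.randn(20, 10)
    y = rng.randn(20)

    coef = {}
    for dtype in (np.float64, np.float32):
        clf = ElasticNet(alpha=0.5, fit_intercept=True)
        clf.fit(X.astype(dtype), y.astype(dtype))
        coef[dtype] = clf.coef_
        # with this patch the coefficients keep the input precision
        assert clf.coef_.dtype == dtype

    # both precisions agree to about 4 decimals, as asserted in the new test
    np.testing.assert_array_almost_equal(coef[np.float32], coef[np.float64],
                                         decimal=4)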
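The new C files under ``sklearn/src/cblas`` extend the bundled reference CBLAS (the fallback used when scikit-learn is built without a system BLAS) with the single-precision kernels the Cython code now calls, among them ``cblas_saxpy``, ``cblas_sasum``/``ATL_srefasum`` and ``ATL_dsrefdot``; the latter, as its own docstring notes, accumulates the ``float32`` dot product in double precision to limit round-off. A short NumPy sketch of that accumulation semantics (``ds_dot`` is only an illustrative name, not part of the patch)::

    import numpy as np

    def ds_dot(x, y):
        # ATL_dsrefdot semantics: float32 operands, float64 accumulator.
        acc = np.float64(0.0)
        for xi, yi in zip(x, y):
            acc += np.float64(xi) * np.float64(yi)
        return acc

    x = np.full(1000, 0.1, dtype=np.float32)
    y = np.ones(1000, dtype=np.float32)
    print(ds_dot(x, y))   # double-precision accumulation
    print(np.dot(x, y))   # NumPy's float32 dot, for comparison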