MNT: Use GEMV in enet_coordinate_descent

jakirkham · jakirkham · commit d128dd03229e · 2018-08-22T21:02:52.000-04:00
Make use of the BLAS GEMV operation in `enet_coordinate_descent` instead
of calling DOT in a `for`-loop. The two are semantically equivalent, but
GEMV is likely multithreaded in BLAS implementations, whereas the DOT
loop here is merely serial.
diff --git a/sklearn/linear_model/cd_fast.pyx b/sklearn/linear_model/cd_fast.pyx
@@ -159,14 +159,18 @@ def enet_coordinate_descent(floating[::1] w,
     # fused types version of BLAS functions
     if floating is float:
         dtype = np.float32
+        gemv = sgemv
         dot = sdot
         axpy = saxpy
         asum = sasum
+        copy = scopy
     else:
         dtype = np.float64
+        gemv = dgemv
         dot = ddot
         axpy = daxpy
         asum = dasum
+        copy = dcopy
 
     # get the data information into easy vars
     cdef unsigned int n_samples = X.shape[0]
@@ -205,8 +209,11 @@ def enet_coordinate_descent(floating[::1] w,
 
     with nogil:
         # R = y - np.dot(X, w)
-        for i in range(n_samples):
-            R[i] = y[i] - dot(n_features, &X[i, 0], n_samples, &w[0], 1)
+        copy(n_samples, &y[0], 1, &R[0], 1)
+        gemv(CblasColMajor, CblasNoTrans,
+             n_samples, n_features, -1.0, &X[0, 0], n_samples,
+             &w[0], 1,
+             1.0, &R[0], 1)
 
         # tol *= np.dot(y, y)
         tol *= dot(n_samples, &y[0], 1, &y[0], 1)
@@ -258,9 +265,11 @@ def enet_coordinate_descent(floating[::1] w,
                 # stopping criterion
 
                 # XtA = np.dot(X.T, R) - beta * w
-                for i in range(n_features):
-                    XtA[i] = (dot(n_samples, &X[0, i], 1, &R[0], 1)
-                              - beta * w[i])
+                copy(n_features, &w[0], 1, &XtA[0], 1)
+                gemv(CblasColMajor, CblasTrans,
+                     n_samples, n_features, 1.0, &X[0, 0], n_samples,
+                     &R[0], 1,
+                     -beta, &XtA[0], 1)
 
                 if positive:
                     dual_norm_XtA = max(n_features, &XtA[0])