diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 2efbd87ac3b10..f04ae4cc41eca 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -263,6 +263,12 @@ Enhancements
      generating attribute ``estimators_samples_`` only when it is needed.
      By `David Staub`_.
 
+   - :class:`linear_model.ElasticNet` and :class:`linear_model.Lasso`
+     now work with ``np.float32`` input data without converting it
+     to ``np.float64``. This reduces memory
+     consumption.
+     (`#6913 `_)
+     By `YenChen Lin`_.
 
 Bug fixes
 .........
diff --git a/sklearn/linear_model/cd_fast.pyx b/sklearn/linear_model/cd_fast.pyx
index 847ef1e98cb4e..1dcd10feb1063 100644
--- a/sklearn/linear_model/cd_fast.pyx
+++ b/sklearn/linear_model/cd_fast.pyx
@@ -13,10 +13,16 @@ import numpy.linalg as linalg
 cimport cython
 from cpython cimport bool
+from cython cimport floating
 import warnings
 
 ctypedef np.float64_t DOUBLE
 ctypedef np.uint32_t UINT32_t
+ctypedef floating (*DOT)(int N, floating *X, int incX, floating *Y,
+                         int incY) nogil
+ctypedef void (*AXPY)(int N, floating alpha, floating *X, int incX,
+                      floating *Y, int incY) nogil
+ctypedef floating (*ASUM)(int N, floating *X, int incX) nogil
 
 np.import_array()
@@ -42,13 +48,13 @@ cdef inline UINT32_t rand_int(UINT32_t end, UINT32_t* random_state) nogil:
     return our_rand_r(random_state) % end
 
 
-cdef inline double fmax(double x, double y) nogil:
+cdef inline floating fmax(floating x, floating y) nogil:
     if x > y:
         return x
     return y
 
 
-cdef inline double fsign(double f) nogil:
+cdef inline floating fsign(floating f) nogil:
     if f == 0:
         return 0
     elif f > 0:
@@ -57,11 +63,11 @@ cdef inline double fsign(double f) nogil:
         return -1.0
 
 
-cdef double abs_max(int n, double* a) nogil:
+cdef floating abs_max(int n, floating* a) nogil:
     """np.max(np.abs(a))"""
     cdef int i
-    cdef double m = fabs(a[0])
-    cdef double d
+    cdef floating m = fabs(a[0])
+    cdef floating d
     for i in range(1, n):
         d = fabs(a[i])
         if d > m:
@@ -69,11 +75,11 @@ cdef double abs_max(int n, double* a) nogil:
     return m
 
 
-cdef double max(int n, double* a) nogil:
+cdef floating max(int n, floating* a) nogil:
     """np.max(a)"""
     cdef int i
-    cdef double m = a[0]
-    cdef double d
+    cdef floating m = a[0]
+    cdef floating d
     for i in range(1, n):
         d = a[i]
         if d > m:
@@ -81,11 +87,11 @@ cdef double max(int n, double* a) nogil:
     return m
 
 
-cdef double diff_abs_max(int n, double* a, double* b) nogil:
+cdef floating diff_abs_max(int n, floating* a, floating* b) nogil:
     """np.max(np.abs(a - b))"""
     cdef int i
-    cdef double m = fabs(a[0] - b[0])
-    cdef double d
+    cdef floating m = fabs(a[0] - b[0])
+    cdef floating d
     for i in range(1, n):
         d = fabs(a[i] - b[i])
         if d > m:
@@ -105,29 +111,35 @@ cdef extern from "cblas.h":
     void daxpy "cblas_daxpy"(int N, double alpha, double *X, int incX,
                              double *Y, int incY) nogil
+    void saxpy "cblas_saxpy"(int N, float alpha, float *X, int incX,
+                             float *Y, int incY) nogil
     double ddot "cblas_ddot"(int N, double *X, int incX, double *Y, int incY
                              ) nogil
+    float sdot "cblas_sdot"(int N, float *X, int incX, float *Y,
+                            int incY) nogil
     double dasum "cblas_dasum"(int N, double *X, int incX) nogil
+    float sasum "cblas_sasum"(int N, float *X, int incX) nogil
     void dger "cblas_dger"(CBLAS_ORDER Order, int M, int N, double alpha,
-                           double *X, int incX, double *Y, int incY, double *A, int lda) nogil
-    void dgemv "cblas_dgemv"(CBLAS_ORDER Order,
-                             CBLAS_TRANSPOSE TransA, int M, int N,
-                             double alpha, double *A, int lda,
-                             double *X, int incX, double beta,
-                             double *Y, int incY) nogil
+                           double *X, int incX, double *Y, int incY,
+                           double *A, int 
lda) nogil + void dgemv "cblas_dgemv"(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, + int M, int N, double alpha, double *A, int lda, + double *X, int incX, double beta, + double *Y, int incY) nogil double dnrm2 "cblas_dnrm2"(int N, double *X, int incX) nogil - void dcopy "cblas_dcopy"(int N, double *X, int incX, double *Y, int incY) nogil + void dcopy "cblas_dcopy"(int N, double *X, int incX, double *Y, + int incY) nogil void dscal "cblas_dscal"(int N, double alpha, double *X, int incX) nogil @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) -def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w, - double alpha, double beta, - np.ndarray[DOUBLE, ndim=2, mode='fortran'] X, - np.ndarray[DOUBLE, ndim=1, mode='c'] y, - int max_iter, double tol, +def enet_coordinate_descent(np.ndarray[floating, ndim=1] w, + floating alpha, floating beta, + np.ndarray[floating, ndim=2, mode='fortran'] X, + np.ndarray[floating, ndim=1, mode='c'] y, + int max_iter, floating tol, object rng, bint random=0, bint positive=0): """Cython version of the coordinate descent algorithm for Elastic-Net regression @@ -138,31 +150,49 @@ def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w, """ + # fused types version of BLAS functions + cdef DOT dot + cdef AXPY axpy + cdef ASUM asum + + if floating is float: + dtype = np.float32 + dot = sdot + axpy = saxpy + asum = sasum + else: + dtype = np.float64 + dot = ddot + axpy = daxpy + asum = dasum + # get the data information into easy vars cdef unsigned int n_samples = X.shape[0] cdef unsigned int n_features = X.shape[1] # get the number of tasks indirectly, using strides - cdef unsigned int n_tasks = y.strides[0] / sizeof(DOUBLE) + cdef unsigned int n_tasks = y.strides[0] / sizeof(floating) # compute norms of the columns of X - cdef np.ndarray[DOUBLE, ndim=1] norm_cols_X = (X**2).sum(axis=0) + cdef np.ndarray[floating, ndim=1] norm_cols_X = (X**2).sum(axis=0) # initial value of the residuals - cdef np.ndarray[DOUBLE, ndim=1] R = np.empty(n_samples) - - cdef np.ndarray[DOUBLE, ndim=1] XtA = np.empty(n_features) - cdef double tmp - cdef double w_ii - cdef double d_w_max - cdef double w_max - cdef double d_w_ii - cdef double gap = tol + 1.0 - cdef double d_w_tol = tol - cdef double dual_norm_XtA - cdef double R_norm2 - cdef double w_norm2 - cdef double l1_norm + cdef np.ndarray[floating, ndim=1] R = np.empty(n_samples, dtype=dtype) + cdef np.ndarray[floating, ndim=1] XtA = np.empty(n_features, dtype=dtype) + + cdef floating tmp + cdef floating w_ii + cdef floating d_w_max + cdef floating w_max + cdef floating d_w_ii + cdef floating gap = tol + 1.0 + cdef floating d_w_tol = tol + cdef floating dual_norm_XtA + cdef floating R_norm2 + cdef floating w_norm2 + cdef floating l1_norm + cdef floating const + cdef floating A_norm2 cdef unsigned int ii cdef unsigned int i cdef unsigned int n_iter = 0 @@ -170,6 +200,12 @@ def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w, cdef UINT32_t rand_r_state_seed = rng.randint(0, RAND_R_MAX) cdef UINT32_t* rand_r_state = &rand_r_state_seed + cdef floating *X_data = X.data + cdef floating *y_data = y.data + cdef floating *w_data = w.data + cdef floating *R_data = R.data + cdef floating *XtA_data = XtA.data + if alpha == 0: warnings.warn("Coordinate descent with alpha=0 may lead to unexpected" " results and is discouraged.") @@ -177,13 +213,10 @@ def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w, with nogil: # R = y - np.dot(X, w) for i in range(n_samples): - R[i] = y[i] - ddot(n_features, - (X.data + i * 
sizeof(DOUBLE)), - n_samples, w.data, 1) + R[i] = y[i] - dot(n_features, &X_data[i], n_samples, w_data, 1) # tol *= np.dot(y, y) - tol *= ddot(n_samples, y.data, n_tasks, - y.data, n_tasks) + tol *= dot(n_samples, y_data, n_tasks, y_data, n_tasks) for n_iter in range(max_iter): w_max = 0.0 @@ -201,14 +234,11 @@ def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w, if w_ii != 0.0: # R += w_ii * X[:,ii] - daxpy(n_samples, w_ii, - (X.data + ii * n_samples * sizeof(DOUBLE)), - 1, R.data, 1) + axpy(n_samples, w_ii, &X_data[ii * n_samples], 1, + R_data, 1) # tmp = (X[:,ii]*R).sum() - tmp = ddot(n_samples, - (X.data + ii * n_samples * sizeof(DOUBLE)), - 1, R.data, 1) + tmp = dot(n_samples, &X_data[ii * n_samples], 1, R_data, 1) if positive and tmp < 0: w[ii] = 0.0 @@ -218,9 +248,8 @@ def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w, if w[ii] != 0.0: # R -= w[ii] * X[:,ii] # Update residual - daxpy(n_samples, -w[ii], - (X.data + ii * n_samples * sizeof(DOUBLE)), - 1, R.data, 1) + axpy(n_samples, -w[ii], &X_data[ii * n_samples], 1, + R_data, 1) # update the maximum absolute coefficient update d_w_ii = fabs(w[ii] - w_ii) @@ -230,32 +259,28 @@ def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w, if fabs(w[ii]) > w_max: w_max = fabs(w[ii]) - if (w_max == 0.0 - or d_w_max / w_max < d_w_tol - or n_iter == max_iter - 1): + if (w_max == 0.0 or + d_w_max / w_max < d_w_tol or + n_iter == max_iter - 1): # the biggest coordinate update of this iteration was smaller # than the tolerance: check the duality gap as ultimate # stopping criterion # XtA = np.dot(X.T, R) - beta * w for i in range(n_features): - XtA[i] = ddot( - n_samples, - (X.data + i * n_samples *sizeof(DOUBLE)), - 1, R.data, 1) - beta * w[i] + XtA[i] = dot(n_samples, &X_data[i * n_samples], + 1, R_data, 1) - beta * w[i] if positive: - dual_norm_XtA = max(n_features, XtA.data) + dual_norm_XtA = max(n_features, XtA_data) else: - dual_norm_XtA = abs_max(n_features, XtA.data) + dual_norm_XtA = abs_max(n_features, XtA_data) # R_norm2 = np.dot(R, R) - R_norm2 = ddot(n_samples, R.data, 1, - R.data, 1) + R_norm2 = dot(n_samples, R_data, 1, R_data, 1) # w_norm2 = np.dot(w, w) - w_norm2 = ddot(n_features, w.data, 1, - w.data, 1) + w_norm2 = dot(n_features, w_data, 1, w_data, 1) if (dual_norm_XtA > alpha): const = alpha / dual_norm_XtA @@ -265,33 +290,30 @@ def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w, const = 1.0 gap = R_norm2 - l1_norm = dasum(n_features, w.data, 1) + l1_norm = asum(n_features, w_data, 1) # np.dot(R.T, y) - gap += (alpha * l1_norm - const * ddot( - n_samples, - R.data, 1, - y.data, n_tasks) + gap += (alpha * l1_norm + - const * dot(n_samples, R_data, 1, y_data, n_tasks) + 0.5 * beta * (1 + const ** 2) * (w_norm2)) if gap < tol: # return if we reached desired tolerance break - return w, gap, tol, n_iter + 1 @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) -def sparse_enet_coordinate_descent(double[:] w, - double alpha, double beta, - np.ndarray[double, ndim=1, mode='c'] X_data, +def sparse_enet_coordinate_descent(floating [:] w, + floating alpha, floating beta, + np.ndarray[floating, ndim=1, mode='c'] X_data, np.ndarray[int, ndim=1, mode='c'] X_indices, np.ndarray[int, ndim=1, mode='c'] X_indptr, - np.ndarray[double, ndim=1] y, - double[:] X_mean, int max_iter, - double tol, object rng, bint random=0, + np.ndarray[floating, ndim=1] y, + floating[:] X_mean, int max_iter, + floating tol, object rng, bint random=0, bint positive=0): """Cython version of the coordinate descent algorithm 
for Elastic-Net @@ -307,30 +329,54 @@ def sparse_enet_coordinate_descent(double[:] w, # compute norms of the columns of X cdef unsigned int ii - cdef double[:] norm_cols_X = np.zeros(n_features, np.float64) + cdef floating[:] norm_cols_X cdef unsigned int startptr = X_indptr[0] cdef unsigned int endptr # get the number of tasks indirectly, using strides - cdef unsigned int n_tasks = y.strides[0] / sizeof(DOUBLE) + cdef unsigned int n_tasks # initial value of the residuals - cdef double[:] R = y.copy() + cdef floating[:] R = y.copy() - cdef double[:] X_T_R = np.zeros(n_features) - cdef double[:] XtA = np.zeros(n_features) + cdef floating[:] X_T_R + cdef floating[:] XtA - cdef double tmp - cdef double w_ii - cdef double d_w_max - cdef double w_max - cdef double d_w_ii - cdef double X_mean_ii - cdef double R_sum = 0.0 - cdef double normalize_sum - cdef double gap = tol + 1.0 - cdef double d_w_tol = tol + # fused types version of BLAS functions + cdef DOT dot + cdef ASUM asum + + if floating is float: + dtype = np.float32 + n_tasks = y.strides[0] / sizeof(float) + dot = sdot + asum = sasum + else: + dtype = np.float64 + n_tasks = y.strides[0] / sizeof(DOUBLE) + dot = ddot + asum = dasum + + norm_cols_X = np.zeros(n_features, dtype=dtype) + X_T_R = np.zeros(n_features, dtype=dtype) + XtA = np.zeros(n_features, dtype=dtype) + + cdef floating tmp + cdef floating w_ii + cdef floating d_w_max + cdef floating w_max + cdef floating d_w_ii + cdef floating X_mean_ii + cdef floating R_sum = 0.0 + cdef floating R_norm2 + cdef floating w_norm2 + cdef floating A_norm2 + cdef floating l1_norm + cdef floating normalize_sum + cdef floating gap = tol + 1.0 + cdef floating d_w_tol = tol + cdef floating dual_norm_XtA cdef unsigned int jj cdef unsigned int n_iter = 0 cdef unsigned int f_iter @@ -363,7 +409,7 @@ def sparse_enet_coordinate_descent(double[:] w, startptr = endptr # tol *= np.dot(y, y) - tol *= ddot(n_samples, &y[0], 1, &y[0], 1) + tol *= dot(n_samples, &y[0], 1, &y[0], 1) for n_iter in range(max_iter): @@ -451,10 +497,10 @@ def sparse_enet_coordinate_descent(double[:] w, dual_norm_XtA = abs_max(n_features, &XtA[0]) # R_norm2 = np.dot(R, R) - R_norm2 = ddot(n_samples, &R[0], 1, &R[0], 1) + R_norm2 = dot(n_samples, &R[0], 1, &R[0], 1) # w_norm2 = np.dot(w, w) - w_norm2 = ddot(n_features, &w[0], 1, &w[0], 1) + w_norm2 = dot(n_features, &w[0], 1, &w[0], 1) if (dual_norm_XtA > alpha): const = alpha / dual_norm_XtA A_norm2 = R_norm2 * const**2 @@ -463,13 +509,12 @@ def sparse_enet_coordinate_descent(double[:] w, const = 1.0 gap = R_norm2 - l1_norm = dasum(n_features, &w[0], 1) + l1_norm = asum(n_features, &w[0], 1) - # The expression inside ddot is equivalent to np.dot(R.T, y) - gap += (alpha * l1_norm - const * ddot( + gap += (alpha * l1_norm - const * dot( n_samples, - &R[0], 1, - &y[0], n_tasks + &R[0], 1, + &y[0], n_tasks ) + 0.5 * beta * (1 + const ** 2) * w_norm2) diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index 038a9830c59f3..45d8b265324d9 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -372,16 +372,18 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None, ElasticNet ElasticNetCV """ - # We expect X and y to be already float64 Fortran ordered when bypassing + # We expect X and y to be already Fortran ordered when bypassing # checks if check_input: - X = check_array(X, 'csc', dtype=np.float64, order='F', copy=copy_X) - y = check_array(y, 'csc', dtype=np.float64, 
order='F', copy=False, + X = check_array(X, 'csc', dtype=[np.float64, np.float32], + order='F', copy=copy_X) + y = check_array(y, 'csc', dtype=X.dtype.type, order='F', copy=False, ensure_2d=False) if Xy is not None: # Xy should be a 1d contiguous array or a 2D C ordered array - Xy = check_array(Xy, dtype=np.float64, order='C', copy=False, + Xy = check_array(Xy, dtype=X.dtype.type, order='C', copy=False, ensure_2d=False) + n_samples, n_features = X.shape multi_output = False @@ -395,8 +397,9 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None, # As sparse matrices are not actually centered we need this # to be passed to the CD solver. X_sparse_scaling = params['X_offset'] / params['X_scale'] + X_sparse_scaling = np.asarray(X_sparse_scaling, dtype=X.dtype) else: - X_sparse_scaling = np.zeros(n_features) + X_sparse_scaling = np.zeros(n_features, dtype=X.dtype) # X should be normalized and fit already if function is called # from ElasticNet.fit @@ -426,15 +429,15 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None, random = (selection == 'random') if not multi_output: - coefs = np.empty((n_features, n_alphas), dtype=np.float64) + coefs = np.empty((n_features, n_alphas), dtype=X.dtype) else: coefs = np.empty((n_outputs, n_features, n_alphas), - dtype=np.float64) + dtype=X.dtype) if coef_init is None: - coef_ = np.asfortranarray(np.zeros(coefs.shape[:-1])) + coef_ = np.asfortranarray(np.zeros(coefs.shape[:-1], dtype=X.dtype)) else: - coef_ = np.asfortranarray(coef_init) + coef_ = np.asfortranarray(coef_init, dtype=X.dtype) for i, alpha in enumerate(alphas): l1_reg = alpha * l1_ratio * n_samples @@ -470,7 +473,9 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None, if dual_gap_ > eps_: warnings.warn('Objective did not converge.' + ' You might want' + - ' to increase the number of iterations', + ' to increase the number of iterations.' + + ' Fitting data with very small alpha' + + ' may cause precision problems.', ConvergenceWarning) if verbose: @@ -663,16 +668,16 @@ def fit(self, X, y, check_input=True): raise ValueError('precompute should be one of True, False or' ' array-like. 
Got %r' % self.precompute) - # We expect X and y to be already float64 Fortran ordered arrays + # We expect X and y to be float64 or float32 Fortran ordered arrays # when bypassing checks if check_input: - y = np.asarray(y, dtype=np.float64) - X, y = check_X_y(X, y, accept_sparse='csc', dtype=np.float64, - order='F', + X, y = check_X_y(X, y, accept_sparse='csc', + order='F', dtype=[np.float64, np.float32], copy=self.copy_X and self.fit_intercept, multi_output=True, y_numeric=True) - y = check_array(y, dtype=np.float64, order='F', copy=False, + y = check_array(y, order='F', copy=False, dtype=X.dtype.type, ensure_2d=False) + X, y, X_offset, y_offset, X_scale, precompute, Xy = \ _pre_fit(X, y, None, self.precompute, self.normalize, self.fit_intercept, copy=False) @@ -688,14 +693,14 @@ def fit(self, X, y, check_input=True): raise ValueError("selection should be either random or cyclic.") if not self.warm_start or self.coef_ is None: - coef_ = np.zeros((n_targets, n_features), dtype=np.float64, + coef_ = np.zeros((n_targets, n_features), dtype=X.dtype, order='F') else: coef_ = self.coef_ if coef_.ndim == 1: coef_ = coef_[np.newaxis, :] - dual_gaps_ = np.zeros(n_targets, dtype=np.float64) + dual_gaps_ = np.zeros(n_targets, dtype=X.dtype) self.n_iter_ = [] for k in xrange(n_targets): @@ -725,6 +730,9 @@ def fit(self, X, y, check_input=True): self.coef_, self.dual_gap_ = map(np.squeeze, [coef_, dual_gaps_]) self._set_intercept(X_offset, y_offset, X_scale) + # workaround since _set_intercept will cast self.coef_ into float64 + self.coef_ = np.asarray(self.coef_, dtype=X.dtype) + # return self for chaining fit and predict calls return self diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 918180ce18915..9065c5b97dc4f 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -670,3 +670,32 @@ def test_lasso_non_float_y(): clf_float = model(fit_intercept=False) clf_float.fit(X, y_float) assert_array_equal(clf.coef_, clf_float.coef_) + + +def test_enet_float_precision(): + # Generate dataset + X, y, X_test, y_test = build_dataset(n_samples=20, n_features=10) + # Here we have a small number of iterations, and thus the + # ElasticNet might not converge. This is to speed up tests + + for normalize in [True, False]: + for fit_intercept in [True, False]: + coef = {} + intercept = {} + clf = ElasticNet(alpha=0.5, max_iter=100, precompute=False, + fit_intercept=fit_intercept, normalize=normalize) + for dtype in [np.float64, np.float32]: + X = dtype(X) + y = dtype(y) + ignore_warnings(clf.fit)(X, y) + + coef[dtype] = clf.coef_ + intercept[dtype] = clf.intercept_ + + assert_equal(clf.coef_.dtype, dtype) + + assert_array_almost_equal(coef[np.float32], coef[np.float64], + decimal=4) + assert_array_almost_equal(intercept[np.float32], + intercept[np.float64], + decimal=4) diff --git a/sklearn/src/cblas/ATL_dsrefdot.c b/sklearn/src/cblas/ATL_dsrefdot.c new file mode 100644 index 0000000000000..442e51a08e207 --- /dev/null +++ b/sklearn/src/cblas/ATL_dsrefdot.c @@ -0,0 +1,141 @@ +/* --------------------------------------------------------------------- + * + * -- Automatically Tuned Linear Algebra Software (ATLAS) + * (C) Copyright 2000 All Rights Reserved + * + * -- ATLAS routine -- Version 3.2 -- December 25, 2000 + * + * Author : Antoine P. 
Petitet + * Originally developed at the University of Tennessee, + * Innovative Computing Laboratory, Knoxville TN, 37996-1301, USA. + * + * --------------------------------------------------------------------- + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in + * the documentation and/or other materials provided with the distri- + * bution. + * 3. The name of the University, the ATLAS group, or the names of its + * contributors may not be used to endorse or promote products deri- + * ved from this software without specific written permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, + * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEO- + * RY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (IN- + * CLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "atlas_refmisc.h" +#include "atlas_reflevel1.h" + +double ATL_dsrefdot +( + const int N, + const float * X, + const int INCX, + const float * Y, + const int INCY +) +{ +/* + * Purpose + * ======= + * + * ATL_dsrefdot returns the dot product x^T * y of two n-vectors x and + * y. The result is internally computed using double precision arithme- + * tic. + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the vector x. N must be + * at least zero. Unchanged on exit. + * + * X (input) const float * + * On entry, X points to the first entry to be accessed of an + * incremented array of size equal to or greater than + * ( 1 + ( n - 1 ) * abs( INCX ) ) * sizeof( float ), + * that contains the vector x. Unchanged on exit. + * + * INCX (input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. Unchanged on exit. + * + * Y (input) const float * + * On entry, Y points to the first entry to be accessed of an + * incremented array of size equal to or greater than + * ( 1 + ( n - 1 ) * abs( INCY ) ) * sizeof( float ), + * that contains the vector y. Unchanged on exit. + * + * INCY (input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. Unchanged on exit. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + register double dot = ATL_dZERO, x0, x1, x2, x3, y0, y1, y2, y3; + float * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incY2 = 2 * INCY, + incX3 = 3 * INCX, incY3 = 3 * INCY, + incX4 = 4 * INCX, incY4 = 4 * INCY; +/* .. + * .. Executable Statements .. + * + */ + if( N > 0 ) + { + if( ( nu = ( N >> 2 ) << 2 ) != 0 ) + { + StX = (float *)X + nu * INCX; + + do + { + x0 = (double)(*X); y0 = (double)(*Y); + x1 = (double)(X[INCX ]); y1 = (double)(Y[INCY ]); + x2 = (double)(X[incX2]); y2 = (double)(Y[incY2]); + x3 = (double)(X[incX3]); y3 = (double)(Y[incY3]); + dot += x0 * y0; dot += x1 * y1; dot += x2 * y2; dot += x3 * y3; + X += incX4; + Y += incY4; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { + x0 = (double)(*X); y0 = (double)(*Y); dot += x0 * y0; + X += INCX; Y += INCY; + } + } + return( dot ); +/* + * End of ATL_dsrefdot + */ +} diff --git a/sklearn/src/cblas/ATL_srefasum.c b/sklearn/src/cblas/ATL_srefasum.c new file mode 100644 index 0000000000000..aec26caf011ac --- /dev/null +++ b/sklearn/src/cblas/ATL_srefasum.c @@ -0,0 +1,133 @@ +/* --------------------------------------------------------------------- + * + * -- Automatically Tuned Linear Algebra Software (ATLAS) + * (C) Copyright 2000 All Rights Reserved + * + * -- ATLAS routine -- Version 3.9.24 -- December 25, 2000 + * + * Author : Antoine P. Petitet + * Originally developed at the University of Tennessee, + * Innovative Computing Laboratory, Knoxville TN, 37996-1301, USA. + * + * --------------------------------------------------------------------- + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in + * the documentation and/or other materials provided with the distri- + * bution. + * 3. The name of the University, the ATLAS group, or the names of its + * contributors may not be used to endorse or promote products deri- + * ved from this software without specific written permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, + * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEO- + * RY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (IN- + * CLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "atlas_refmisc.h" +#include "atlas_reflevel1.h" + +float ATL_srefasum +( + const int N, + const float * X, + const int INCX +) +{ +/* + * Purpose + * ======= + * + * ATL_srefasum returns the sum of absolute values of the entries of a + * vector x. + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the vector x. N must be + * at least zero. Unchanged on exit. + * + * X (input) const float * + * On entry, X points to the first entry to be accessed of an + * incremented array of size equal to or greater than + * ( 1 + ( n - 1 ) * abs( INCX ) ) * sizeof( float ), + * that contains the vector x. Unchanged on exit. + * + * INCX (input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. Unchanged on exit. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register float sum = ATL_sZERO, x0, x1, x2, x3, + x4, x5, x6, x7; + float * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incX3 = 3 * INCX, + incX4 = 4 * INCX, incX5 = 5 * INCX, + incX6 = 6 * INCX, incX7 = 7 * INCX, + incX8 = 8 * INCX; +/* .. + * .. Executable Statements .. + * + */ + if( ( N > 0 ) && ( INCX >= 1 ) ) + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = (float *)X + nu * INCX; + + do + { + x0 = (*X); x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5]; + x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7]; + + sum += Msabs( x0 ); sum += Msabs( x4 ); + sum += Msabs( x1 ); sum += Msabs( x3 ); + sum += Msabs( x2 ); sum += Msabs( x6 ); + sum += Msabs( x5 ); sum += Msabs( x7 ); + + X += incX8; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { + x0 = (*X); + sum += Msabs( x0 ); + X += INCX; + } + } + return( sum ); +/* + * End of ATL_srefasum + */ +} diff --git a/sklearn/src/cblas/cblas_sasum.c b/sklearn/src/cblas/cblas_sasum.c new file mode 100644 index 0000000000000..439707ba021f4 --- /dev/null +++ b/sklearn/src/cblas/cblas_sasum.c @@ -0,0 +1,44 @@ +/* + * Automatically Tuned Linear Algebra Software v3.10.2 + * (C) Copyright 1999 R. Clint Whaley + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the ATLAS group or the names of its contributers may + * not be used to endorse or promote products derived from this + * software without specific written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#define SREAL +#include "atlas_misc.h" +#ifdef ATL_USEPTHREADS + #include "atlas_ptalias1.h" +#endif +#include "atlas_level1.h" +#include "cblas.h" + +float cblas_sasum(const int N, const float *X, const int incX) +{ + if (N > 0 && incX > 0) + return(ATL_sasum(N, X, incX)); + return(0.0f); +} diff --git a/sklearn/src/cblas/cblas_saxpy.c b/sklearn/src/cblas/cblas_saxpy.c new file mode 100644 index 0000000000000..19600a53a5127 --- /dev/null +++ b/sklearn/src/cblas/cblas_saxpy.c @@ -0,0 +1,156 @@ +/* --------------------------------------------------------------------- + * + * -- Automatically Tuned Linear Algebra Software (ATLAS) + * (C) Copyright 2000 All Rights Reserved + * + * -- ATLAS routine -- Version 3.9.24 -- December 25, 2000 + * + * Author : Antoine P. Petitet + * Originally developed at the University of Tennessee, + * Innovative Computing Laboratory, Knoxville TN, 37996-1301, USA. + * + * --------------------------------------------------------------------- + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in + * the documentation and/or other materials provided with the distri- + * bution. + * 3. The name of the University, the ATLAS group, or the names of its + * contributors may not be used to endorse or promote products deri- + * ved from this software without specific written permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, + * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEO- + * RY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (IN- + * CLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "atlas_refmisc.h" + +void cblas_saxpy +( + const int N, + const float ALPHA, + const float * X, + const int INCX, + float * Y, + const int INCY +) +{ +/* + * Purpose + * ======= + * + * ATL_srefaxpy performs the following operation: + * + * y := y + alpha * x, + * + * where alpha is a scalar and x and y are two n-vectors. + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the vector x. N must be + * at least zero. Unchanged on exit. + * + * ALPHA (input) const float + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero, then the entries of the incremented array X + * need not be set on input. Unchanged on exit. + * + * X (input) const float * + * On entry, X points to the first entry to be accessed of an + * incremented array of size equal to or greater than + * ( 1 + ( n - 1 ) * abs( INCX ) ) * sizeof( float ), + * that contains the vector x. Unchanged on exit. + * + * INCX (input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. Unchanged on exit. + * + * Y (input/output) float * + * On entry, Y points to the first entry to be accessed of an + * incremented array of size equal to or greater than + * ( 1 + ( n - 1 ) * abs( INCY ) ) * sizeof( float ), + * that contains the vector y. On exit, the entries of the in- + * cremented array Y are updated with the scaled entries of the + * incremented array X. + * + * INCY (input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. Unchanged on exit. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register const float alpha = ALPHA; + register float x0, x1, x2, x3, y0, y1, y2, y3; + float * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incY2 = 2 * INCY, + incX3 = 3 * INCX, incY3 = 3 * INCY, + incX4 = 4 * INCX, incY4 = 4 * INCY; +/* .. + * .. Executable Statements .. + * + */ + if( ( N > 0 ) && ( alpha != ATL_sZERO ) ) + { + if( ( nu = ( N >> 2 ) << 2 ) != 0 ) + { + StX = (float *)X + nu * INCX; + + do + { + x0 = (*X); y0 = (*Y); x1 = X[INCX ]; y1 = Y[INCY ]; + x2 = X[incX2]; y2 = Y[incY2]; x3 = X[incX3]; y3 = Y[incY3]; + + *Y = y0 + alpha * x0; Y[INCY ] = y1 + alpha * x1; + Y[incY2] = y2 + alpha * x2; Y[incY3] = y3 + alpha * x3; + + X += incX4; + Y += incY4; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { + x0 = (*X); + y0 = (*Y); + + *Y = y0 + alpha * x0; + + X += INCX; + Y += INCY; + } + } +/* + * End of ATL_srefaxpy + */ +}
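The heart of the ``cd_fast.pyx`` change is the ``if floating is float`` block: it binds the local ``dot``, ``axpy`` and ``asum`` function pointers to the single-precision CBLAS routines (``sdot``, ``saxpy``, ``sasum``) for ``float32`` data and to the double-precision ones (``ddot``, ``daxpy``, ``dasum``) otherwise, so the same coordinate-descent loop runs entirely at the input's precision. A rough Python analogue of that dtype-based dispatch, using SciPy's BLAS wrappers purely for illustration (the solver itself calls CBLAS directly and does not go through SciPy)::

    import numpy as np
    from scipy.linalg import get_blas_funcs

    def gap_terms(w, R):
        # get_blas_funcs returns sdot/sasum for float32 arrays and
        # ddot/dasum for float64 ones, mirroring the fused-type branch.
        dot, asum = get_blas_funcs(('dot', 'asum'), (w, R))
        R_norm2 = dot(R, R)   # R_norm2 = np.dot(R, R)
        w_norm2 = dot(w, w)   # w_norm2 = np.dot(w, w)
        l1_norm = asum(w)     # l1_norm = np.abs(w).sum()
        return R_norm2, w_norm2, l1_norm

    w = np.array([0.5, -0.25, 0.0], dtype=np.float32)
    R = np.array([0.1, -0.2, 0.3], dtype=np.float32)
    print(gap_terms(w, R))  # every term computed in single precision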
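On the Python side, ``enet_path`` and ``ElasticNet.fit`` now validate input with ``dtype=[np.float64, np.float32]``: ``check_array``/``check_X_y`` keep ``float64`` and ``float32`` data as-is and convert any other dtype to ``float64`` (the first entry of the list), and the work arrays (``coef_``, residuals, ``dual_gaps_``, the sparse scaling vector) are allocated with ``X.dtype``. A minimal usage sketch of the resulting behaviour, mirroring the new ``test_enet_float_precision`` test (the exact coefficients depend on the random data)::

    import numpy as np
    from sklearn.linear_model import ElasticNet

    rng = np.random.RandomState(0)
    X = rng.randn(20, 10)
    y = rng.randn(20)

    coef = {}
    for dtype in (np.float64, np.float32):
        clf = ElasticNet(alpha=0.5, fit_intercept=True)
        clf.fit(X.astype(dtype), y.astype(dtype))
        coef[dtype] = clf.coef_
        # with this patch the coefficients keep the input precision
        assert clf.coef_.dtype == dtype

    # both precisions agree to about 4 decimals, as asserted in the new test
    np.testing.assert_array_almost_equal(coef[np.float32], coef[np.float64],
                                         decimal=4)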
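The new C files under ``sklearn/src/cblas`` extend the bundled reference CBLAS (the fallback used when scikit-learn is built without a system BLAS) with the single-precision kernels the Cython code now calls, among them ``cblas_saxpy``, ``cblas_sasum``/``ATL_srefasum`` and ``ATL_dsrefdot``; the latter, as its own docstring notes, accumulates the ``float32`` dot product in double precision to limit round-off. A short NumPy sketch of that accumulation semantics (``ds_dot`` is only an illustrative name, not part of the patch)::

    import numpy as np

    def ds_dot(x, y):
        # ATL_dsrefdot semantics: float32 operands, float64 accumulator.
        acc = np.float64(0.0)
        for xi, yi in zip(x, y):
            acc += np.float64(xi) * np.float64(yi)
        return acc

    x = np.full(1000, 0.1, dtype=np.float32)
    y = np.ones(1000, dtype=np.float32)
    print(ds_dot(x, y))   # double-precision accumulation
    print(np.dot(x, y))   # NumPy's float32 dot, for comparison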