diff --git a/.gitignore b/.gitignore
index 9283cb47764e..206c63a9f29d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -167,6 +167,8 @@ numpy/core/src/umath/simd.inc
 numpy/core/src/umath/struct_ufunc_test.c
 numpy/core/src/umath/test_rational.c
 numpy/core/src/umath/umath_tests.c
+numpy/core/src/umath/clip.[ch]
+numpy/core/src/umath/_var_helper.[ch]
 numpy/distutils/__config__.py
 numpy/linalg/umath_linalg.c
 doc/source/**/generated/
diff --git a/numpy/core/_methods.py b/numpy/core/_methods.py
index 269e509b860e..5bab3cee4b45 100644
--- a/numpy/core/_methods.py
+++ b/numpy/core/_methods.py
@@ -7,6 +7,7 @@
 
 import warnings
 
+import numpy as np
 from numpy.core import multiarray as mu
 from numpy.core import umath as um
 from numpy.core._asarray import asanyarray
@@ -166,8 +167,8 @@ def _mean(a, axis=None, dtype=None, out=None, keepdims=False):
 
 def _var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
     arr = asanyarray(a)
-
     rcount = _count_reduce_items(arr, axis)
+
     # Make this warning show up on top.
     if ddof >= rcount:
         warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning,
@@ -187,16 +188,41 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
     else:
         arrmean = arrmean.dtype.type(arrmean / rcount)
 
-    # Compute sum of squared deviations from mean
-    # Note that x may not be inexact and that we need it to be an array,
-    # not a scalar.
-    x = asanyarray(arr - arrmean)
-    if issubclass(arr.dtype.type, (nt.floating, nt.integer)):
-        x = um.multiply(x, x, out=x)
-    else:
-        x = um.multiply(x, um.conjugate(x), out=x).real
+    if (rcount > 1 and arr.ndim and
+            issubclass(arr.dtype.type, (nt.floating, nt.complexfloating))):
+        if axis is None:
+            axis = tuple(range(arr.ndim))
+        elif isinstance(axis, int):
+            axis = (arr.ndim + axis, ) if axis < 0 else (axis, )
+        else:
+            axis = (arr.ndim + a if a < 0 else a for a in axis)
+            axis = tuple(sorted(axis))
+        
+        S1, S2 = mu._var_helper(
+            arr, np.broadcast_to(arrmean, arr.shape),
+            axis=axis[-1], keepdims=keepdims, dtype=dtype)
+
+        if len(axis) > 1:
+            S1 = umr_sum(S1, axis=axis[:-1], keepdims=keepdims)
+            S2 = umr_sum(S2, axis=axis[:-1], out=out, keepdims=keepdims)
+
+        if isinstance(S1, mu.ndarray):
+            S1 = um.multiply(S1, um.conjugate(S1), out=S1).real
+            S1 = um.true_divide(S1, rcount, out=S1, casting='unsafe')
+        else:
+            S1 = S2.dtype.type(abs(S1)**2 / rcount)
 
-    ret = umr_sum(x, axis, dtype, out, keepdims)
+        ret = um.subtract(S2, S1, out=out, casting='unsafe', dtype=dtype)
+    else:
+        # Compute sum of squared deviations from mean
+        # Note that x may not be inexact and that we need it to be an array,
+        # not a scalar.
+        x = asanyarray(arr - arrmean)
+        if issubclass(arr.dtype.type, (nt.floating, nt.integer)):
+            x = um.multiply(x, x, out=x)
+        else:
+            x = um.multiply(x, um.conjugate(x), out=x).real
+        ret = umr_sum(x, axis, dtype, out, keepdims)
 
     # Compute degrees of freedom and make sure it is not negative.
     rcount = max([rcount - ddof, 0])
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index bf1747272820..9e5e21571d0f 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -934,6 +934,20 @@ def english_upper(s):
           TD(O),
           signature='(n?,k),(k,m?)->(n?,m?)',
           ),
+'_var_helper':
+    Ufunc(2, 2, None,
+          "returns the sum of differences and sum of squared differences",
+          None,
+          [TypeDescription('e', None, 'ee', 'ee'),
+          TypeDescription('f', None, 'ff', 'ff'),
+          TypeDescription('d', None, 'dd', 'dd'),
+          TypeDescription('g', None, 'gg', 'gg'),
+          TypeDescription('F', None, 'FF', 'Ff'),
+          TypeDescription('D', None, 'DD', 'Dd'),
+          TypeDescription('G', None, 'GG', 'Gg'),
+          ],
+          signature='(i),(i)->(),()',
+          ),
 }
 
 if sys.version_info[0] >= 3:
@@ -1151,6 +1165,7 @@ def make_code(funcdict, filename):
     #include "loops.h"
     #include "matmul.h"
     #include "clip.h"
+    #include "_var_helper.h"
     %s
 
     static int
diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py
index c0fcc10ffede..6d789c802d04 100644
--- a/numpy/core/multiarray.py
+++ b/numpy/core/multiarray.py
@@ -17,7 +17,7 @@
 from numpy.core._multiarray_umath import *
 from numpy.core._multiarray_umath import (
     _fastCopyAndTranspose, _flagdict, _insert, _reconstruct, _vec_string,
-    _ARRAY_API, _monotonicity, _get_ndarray_c_version
+    _ARRAY_API, _monotonicity, _get_ndarray_c_version, _var_helper
     )
 
 __all__ = [
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 338502791383..e8ca60e0bf0a 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -910,6 +910,8 @@ def generate_umath_c(ext, build_dir):
             join('src', 'umath', 'scalarmath.c.src'),
             join('src', 'umath', 'ufunc_type_resolution.c'),
             join('src', 'umath', 'override.c'),
+            join('src', 'umath', '_var_helper.h.src'),
+            join('src', 'umath', '_var_helper.c.src'),
             ]
 
     umath_deps = [
diff --git a/numpy/core/src/umath/_umath_tests.c.src b/numpy/core/src/umath/_umath_tests.c.src
index 6c3bcce7133a..03ebffa7535c 100644
--- a/numpy/core/src/umath/_umath_tests.c.src
+++ b/numpy/core/src/umath/_umath_tests.c.src
@@ -361,10 +361,9 @@ static void *cross1d_data[] = { (void *)NULL, (void *)NULL };
 static char cross1d_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
 static PyUFuncGenericFunction euclidean_pdist_functions[] =
                             { FLOAT_euclidean_pdist, DOUBLE_euclidean_pdist };
-static void *eucldiean_pdist_data[] = { (void *)NULL, (void *)NULL };
+static void *euclidean_pdist_data[] = { (void *)NULL, (void *)NULL };
 static char euclidean_pdist_signatures[] = { NPY_FLOAT, NPY_FLOAT,
                                              NPY_DOUBLE, NPY_DOUBLE };
-
 static PyUFuncGenericFunction cumsum_functions[] = { LONG_cumsum, DOUBLE_cumsum };
 static void *cumsum_data[] = { (void *)NULL, (void *)NULL };
 static char cumsum_signatures[] = { NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE };
@@ -421,7 +420,7 @@ addUfuncs(PyObject *dictionary) {
     PyDict_SetItemString(dictionary, "matmul", f);
     Py_DECREF(f);
     f = PyUFunc_FromFuncAndDataAndSignature(euclidean_pdist_functions,
-                    eucldiean_pdist_data, euclidean_pdist_signatures,
+                    euclidean_pdist_data, euclidean_pdist_signatures,
                     2, 1, 1, PyUFunc_None, "euclidean_pdist",
                     "pairwise euclidean distance on last two dimensions \n"
                     "     \"(n,d)->(p)\" \n",
diff --git a/numpy/core/src/umath/_var_helper.c.src b/numpy/core/src/umath/_var_helper.c.src
new file mode 100644
index 000000000000..ffa9dc6cf202
--- /dev/null
+++ b/numpy/core/src/umath/_var_helper.c.src
@@ -0,0 +1,110 @@
+/**
+ * This module provides the inner loops for the `_var_helper` generalized ufunc
+ */
+#define _UMATHMODULE
+#define _MULTIARRAYMODULE
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+
+#include "Python.h"
+
+#include "numpy/halffloat.h"
+#include "numpy/npy_math.h"
+#include "numpy/ndarraytypes.h"
+#include "numpy/npy_common.h"
+#include "numpy/utils.h"
+
+#define INIT_OUTER_LOOP /* declares dN, N_ and s0..s3, advancing dimensions and steps */ \
+    npy_intp dN = *dimensions++;\
+    npy_intp N_;                \
+    npy_intp s0 = *steps++;     \
+    npy_intp s1 = *steps++;     \
+    npy_intp s2 = *steps++;     \
+    npy_intp s3 = *steps++;
+
+#define BEGIN_OUTER_LOOP      /* runs dN times, bumping args[0..3] by s0..s3 each pass */ \
+    for (N_ = 0; N_ < dN; N_++, args[0] += s0, args[1] += s1, args[2] += s2, args[3] += s3) {
+
+#define END_OUTER_LOOP  } /* closes the for block opened by BEGIN_OUTER_LOOP */
+
+NPY_NO_EXPORT void  /* npy_half loop: stores sum(x) and sum(x*x) where x = in1 - in2 */
+HALF__var_helper(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP  /* consumes dimensions[0] and steps[0..3]; both pointers now point past them */
+    npy_intp di = dimensions[0];  /* inner-loop length (the entry after the outer count) */
+    npy_intp i;
+    npy_intp is1 = steps[0];  /* inner stride applied to the first input */
+    npy_intp is2 = steps[1];  /* inner stride applied to the second input */
+    BEGIN_OUTER_LOOP
+        char *ip1 = args[0], *ip2 = args[1], *op1 = args[2], *op2 = args[3];
+        float S1 = 0, S2 = 0;  /* accumulate in float, not npy_half */
+        for (i = 0; i < di; i++, ip1 += is1, ip2 += is2) {
+            const float x = npy_half_to_float(*(npy_half *)ip1) -
+                            npy_half_to_float(*(npy_half *)ip2);
+            S1 += x;  /* running sum of differences */
+            S2 += x * x;  /* running sum of squared differences */
+        }
+        *(npy_half *)op1 = npy_float_to_half(S1);  /* results are converted back to half on store */
+        *(npy_half *)op2 = npy_float_to_half(S2);
+    END_OUTER_LOOP
+}
+
+/**begin repeat
+ *
+ * #name = FLOAT, DOUBLE, LONGDOUBLE#
+ * #type = npy_float, npy_double, npy_longdouble#
+ */
+
+NPY_NO_EXPORT void  /* real-type loop: stores sum(x) and sum(x*x) where x = in1 - in2 */
+@name@__var_helper(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP  /* consumes dimensions[0] and steps[0..3]; both pointers now point past them */
+    npy_intp di = dimensions[0];  /* inner-loop length (the entry after the outer count) */
+    npy_intp i;
+    npy_intp is1 = steps[0];  /* inner stride applied to the first input */
+    npy_intp is2 = steps[1];  /* inner stride applied to the second input */
+    BEGIN_OUTER_LOOP
+        char *ip1 = args[0], *ip2 = args[1], *op1 = args[2], *op2 = args[3];
+        @type@ S1 = 0, S2 = 0;  /* accumulators use the same type as the inputs */
+        for (i = 0; i < di; i++, ip1 += is1, ip2 += is2) {
+            const @type@ x = *(@type@ *)ip1 - *(@type@ *)ip2;  /* elementwise difference */
+            S1 += x;  /* running sum of differences */
+            S2 += x * x;  /* running sum of squared differences */
+        }
+        *(@type@ *)op1 = S1;  /* first output: sum of differences */
+        *(@type@ *)op2 = S2;  /* second output: sum of squared differences */
+    END_OUTER_LOOP
+}
+
+/**end repeat**/
+
+/**begin repeat
+ *
+ * #name = CFLOAT, CDOUBLE, CLONGDOUBLE#
+ * #type = npy_float, npy_double, npy_longdouble#
+ */
+
+NPY_NO_EXPORT void  /* complex loop: stores the complex sum of differences and the real sum of |difference|^2 */
+@name@__var_helper(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP  /* consumes dimensions[0] and steps[0..3]; both pointers now point past them */
+    npy_intp di = dimensions[0];  /* inner-loop length (the entry after the outer count) */
+    npy_intp i;
+    npy_intp is1 = steps[0];  /* inner stride applied to the first input */
+    npy_intp is2 = steps[1];  /* inner stride applied to the second input */
+    BEGIN_OUTER_LOOP
+        char *ip1 = args[0], *ip2 = args[1], *op1 = args[2], *op2 = args[3];
+        @type@ S1_re = 0, S1_im = 0, S2 = 0;  /* S1 is kept as separate real/imaginary parts; S2 is real */
+        for (i = 0; i < di; i++, ip1 += is1, ip2 += is2) {
+            const @type@ re = *(@type@ *)ip1 - *(@type@ *)ip2;  /* difference of the first components */
+            const @type@ im = *((@type@ *)ip1 + 1) - *((@type@ *)ip2 + 1);  /* difference of the second components */
+            S1_re += re;
+            S1_im += im;
+            S2 += re * re + im * im;  /* squared magnitude of the difference */
+        }
+        *(@type@ *)op1 = S1_re;  /* first output holds two components: re then im */
+        *((@type@ *)op1 + 1)= S1_im;
+        *(@type@ *)op2 = S2;  /* second output is a single real value */
+    END_OUTER_LOOP
+}
+
+/**end repeat**/
diff --git a/numpy/core/src/umath/_var_helper.h.src b/numpy/core/src/umath/_var_helper.h.src
new file mode 100644
index 000000000000..420df6df40b8
--- /dev/null
+++ b/numpy/core/src/umath/_var_helper.h.src
@@ -0,0 +1,14 @@
+#ifndef _NPY_UMATH__VAR_HELPER_H_
+#define _NPY_UMATH__VAR_HELPER_H_
+
+
+/**begin repeat
+ *
+ * #name = HALF, FLOAT, DOUBLE, LONGDOUBLE,
+ *         CFLOAT, CDOUBLE, CLONGDOUBLE#
+ */
+NPY_NO_EXPORT void  /* one prototype is generated per name in the repeat list above */
+@name@__var_helper(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
+/**end repeat**/
+
+#endif