diff --git a/.gitmodules b/.gitmodules index 9847e2842fbc..3934afe4500c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,3 +13,6 @@ [submodule "numpy/_core/src/highway"] path = numpy/_core/src/highway url = https://github.com/google/highway.git +[submodule "numpy/fft/pocketfft"] + path = numpy/fft/pocketfft + url = https://github.com/mreineck/pocketfft diff --git a/doc/release/upcoming_changes/25536.new_feature.rst b/doc/release/upcoming_changes/25536.new_feature.rst index 28f0747a6c4f..d57f144a646f 100644 --- a/doc/release/upcoming_changes/25536.new_feature.rst +++ b/doc/release/upcoming_changes/25536.new_feature.rst @@ -1,4 +1,11 @@ -``out`` for `numpy.fft` ------------------------ -The various FFT routines in `numpy.fft` have gained an ``out`` -argument that can be used for in-place calculations. +`numpy.fft` support for different precisions and in-place calculations +---------------------------------------------------------------------- + +The various FFT routines in `numpy.fft` now do their calculations natively in +float, double, or long double precision, depending on the input precision, +instead of always calculating in double precision. Hence, the calculation will +now be less precise for single and more precise for long double precision. +The data type of the output array will now be adjusted accordingly. + +Furthermore, all FFT routines have gained an ``out`` argument that can be used +for in-place calculations. diff --git a/numpy/fft/_pocketfft.py b/numpy/fft/_pocketfft.py index 1ef37b3c01a3..bb754d01a64a 100644 --- a/numpy/fft/_pocketfft.py +++ b/numpy/fft/_pocketfft.py @@ -34,7 +34,8 @@ import warnings from numpy.lib.array_utils import normalize_axis_index -from numpy._core import asarray, empty, zeros, swapaxes, conjugate, take, sqrt +from numpy._core import (asarray, empty, zeros, swapaxes, result_type, + conjugate, take, sqrt, reciprocal) from . import _pocketfft_umath as pfu from numpy._core import overrides @@ -47,12 +48,24 @@ # divided. 
This replaces the original, more intuitive 'fct` parameter to avoid # divisions by zero (or alternatively additional checks) in the case of # zero-length axes during its computation. -def _raw_fft(a, n, axis, is_real, is_forward, inv_norm, out=None): - axis = normalize_axis_index(axis, a.ndim) - if n is None: - n = a.shape[axis] +def _raw_fft(a, n, axis, is_real, is_forward, norm, out=None): + if n < 1: + raise ValueError(f"Invalid number of FFT data points ({n}) specified.") + + # Calculate the normalization factor in the floating-point precision of + # the input (integer dtypes promote to double), avoiding precision loss. + if not is_forward: + norm = _swap_direction(norm) - fct = 1/inv_norm + if norm is None or norm == "backward": + fct = 1 + elif norm == "ortho": + fct = reciprocal(sqrt(n, dtype=result_type(a.real.dtype, 1.0))) + elif norm == "forward": + fct = reciprocal(n, dtype=result_type(a.real.dtype, 1.0)) + else: + raise ValueError(f'Invalid norm value {norm}; should be "backward", ' + '"ortho" or "forward".') n_out = n if is_real: @@ -64,47 +77,20 @@ def _raw_fft(a, n, axis, is_real, is_forward, inv_norm, out=None): else: ufunc = pfu.fft if is_forward else pfu.ifft + axis = normalize_axis_index(axis, a.ndim) + if out is None: + if is_real and not is_forward: # irfft, complex in, real output. + out_dtype = result_type(a.real.dtype, 1.0) + else: # Others, complex output. + out_dtype = result_type(a.dtype, 1j) out = empty(a.shape[:axis] + (n_out,) + a.shape[axis+1:], - dtype=complex if is_forward or not is_real else float) + dtype=out_dtype) elif ((shape := getattr(out, "shape", None)) is not None and (len(shape) != a.ndim or shape[axis] != n_out)): raise ValueError("output array has wrong shape.") - # Note: for backward compatibility, we want to accept longdouble as well, - # even though it is at reduced precision. To tell the promotor that we - # want to do that, we set the signature (to the only the ufunc has). - # Then, the default casting='same_kind' will take care of the rest.
- # TODO: create separate float, double, and longdouble loops. - return ufunc(a, fct, axes=[(axis,), (), (axis,)], out=out, - signature=ufunc.types[0]) - -def _get_forward_norm(n, norm): - if n < 1: - raise ValueError(f"Invalid number of FFT data points ({n}) specified.") - - if norm is None or norm == "backward": - return 1 - elif norm == "ortho": - return sqrt(n) - elif norm == "forward": - return n - raise ValueError(f'Invalid norm value {norm}; should be "backward",' - '"ortho" or "forward".') - - -def _get_backward_norm(n, norm): - if n < 1: - raise ValueError(f"Invalid number of FFT data points ({n}) specified.") - - if norm is None or norm == "backward": - return n - elif norm == "ortho": - return sqrt(n) - elif norm == "forward": - return 1 - raise ValueError(f'Invalid norm value {norm}; should be "backward", ' - '"ortho" or "forward".') + return ufunc(a, fct, axes=[(axis,), (), (axis,)], out=out) _SWAP_DIRECTION_MAP = {"backward": "forward", None: "forward", @@ -220,8 +206,7 @@ def fft(a, n=None, axis=-1, norm=None, out=None): a = asarray(a) if n is None: n = a.shape[axis] - inv_norm = _get_forward_norm(n, norm) - output = _raw_fft(a, n, axis, False, True, inv_norm, out) + output = _raw_fft(a, n, axis, False, True, norm, out) return output @@ -327,8 +312,7 @@ def ifft(a, n=None, axis=-1, norm=None, out=None): a = asarray(a) if n is None: n = a.shape[axis] - inv_norm = _get_backward_norm(n, norm) - output = _raw_fft(a, n, axis, False, False, inv_norm, out=out) + output = _raw_fft(a, n, axis, False, False, norm, out=out) return output @@ -426,8 +410,7 @@ def rfft(a, n=None, axis=-1, norm=None, out=None): a = asarray(a) if n is None: n = a.shape[axis] - inv_norm = _get_forward_norm(n, norm) - output = _raw_fft(a, n, axis, True, True, inv_norm, out=out) + output = _raw_fft(a, n, axis, True, True, norm, out=out) return output @@ -536,8 +519,7 @@ def irfft(a, n=None, axis=-1, norm=None, out=None): a = asarray(a) if n is None: n = (a.shape[axis] - 1) * 2 - 
inv_norm = _get_backward_norm(n, norm) - output = _raw_fft(a, n, axis, True, False, inv_norm, out=out) + output = _raw_fft(a, n, axis, True, False, norm, out=out) return output diff --git a/numpy/fft/_pocketfft_umath.c b/numpy/fft/_pocketfft_umath.c deleted file mode 100644 index cf68d998c285..000000000000 --- a/numpy/fft/_pocketfft_umath.c +++ /dev/null @@ -1,389 +0,0 @@ -/* - * This file is part of pocketfft. - * Licensed under a 3-clause BSD style license - see LICENSE.md - */ - -/* - * Main implementation file. - * - * Copyright (C) 2004-2018 Max-Planck-Society - * \author Martin Reinecke - */ -#define NPY_NO_DEPRECATED_API NPY_API_VERSION - -#define PY_SSIZE_T_CLEAN -#include -#include - -#include "numpy/arrayobject.h" -#include "numpy/ufuncobject.h" - -#include "npy_config.h" - -#include "pocketfft/pocketfft.h" - -/* - * Copy all nin elements of input to the first nin of the output, - * and any set any remaining nout-nin output elements to 0 - * (if nout < nin, copy only nout). - */ - -static inline void -copy_data(char* in, npy_intp step_in, npy_intp nin, - char* out, npy_intp step_out, npy_intp nout, npy_intp elsize) -{ - npy_intp ncopy = nin <= nout? nin : nout; - if (ncopy > 0) { - if (step_in == elsize && step_out == elsize) { - memcpy(out, in, ncopy*elsize); - } - else { - char *ip = in, *op = out; - for (npy_intp i = 0; i < ncopy; i++, ip += step_in, op += step_out) { - memcpy(op, ip, elsize); - } - } - } - else { - ncopy = 0; /* can be negative, from irfft */ - } - if (nout > ncopy) { - char *op = out + ncopy*elsize; - if (step_out == elsize) { - memset(op, 0, (nout-ncopy)*elsize); - } - else { - for (npy_intp i = ncopy; i < nout; i++, op += step_out) { - memset(op, 0, elsize); - } - } - } -} - - -/* - * Loops calling the pocketfft code. - * - * Unfortunately, the gufunc machinery does not (yet?) 
allow forcing contiguous - * inner loop data, so we create a contiguous output buffer if needed - * (input gets copied to output before processing, so can be non-contiguous). - */ -static void -fft_loop(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func) -{ - char *ip = args[0], *fp = args[1], *op = args[2]; - npy_intp n_outer = dimensions[0]; - npy_intp si = steps[0], sf = steps[1], so = steps[2]; - npy_intp nin = dimensions[1], nout = dimensions[2]; - npy_intp step_in = steps[3], step_out = steps[4]; - int (*cfft_function)(cfft_plan, double *, double) = func; - npy_intp npts = nout; - cfft_plan plan; - char *buff = NULL; - int no_mem = 1; - - if (nout == 0) { - return; /* no output to set */ - } - - plan = make_cfft_plan(npts); - if (plan == NULL) { - goto fail; - } - if (step_out != sizeof(npy_cdouble)) { - buff = malloc(npts * sizeof(npy_cdouble)); - if (buff == NULL) { - goto fail; - } - } - - for (npy_intp i = 0; i < n_outer; i++, ip += si, fp += sf, op += so) { - double fct = *(double *)fp; - char *op_or_buff = buff == NULL ? 
op : buff; - /* - * pocketfft works in-place, so we need to copy the data - * (except if we want to be in-place) - */ - if (ip != op_or_buff) { - copy_data(ip, step_in, nin, - op_or_buff, sizeof(npy_cdouble), npts, sizeof(npy_cdouble)); - } - if ((no_mem = cfft_function(plan, (double *)op_or_buff, fct)) != 0) { - break; - } - if (op_or_buff == buff) { - copy_data(op_or_buff, sizeof(npy_cdouble), npts, - op, step_out, npts, sizeof(npy_cdouble)); - } - } - fail: - free(buff); - destroy_cfft_plan(plan); /* uses free so can be passed NULL */ - if (no_mem) { - /* TODO: Requires use of new ufunc API to indicate error return */ - NPY_ALLOW_C_API_DEF - NPY_ALLOW_C_API; - PyErr_NoMemory(); - NPY_DISABLE_C_API; - } - return; -} - - - -static void -rfft_impl(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func, - npy_intp npts) -{ - char *ip = args[0], *fp = args[1], *op = args[2]; - npy_intp n_outer = dimensions[0]; - npy_intp si = steps[0], sf = steps[1], so = steps[2]; - npy_intp nin = dimensions[1], nout = dimensions[2]; - npy_intp step_in = steps[3], step_out = steps[4]; - rfft_plan plan; - char *buff = NULL; - int no_mem = 1; - - if (nout == 0) { - return; - } - - plan = make_rfft_plan(npts); - if (plan == NULL) { - goto fail; - } - if (step_out != sizeof(npy_cdouble)){ - buff = malloc(nout * sizeof(npy_cdouble)); - if (buff == NULL) { - goto fail; - } - } - - for (npy_intp i = 0; i < n_outer; i++, ip += si, fp += sf, op += so) { - double fct = *(double *)fp; - char *op_or_buff = buff == NULL ? op : buff; - double *op_double = (double *)op_or_buff; - /* - * Pocketfft works in-place and for real transforms the frequency data - * thus needs to be compressed, using that there will be no imaginary - * component for the zero-frequency item (which is the sum of all - * inputs and thus has to be real), nor one for the Nyquist frequency - * for even number of points. Pocketfft uses FFTpack order, - * R0,R1,I1,...Rn-1,In-1,Rn[,In] (last for npts odd only). 
To make - * unpacking easy, we place the real data offset by one in the buffer, - * so that we just have to move R0 and create I0=0. Note that - * copy_data will zero the In component for even number of points. - */ - copy_data(ip, step_in, nin, - (char *)&op_double[1], sizeof(npy_double), nout*2 - 1, sizeof(npy_double)); - if ((no_mem = rfft_forward(plan, &op_double[1], fct)) != 0) { - break; - } - op_double[0] = op_double[1]; - op_double[1] = 0.; - if (op_or_buff == buff) { - copy_data(op_or_buff, sizeof(npy_cdouble), nout, - op, step_out, nout, sizeof(npy_cdouble)); - } - } - fail: - free(buff); - destroy_rfft_plan(plan); - if (no_mem) { - /* TODO: Requires use of new ufunc API to indicate error return */ - NPY_ALLOW_C_API_DEF - NPY_ALLOW_C_API; - PyErr_NoMemory(); - NPY_DISABLE_C_API; - } - return; -} - -/* - * For the forward real, we cannot know what the requested number of points is - * just based on the number of points in the complex output array (e.g., 10 - * and 11 real input points both lead to 6 complex output points), so we - * define versions for both even and odd number of points. 
- */ -static void -rfft_n_even_loop(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func) -{ - npy_intp npts = 2 * dimensions[2] - 2; - rfft_impl(args, dimensions, steps, func, npts); -} - -static void -rfft_n_odd_loop(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func) -{ - npy_intp npts = 2 * dimensions[2] - 1; - rfft_impl(args, dimensions, steps, func, npts); -} - - -static void -irfft_loop(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func) -{ - char *ip = args[0], *fp = args[1], *op = args[2]; - npy_intp n_outer = dimensions[0]; - npy_intp si = steps[0], sf = steps[1], so = steps[2]; - npy_intp nin = dimensions[1], nout = dimensions[2]; - npy_intp step_in = steps[3], step_out = steps[4]; - npy_intp npts = nout; - rfft_plan plan; - char *buff = NULL; - int no_mem = 1; - - if (nout == 0) { - return; - } - - plan = make_rfft_plan(npts); - if (plan == NULL) { - goto fail; - } - if (step_out != sizeof(npy_double)) { - buff = malloc(npts * sizeof(npy_double)); - if (buff == NULL) { - goto fail; - } - } - - for (npy_intp i = 0; i < n_outer; i++, ip += si, fp += sf, op += so) { - double fct = *(double *)fp; - char *op_or_buff = buff == NULL ? op : buff; - double *op_double = (double *)op_or_buff; - /* - * Pocket_fft works in-place and for inverse real transforms the - * frequency data thus needs to be compressed, removing the imaginary - * component of the zero-frequency item (which is the sum of all - * inputs and thus has to be real), as well as the imaginary component - * of the Nyquist frequency for even number of points. We thus copy - * the data to the buffer in the following order (also used by - * FFTpack): R0,R1,I1,...Rn-1,In-1,Rn[,In] (last for npts odd only). - */ - op_double[0] = ((double *)ip)[0]; /* copy R0 */ - if (npts > 1) { - /* - * Copy R1,I1... 
up to Rn-1,In-1 if possible, stopping earlier - * if not all the input points are needed or if the input is short - * (in the latter case, zeroing after). - */ - copy_data(ip + step_in, step_in, nin - 1, - (char *)&op_double[1], sizeof(npy_cdouble), (npts - 1) / 2, - sizeof(npy_cdouble)); - /* For even npts, we still need to set Rn. */ - if (npts % 2 == 0) { - op_double[npts - 1] = (npts / 2 >= nin) ? 0. : - ((double *)(ip + (npts / 2) * step_in))[0]; - } - } - if ((no_mem = rfft_backward(plan, op_double, fct)) != 0) { - break; - } - if (op_or_buff == buff) { - copy_data(op_or_buff, sizeof(npy_double), npts, - op, step_out, npts, sizeof(npy_double)); - } - } - fail: - free(buff); - destroy_rfft_plan(plan); - if (no_mem) { - /* TODO: Requires use of new ufunc API to indicate error return */ - NPY_ALLOW_C_API_DEF - NPY_ALLOW_C_API; - PyErr_NoMemory(); - NPY_DISABLE_C_API; - } - return; -} - - -static PyUFuncGenericFunction fft_functions[] = { fft_loop }; -static char fft_types[] = { NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE}; -static void *fft_data[] = { &cfft_forward }; -static void *ifft_data[] = { &cfft_backward }; - -static PyUFuncGenericFunction rfft_n_even_functions[] = { rfft_n_even_loop }; -static PyUFuncGenericFunction rfft_n_odd_functions[] = { rfft_n_odd_loop }; -static char rfft_types[] = { NPY_DOUBLE, NPY_DOUBLE, NPY_CDOUBLE}; -static void *rfft_data[] = { (void *)NULL }; - -static PyUFuncGenericFunction irfft_functions[] = { irfft_loop }; -static char irfft_types[] = { NPY_CDOUBLE, NPY_DOUBLE, NPY_DOUBLE}; -static void *irfft_data[] = { (void *)NULL }; - -static int -add_gufuncs(PyObject *dictionary) { - PyObject *f; - - f = PyUFunc_FromFuncAndDataAndSignature( - fft_functions, fft_data, fft_types, 1, 2, 1, PyUFunc_None, - "fft", "complex forward FFT\n", 0, "(n),()->(m)"); - if (f == NULL) { - return -1; - } - PyDict_SetItemString(dictionary, "fft", f); - Py_DECREF(f); - f = PyUFunc_FromFuncAndDataAndSignature( - fft_functions, ifft_data, fft_types, 1, 2, 1, 
PyUFunc_None, - "ifft", "complex backward FFT\n", 0, "(m),()->(n)"); - if (f == NULL) { - return -1; - } - PyDict_SetItemString(dictionary, "ifft", f); - Py_DECREF(f); - f = PyUFunc_FromFuncAndDataAndSignature( - rfft_n_even_functions, rfft_data, rfft_types, 1, 2, 1, PyUFunc_None, - "rfft_n_even", "real forward FFT for even n\n", 0, "(n),()->(m)"); - if (f == NULL) { - return -1; - } - PyDict_SetItemString(dictionary, "rfft_n_even", f); - Py_DECREF(f); - f = PyUFunc_FromFuncAndDataAndSignature( - rfft_n_odd_functions, rfft_data, rfft_types, 1, 2, 1, PyUFunc_None, - "rfft_n_odd", "real forward FFT for odd n\n", 0, "(n),()->(m)"); - if (f == NULL) { - return -1; - } - PyDict_SetItemString(dictionary, "rfft_n_odd", f); - Py_DECREF(f); - f = PyUFunc_FromFuncAndDataAndSignature( - irfft_functions, irfft_data, irfft_types, 1, 2, 1, PyUFunc_None, - "irfft", "real backward FFT\n", 0, "(m),()->(n)"); - if (f == NULL) { - return -1; - } - PyDict_SetItemString(dictionary, "irfft", f); - Py_DECREF(f); - return 0; -} - -static struct PyModuleDef moduledef = { - PyModuleDef_HEAD_INIT, - .m_name = "_umath_pocketfft", - .m_size = -1, -}; - -/* Initialization function for the module */ -PyMODINIT_FUNC PyInit__pocketfft_umath(void) -{ - PyObject *m = PyModule_Create(&moduledef); - if (m == NULL) { - return NULL; - } - - /* Import the array and ufunc objects */ - import_array(); - import_ufunc(); - - PyObject *d = PyModule_GetDict(m); - if (add_gufuncs(d) < 0) { - Py_DECREF(d); - Py_DECREF(m); - return NULL; - } - - return m; -} diff --git a/numpy/fft/_pocketfft_umath.cpp b/numpy/fft/_pocketfft_umath.cpp new file mode 100644 index 000000000000..127ebfdb6149 --- /dev/null +++ b/numpy/fft/_pocketfft_umath.cpp @@ -0,0 +1,422 @@ +/* + * This file is part of pocketfft. + * Licensed under a 3-clause BSD style license - see LICENSE.md + */ + +/* + * Main implementation file. 
+ * + * Copyright (C) 2004-2018 Max-Planck-Society + * \author Martin Reinecke + */ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#define PY_SSIZE_T_CLEAN +#include +#include + +#include "numpy/arrayobject.h" +#include "numpy/ufuncobject.h" + +#include "npy_config.h" + +#define POCKETFFT_NO_MULTITHREADING +#include "pocketfft/pocketfft_hdronly.h" + +/* + * In order to ensure that C++ exceptions are converted to Python + * ones before crossing over to the C machinery, we must catch them. + * This template can be used to wrap a C++ written ufunc to do this via: + * wrap_legacy_cpp_ufunc + */ +template +static void +wrap_legacy_cpp_ufunc(char **args, npy_intp const *dimensions, + ptrdiff_t const *steps, void *func) +{ + NPY_ALLOW_C_API_DEF + try { + cpp_ufunc(args, dimensions, steps, func); + } + catch (std::bad_alloc& e) { + NPY_ALLOW_C_API; + PyErr_NoMemory(); + NPY_DISABLE_C_API; + } + catch (const std::exception& e) { + NPY_ALLOW_C_API; + PyErr_SetString(PyExc_RuntimeError, e.what()); + NPY_DISABLE_C_API; + } +} + +/* + * Transfer to and from a contiguous buffer. + * copy_input: copy min(nin, n) elements from input to buffer and zero rest. + * copy_output: copy n elements from buffer to output. + */ +template +static inline void +copy_input(char *in, npy_intp step_in, size_t nin, + T buff[], size_t n) +{ + size_t ncopy = nin <= n ? nin : n; + char *ip = in; + size_t i; + for (i = 0; i < ncopy; i++, ip += step_in) { + buff[i] = *(T *)ip; + } + for (; i < n; i++) { + buff[i] = 0; + } +} + +template +static inline void +copy_output(T buff[], char *out, npy_intp step_out, size_t n) +{ + char *op = out; + for (size_t i = 0; i < n; i++, op += step_out) { + *(T *)op = buff[i]; + } +} + +/* + * Gufunc loops calling the pocketfft code. 
+ */ +template +static void +fft_loop(char **args, npy_intp const *dimensions, ptrdiff_t const *steps, + void *func) +{ + char *ip = args[0], *fp = args[1], *op = args[2]; + size_t n_outer = (size_t)dimensions[0]; + ptrdiff_t si = steps[0], sf = steps[1], so = steps[2]; + size_t nin = (size_t)dimensions[1], nout = (size_t)dimensions[2]; + ptrdiff_t step_in = steps[3], step_out = steps[4]; + bool direction = *((bool *)func); /* pocketfft::FORWARD or BACKWARD */ + + assert (nout > 0); + +#ifndef POCKETFFT_NO_VECTORS + /* + * For the common case of nin >= nout, fixed factor, and suitably sized + * outer loop, we call pocketfft directly to benefit from its vectorization. + * (For nin>nout, this just removes the extra input points, as required; + * the vlen constraint avoids compiling extra code for longdouble, which + * cannot be vectorized so does not benefit.) + */ + constexpr auto vlen = pocketfft::detail::VLEN::val; + if (vlen > 1 && n_outer >= vlen && nin >= nout && sf == 0) { + std::vector shape = { n_outer, nout }; + std::vector strides_in = { si, step_in }; + std::vector strides_out = { so, step_out}; + std::vector axes = { 1 }; + pocketfft::c2c(shape, strides_in, strides_out, axes, direction, + (std::complex *)ip, (std::complex *)op, *(T *)fp); + return; + } +#endif + /* + * Otherwise, use a non-vectorized loop in which we try to minimize copies. + * We do still need a buffer if the output is not contiguous. + */ + auto plan = pocketfft::detail::get_plan>(nout); + auto buffered = (step_out != sizeof(std::complex)); + pocketfft::detail::arr> buff(buffered ? nout : 0); + for (size_t i = 0; i < n_outer; i++, ip += si, fp += sf, op += so) { + std::complex *op_or_buff = buffered ? 
buff.data() : (std::complex *)op; + if (ip != (char*)op_or_buff) { + copy_input(ip, step_in, nin, op_or_buff, nout); + } + plan->exec((pocketfft::detail::cmplx *)op_or_buff, *(T *)fp, direction); + if (buffered) { + copy_output(op_or_buff, op, step_out, nout); + } + } + return; +} + +template +static void +rfft_impl(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *func, size_t npts) +{ + char *ip = args[0], *fp = args[1], *op = args[2]; + size_t n_outer = (size_t)dimensions[0]; + ptrdiff_t si = steps[0], sf = steps[1], so = steps[2]; + size_t nin = (size_t)dimensions[1], nout = (size_t)dimensions[2]; + ptrdiff_t step_in = steps[3], step_out = steps[4]; + + assert (nout > 0 && nout == npts / 2 + 1); + +#ifndef POCKETFFT_NO_VECTORS + /* + * Call pocketfft directly if vectorization is possible. + */ + constexpr auto vlen = pocketfft::detail::VLEN::val; + if (vlen > 1 && n_outer >= vlen && nin >= npts && sf == 0) { + std::vector shape_in = { n_outer, npts }; + std::vector strides_in = { si, step_in }; + std::vector strides_out = { so, step_out}; + std::vector axes = { 1 }; + pocketfft::r2c(shape_in, strides_in, strides_out, axes, pocketfft::FORWARD, + (T *)ip, (std::complex *)op, *(T *)fp); + return; + } +#endif + /* + * Otherwise, use a non-vectorized loop in which we try to minimize copies. + * We do still need a buffer if the output is not contiguous. + */ + auto plan = pocketfft::detail::get_plan>(npts); + auto buffered = (step_out != sizeof(std::complex)); + pocketfft::detail::arr> buff(buffered ? nout : 0); + for (size_t i = 0; i < n_outer; i++, ip += si, fp += sf, op += so) { + std::complex *op_or_buff = buffered ? 
buff.data() : (std::complex *)op; + /* + * The internal pocketfft routines work in-place and for real + * transforms the frequency data thus needs to be compressed, using + * that there will be no imaginary component for the zero-frequency + * item (which is the sum of all inputs and thus has to be real), nor + * one for the Nyquist frequency for even number of points. + * Pocketfft uses FFTpack order, R0,R1,I1,...Rn-1,In-1,Rn[,In] (last + * for npts odd only). To make unpacking easy, we place the real data + * offset by one in the buffer, so that we just have to move R0 and + * create I0=0. Note that copy_input will zero the In component for + * even number of points. + */ + copy_input(ip, step_in, nin, &((T *)op_or_buff)[1], nout*2 - 1); + plan->exec(&((T *)op_or_buff)[1], *(T *)fp, pocketfft::FORWARD); + op_or_buff[0] = op_or_buff[0].imag(); // I0->R0, I0=0 + if (buffered) { + copy_output(op_or_buff, op, step_out, nout); + } + } + return; +} + +/* + * For the forward real, we cannot know what the requested number of points is + * just based on the number of points in the complex output array (e.g., 10 + * and 11 real input points both lead to 6 complex output points), so we + * define versions for both even and odd number of points.
+ */ +template +static void +rfft_n_even_loop(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func) +{ + size_t nout = (size_t)dimensions[2]; + assert (nout > 0); + size_t npts = 2 * nout - 2; + rfft_impl(args, dimensions, steps, func, npts); +} + +template +static void +rfft_n_odd_loop(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func) +{ + size_t nout = (size_t)dimensions[2]; + assert (nout > 0); + size_t npts = 2 * nout - 1; + rfft_impl(args, dimensions, steps, func, npts); +} + +template +static void +irfft_loop(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func) +{ + char *ip = args[0], *fp = args[1], *op = args[2]; + size_t n_outer = (size_t)dimensions[0]; + ptrdiff_t si = steps[0], sf = steps[1], so = steps[2]; + size_t nin = (size_t)dimensions[1], nout = (size_t)dimensions[2]; + ptrdiff_t step_in = steps[3], step_out = steps[4]; + + size_t npts_in = nout / 2 + 1; + + assert(nout > 0); + +#ifndef POCKETFFT_NO_VECTORS + /* + * Call pocketfft directly if vectorization is possible. + */ + constexpr auto vlen = pocketfft::detail::VLEN::val; + if (vlen > 1 && n_outer >= vlen && nin >= npts_in && sf == 0) { + std::vector axes = { 1 }; + std::vector shape_out = { n_outer, nout }; + std::vector strides_in = { si, step_in }; + std::vector strides_out = { so, step_out}; + pocketfft::c2r(shape_out, strides_in, strides_out, axes, pocketfft::BACKWARD, + (std::complex *)ip, (T *)op, *(T *)fp); + return; + } +#endif + /* + * Otherwise, use a non-vectorized loop in which we try to minimize copies. + * We do still need a buffer if the output is not contiguous. + */ + auto plan = pocketfft::detail::get_plan>(nout); + auto buffered = (step_out != sizeof(T)); + pocketfft::detail::arr buff(buffered ? nout : 0); + for (size_t i = 0; i < n_outer; i++, ip += si, fp += sf, op += so) { + T *op_or_buff = buffered ? 
buff.data() : (T *)op; + /* + * Pocket_fft works in-place and for inverse real transforms the + * frequency data thus needs to be compressed, removing the imaginary + * component of the zero-frequency item (which is the sum of all + * inputs and thus has to be real), as well as the imaginary component + * of the Nyquist frequency for even number of points. We thus copy + * the data to the buffer in the following order (also used by + * FFTpack): R0,R1,I1,...Rn-1,In-1,Rn[,In] (last for npts odd only). + */ + op_or_buff[0] = (nin > 0) ? ((T *)ip)[0] : (T)0; /* copy R0 */ + if (nout > 1) { + /* + * Copy R1,I1... up to Rn-1,In-1 if possible, stopping earlier + * if not all the input points are needed or if the input is short + * (in the latter case, zeroing after). + */ + copy_input(ip + step_in, step_in, (nin > 0) ? nin - 1 : 0, + (std::complex *)&op_or_buff[1], (nout - 1) / 2); + /* For even nout, we still need to set Rn. */ + if (nout % 2 == 0) { + op_or_buff[nout - 1] = (nout / 2 >= nin) ? (T)0 : + ((T *)(ip + (nout / 2) * step_in))[0]; + } + } + plan->exec(op_or_buff, *(T *)fp, pocketfft::BACKWARD); + if (buffered) { + copy_output(op_or_buff, op, step_out, nout); + } + } + return; +} + +static PyUFuncGenericFunction fft_functions[] = { + wrap_legacy_cpp_ufunc>, + wrap_legacy_cpp_ufunc>, + wrap_legacy_cpp_ufunc> +}; +static char fft_types[] = { + NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE, + NPY_CFLOAT, NPY_FLOAT, NPY_CFLOAT, + NPY_CLONGDOUBLE, NPY_LONGDOUBLE, NPY_CLONGDOUBLE +}; +static void *fft_data[] = { + (void*)&pocketfft::FORWARD, + (void*)&pocketfft::FORWARD, + (void*)&pocketfft::FORWARD +}; +static void *ifft_data[] = { + (void*)&pocketfft::BACKWARD, + (void*)&pocketfft::BACKWARD, + (void*)&pocketfft::BACKWARD +}; + +static PyUFuncGenericFunction rfft_n_even_functions[] = { + wrap_legacy_cpp_ufunc>, + wrap_legacy_cpp_ufunc>, + wrap_legacy_cpp_ufunc> +}; +static PyUFuncGenericFunction rfft_n_odd_functions[] = { + wrap_legacy_cpp_ufunc>, + wrap_legacy_cpp_ufunc>, + wrap_legacy_cpp_ufunc> +};
+static char rfft_types[] = { + NPY_DOUBLE, NPY_DOUBLE, NPY_CDOUBLE, + NPY_FLOAT, NPY_FLOAT, NPY_CFLOAT, + NPY_LONGDOUBLE, NPY_LONGDOUBLE, NPY_CLONGDOUBLE +}; + +static PyUFuncGenericFunction irfft_functions[] = { + wrap_legacy_cpp_ufunc>, + wrap_legacy_cpp_ufunc>, + wrap_legacy_cpp_ufunc> +}; +static char irfft_types[] = { + NPY_CDOUBLE, NPY_DOUBLE, NPY_DOUBLE, + NPY_CFLOAT, NPY_FLOAT, NPY_FLOAT, + NPY_CLONGDOUBLE, NPY_LONGDOUBLE, NPY_LONGDOUBLE +}; + +static int +add_gufuncs(PyObject *dictionary) { + PyObject *f; + + f = PyUFunc_FromFuncAndDataAndSignature( + fft_functions, fft_data, fft_types, 3, 2, 1, PyUFunc_None, + "fft", "complex forward FFT\n", 0, "(n),()->(m)"); + if (f == NULL) { + return -1; + } + PyDict_SetItemString(dictionary, "fft", f); + Py_DECREF(f); + f = PyUFunc_FromFuncAndDataAndSignature( + fft_functions, ifft_data, fft_types, 3, 2, 1, PyUFunc_None, + "ifft", "complex backward FFT\n", 0, "(m),()->(n)"); + if (f == NULL) { + return -1; + } + PyDict_SetItemString(dictionary, "ifft", f); + Py_DECREF(f); + f = PyUFunc_FromFuncAndDataAndSignature( + rfft_n_even_functions, NULL, rfft_types, 3, 2, 1, PyUFunc_None, + "rfft_n_even", "real forward FFT for even n\n", 0, "(n),()->(m)"); + if (f == NULL) { + return -1; + } + PyDict_SetItemString(dictionary, "rfft_n_even", f); + Py_DECREF(f); + f = PyUFunc_FromFuncAndDataAndSignature( + rfft_n_odd_functions, NULL, rfft_types, 3, 2, 1, PyUFunc_None, + "rfft_n_odd", "real forward FFT for odd n\n", 0, "(n),()->(m)"); + if (f == NULL) { + return -1; + } + PyDict_SetItemString(dictionary, "rfft_n_odd", f); + Py_DECREF(f); + f = PyUFunc_FromFuncAndDataAndSignature( + irfft_functions, NULL, irfft_types, 3, 2, 1, PyUFunc_None, + "irfft", "real backward FFT\n", 0, "(m),()->(n)"); + if (f == NULL) { + return -1; + } + PyDict_SetItemString(dictionary, "irfft", f); + Py_DECREF(f); + return 0; +} + +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_pocketfft_umath", + NULL, + -1, + NULL, + NULL, +
NULL, + NULL, + NULL +}; + +/* Initialization function for the module */ +PyMODINIT_FUNC PyInit__pocketfft_umath(void) +{ + PyObject *m = PyModule_Create(&moduledef); + if (m == NULL) { + return NULL; + } + + /* Import the array and ufunc objects */ + import_array(); + import_ufunc(); + + PyObject *d = PyModule_GetDict(m); + if (add_gufuncs(d) < 0) { + /* d is a borrowed reference; releasing m is sufficient. */ + Py_DECREF(m); + return NULL; + } + + return m; +} diff --git a/numpy/fft/meson.build b/numpy/fft/meson.build index e2d4e00c05c5..3606acee0109 100644 --- a/numpy/fft/meson.build +++ b/numpy/fft/meson.build @@ -4,7 +4,7 @@ if host_machine.system() == 'aix' or host_machine.system() == 'AIX' endif py.extension_module('_pocketfft_umath', - ['_pocketfft_umath.c', 'pocketfft/pocketfft.c'], + ['_pocketfft_umath.cpp'], c_args: largefile_define, dependencies: np_core_dep, install: true, diff --git a/numpy/fft/pocketfft b/numpy/fft/pocketfft new file mode 160000 index 000000000000..0f7aa1225b06 --- /dev/null +++ b/numpy/fft/pocketfft @@ -0,0 +1 @@ +Subproject commit 0f7aa1225b065938fc263b7914df16b8c1cbc9d7 diff --git a/numpy/fft/pocketfft/README.md b/numpy/fft/pocketfft/README.md deleted file mode 100644 index f79188139ad9..000000000000 --- a/numpy/fft/pocketfft/README.md +++ /dev/null @@ -1,48 +0,0 @@ -PocketFFT ---------- - -This is a heavily modified implementation of FFTPack [1,2], with the following -advantages: - -- strictly C99 compliant -- more accurate twiddle factor computation -- very fast plan generation -- worst case complexity for transform sizes with large prime factors is - `N*log(N)`, because Bluestein's algorithm [3] is used for these cases. - - -Some code details ------------------ - -Twiddle factor computation: - -- making use of symmetries to reduce number of sin/cos evaluations -- all angles are reduced to the range `[0; pi/4]` for higher accuracy -- an adapted implementation of `sincospi()` is used, which actually computes - `sin(x)` and `(cos(x)-1)`.
-- if `n` sin/cos pairs are required, the adjusted `sincospi()` is only called - `2*sqrt(n)` times; the remaining values are obtained by evaluating the - angle addition theorems in a numerically accurate way. - -Parallel invocation: - -- Plans only contain read-only data; all temporary arrays are allocated and - deallocated during an individual FFT execution. This means that a single plan - can be used in several threads at the same time. - -Efficient codelets are available for the factors: - -- 2, 3, 4, 5, 7, 11 for complex-valued FFTs -- 2, 3, 4, 5 for real-valued FFTs - -Larger prime factors are handled by somewhat less efficient, generic routines. - -For lengths with very large prime factors, Bluestein's algorithm is used, and -instead of an FFT of length `n`, a convolution of length `n2 >= 2*n-1` -is performed, where `n2` is chosen to be highly composite. - - -[1] Swarztrauber, P. 1982, Vectorizing the Fast Fourier Transforms - (New York: Academic Press), 51 -[2] https://www.netlib.org/fftpack/ -[3] https://en.wikipedia.org/wiki/Chirp_Z-transform diff --git a/numpy/fft/pocketfft/pocketfft.c b/numpy/fft/pocketfft/pocketfft.c deleted file mode 100644 index 669ebacc1bbc..000000000000 --- a/numpy/fft/pocketfft/pocketfft.c +++ /dev/null @@ -1,2194 +0,0 @@ -/* - * This file is part of pocketfft. - * Licensed under a 3-clause BSD style license - see LICENSE.md - */ - -/* - * Main implementation file. 
- * - * Copyright (C) 2004-2018 Max-Planck-Society - * \author Martin Reinecke - */ - -#include -#include -#include - -#include "pocketfft.h" - -#define RALLOC(type,num) \ - (assert(num != 0), ((type *)malloc((num)*sizeof(type)))) -#define DEALLOC(ptr) \ - do { free(ptr); (ptr)=NULL; } while(0) - -#define SWAP(a,b,type) \ - do { type tmp_=(a); (a)=(b); (b)=tmp_; } while(0) - -#ifdef __GNUC__ -#define NOINLINE __attribute__((noinline)) -#define WARN_UNUSED_RESULT __attribute__ ((warn_unused_result)) -#else -#define NOINLINE -#define WARN_UNUSED_RESULT -#endif - -// adapted from https://stackoverflow.com/questions/42792939/ -// CAUTION: this function only works for arguments in the range [-0.25; 0.25]! -static void my_sincosm1pi (double a, double *restrict res) - { - double s = a * a; - /* Approximate cos(pi*x)-1 for x in [-0.25,0.25] */ - double r = -1.0369917389758117e-4; - r = fma (r, s, 1.9294935641298806e-3); - r = fma (r, s, -2.5806887942825395e-2); - r = fma (r, s, 2.3533063028328211e-1); - r = fma (r, s, -1.3352627688538006e+0); - r = fma (r, s, 4.0587121264167623e+0); - r = fma (r, s, -4.9348022005446790e+0); - double c = r*s; - /* Approximate sin(pi*x) for x in [-0.25,0.25] */ - r = 4.6151442520157035e-4; - r = fma (r, s, -7.3700183130883555e-3); - r = fma (r, s, 8.2145868949323936e-2); - r = fma (r, s, -5.9926452893214921e-1); - r = fma (r, s, 2.5501640398732688e+0); - r = fma (r, s, -5.1677127800499516e+0); - s = s * a; - r = r * s; - s = fma (a, 3.1415926535897931e+0, r); - res[0] = c; - res[1] = s; - } - -NOINLINE static void calc_first_octant(size_t den, double * restrict res) - { - size_t n = (den+4)>>3; - if (n==0) return; - res[0]=1.; res[1]=0.; - if (n==1) return; - size_t l1=(size_t)sqrt(n); - for (size_t i=1; in) end = n-start; - for (size_t i=1; i>2; - size_t i=0, idx1=0, idx2=2*ndone-2; - for (; i+1>1; - double * p = res+n-1; - calc_first_octant(n<<2, p); - int i4=0, in=n, i=0; - for (; i4<=in-i4; ++i, i4+=4) // octant 0 - { - res[2*i] = 
p[2*i4]; res[2*i+1] = p[2*i4+1]; - } - for (; i4-in <= 0; ++i, i4+=4) // octant 1 - { - int xm = in-i4; - res[2*i] = p[2*xm+1]; res[2*i+1] = p[2*xm]; - } - for (; i4<=3*in-i4; ++i, i4+=4) // octant 2 - { - int xm = i4-in; - res[2*i] = -p[2*xm+1]; res[2*i+1] = p[2*xm]; - } - for (; i>2; - if ((n&7)==0) - res[quart] = res[quart+1] = hsqt2; - for (size_t i=2, j=2*quart-2; i>1; - if ((n&3)==0) - for (size_t i=0; i>1))<<1)==n) - { res=2; n=tmp; } - - size_t limit=(size_t)sqrt(n+0.01); - for (size_t x=3; x<=limit; x+=2) - while (((tmp=(n/x))*x)==n) - { - res=x; - n=tmp; - limit=(size_t)sqrt(n+0.01); - } - if (n>1) res=n; - - return res; - } - -NOINLINE static double cost_guess (size_t n) - { - const double lfp=1.1; // penalty for non-hardcoded larger factors - size_t ni=n; - double result=0.; - size_t tmp; - while (((tmp=(n>>1))<<1)==n) - { result+=2; n=tmp; } - - size_t limit=(size_t)sqrt(n+0.01); - for (size_t x=3; x<=limit; x+=2) - while ((tmp=(n/x))*x==n) - { - result+= (x<=5) ? x : lfp*x; // penalize larger prime factors - n=tmp; - limit=(size_t)sqrt(n+0.01); - } - if (n>1) result+=(n<=5) ? 
n : lfp*n; - - return result*ni; - } - -/* returns the smallest composite of 2, 3, 5, 7 and 11 which is >= n */ -NOINLINE static size_t good_size(size_t n) - { - if (n<=6) return n; - - size_t bestfac=2*n; - for (size_t f2=1; f2=n) bestfac=f235711; - return bestfac; - } - -typedef struct cmplx { - double r,i; -} cmplx; - -#define NFCT 25 -typedef struct cfftp_fctdata - { - size_t fct; - cmplx *tw, *tws; - } cfftp_fctdata; - -typedef struct cfftp_plan_i - { - size_t length, nfct; - cmplx *mem; - cfftp_fctdata fct[NFCT]; - } cfftp_plan_i; -typedef struct cfftp_plan_i * cfftp_plan; - -#define PMC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; } -#define ADDC(a,b,c) { a.r=b.r+c.r; a.i=b.i+c.i; } -#define SCALEC(a,b) { a.r*=b; a.i*=b; } -#define ROT90(a) { double tmp_=a.r; a.r=-a.i; a.i=tmp_; } -#define ROTM90(a) { double tmp_=-a.r; a.r=a.i; a.i=tmp_; } -#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))] -#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))] -#define WA(x,i) wa[(i)-1+(x)*(ido-1)] -/* a = b*c */ -#define A_EQ_B_MUL_C(a,b,c) { a.r=b.r*c.r-b.i*c.i; a.i=b.r*c.i+b.i*c.r; } -/* a = conj(b)*c*/ -#define A_EQ_CB_MUL_C(a,b,c) { a.r=b.r*c.r+b.i*c.i; a.i=b.r*c.i-b.i*c.r; } - -#define PMSIGNC(a,b,c,d) { a.r=c.r+sign*d.r; a.i=c.i+sign*d.i; b.r=c.r-sign*d.r; b.i=c.i-sign*d.i; } -/* a = b*c */ -#define MULPMSIGNC(a,b,c) { a.r=b.r*c.r-sign*b.i*c.i; a.i=b.r*c.i+sign*b.i*c.r; } -/* a *= b */ -#define MULPMSIGNCEQ(a,b) { double xtmp=a.r; a.r=b.r*a.r-sign*b.i*a.i; a.i=b.r*a.i+sign*b.i*xtmp; } - -NOINLINE static void pass2b (size_t ido, size_t l1, const cmplx * restrict cc, - cmplx * restrict ch, const cmplx * restrict wa) - { - const size_t cdim=2; - - if (ido==1) - for (size_t k=0; kip) iwal-=ip; - cmplx xwal=wal[iwal]; - iwal+=l; if (iwal>ip) iwal-=ip; - cmplx xwal2=wal[iwal]; - for (size_t ik=0; ikip) iwal-=ip; - cmplx xwal=wal[iwal]; - for (size_t ik=0; iklength==1) return 0; - size_t len=plan->length; - size_t l1=1, nf=plan->nfct; - cmplx *ch = RALLOC(cmplx, len); - if 
(!ch) return -1; - cmplx *p1=c, *p2=ch; - - for(size_t k1=0; k1fct[k1].fct; - size_t l2=ip*l1; - size_t ido = len/l2; - if (ip==4) - sign>0 ? pass4b (ido, l1, p1, p2, plan->fct[k1].tw) - : pass4f (ido, l1, p1, p2, plan->fct[k1].tw); - else if(ip==2) - sign>0 ? pass2b (ido, l1, p1, p2, plan->fct[k1].tw) - : pass2f (ido, l1, p1, p2, plan->fct[k1].tw); - else if(ip==3) - sign>0 ? pass3b (ido, l1, p1, p2, plan->fct[k1].tw) - : pass3f (ido, l1, p1, p2, plan->fct[k1].tw); - else if(ip==5) - sign>0 ? pass5b (ido, l1, p1, p2, plan->fct[k1].tw) - : pass5f (ido, l1, p1, p2, plan->fct[k1].tw); - else if(ip==7) pass7 (ido, l1, p1, p2, plan->fct[k1].tw, sign); - else if(ip==11) pass11(ido, l1, p1, p2, plan->fct[k1].tw, sign); - else - { - if (passg(ido, ip, l1, p1, p2, plan->fct[k1].tw, plan->fct[k1].tws, sign)) - { DEALLOC(ch); return -1; } - SWAP(p1,p2,cmplx *); - } - SWAP(p1,p2,cmplx *); - l1=l2; - } - if (p1!=c) - { - if (fct!=1.) - for (size_t i=0; ilength; - size_t nfct=0; - while ((length%4)==0) - { if (nfct>=NFCT) return -1; plan->fct[nfct++].fct=4; length>>=2; } - if ((length%2)==0) - { - length>>=1; - // factor 2 should be at the front of the factor list - if (nfct>=NFCT) return -1; - plan->fct[nfct++].fct=2; - SWAP(plan->fct[0].fct, plan->fct[nfct-1].fct,size_t); - } - size_t maxl=(size_t)(sqrt((double)length))+1; - for (size_t divisor=3; (length>1)&&(divisor=NFCT) return -1; - plan->fct[nfct++].fct=divisor; - length/=divisor; - } - maxl=(size_t)(sqrt((double)length))+1; - } - if (length>1) plan->fct[nfct++].fct=length; - plan->nfct=nfct; - return 0; - } - -NOINLINE static size_t cfftp_twsize (cfftp_plan plan) - { - size_t twsize=0, l1=1; - for (size_t k=0; knfct; ++k) - { - size_t ip=plan->fct[k].fct, ido= plan->length/(l1*ip); - twsize+=(ip-1)*(ido-1); - if (ip>11) - twsize+=ip; - l1*=ip; - } - return twsize; - } - -NOINLINE WARN_UNUSED_RESULT static int cfftp_comp_twiddle (cfftp_plan plan) - { - size_t length=plan->length; - double *twid = RALLOC(double, 
2*length); - if (!twid) return -1; - sincos_2pibyn(length, twid); - size_t l1=1; - size_t memofs=0; - for (size_t k=0; knfct; ++k) - { - size_t ip=plan->fct[k].fct, ido= length/(l1*ip); - plan->fct[k].tw=plan->mem+memofs; - memofs+=(ip-1)*(ido-1); - for (size_t j=1; jfct[k].tw[(j-1)*(ido-1)+i-1].r = twid[2*j*l1*i]; - plan->fct[k].tw[(j-1)*(ido-1)+i-1].i = twid[2*j*l1*i+1]; - } - if (ip>11) - { - plan->fct[k].tws=plan->mem+memofs; - memofs+=ip; - for (size_t j=0; jfct[k].tws[j].r = twid[2*j*l1*ido]; - plan->fct[k].tws[j].i = twid[2*j*l1*ido+1]; - } - } - l1*=ip; - } - DEALLOC(twid); - return 0; - } - -static cfftp_plan make_cfftp_plan (size_t length) - { - if (length==0) return NULL; - cfftp_plan plan = RALLOC(cfftp_plan_i,1); - if (!plan) return NULL; - plan->length=length; - plan->nfct=0; - for (size_t i=0; ifct[i]=(cfftp_fctdata){0,0,0}; - plan->mem=0; - if (length==1) return plan; - if (cfftp_factorize(plan)!=0) { DEALLOC(plan); return NULL; } - size_t tws=cfftp_twsize(plan); - if (tws != 0) { - plan->mem=RALLOC(cmplx,tws); - if (!plan->mem) { DEALLOC(plan); return NULL; } - } - if (cfftp_comp_twiddle(plan)!=0) - { DEALLOC(plan->mem); DEALLOC(plan); return NULL; } - return plan; - } - -static void destroy_cfftp_plan (cfftp_plan plan) - { - DEALLOC(plan->mem); - DEALLOC(plan); - } - -typedef struct rfftp_fctdata - { - size_t fct; - double *tw, *tws; - } rfftp_fctdata; - -typedef struct rfftp_plan_i - { - size_t length, nfct; - double *mem; - rfftp_fctdata fct[NFCT]; - } rfftp_plan_i; -typedef struct rfftp_plan_i * rfftp_plan; - -#define WA(x,i) wa[(i)+(x)*(ido-1)] -#define PM(a,b,c,d) { a=c+d; b=c-d; } -/* (a+ib) = conj(c+id) * (e+if) */ -#define MULPM(a,b,c,d,e,f) { a=c*e+d*f; b=c*f-d*e; } - -#define CC(a,b,c) cc[(a)+ido*((b)+l1*(c))] -#define CH(a,b,c) ch[(a)+ido*((b)+cdim*(c))] - -NOINLINE static void radf2 (size_t ido, size_t l1, const double * restrict cc, - double * restrict ch, const double * restrict wa) - { - const size_t cdim=2; - - for (size_t k=0; k1) 
- { - for (size_t j=1, jc=ip-1; j=ip) iang-=ip; - double ar1=csarr[2*iang], ai1=csarr[2*iang+1]; - iang+=l; if (iang>=ip) iang-=ip; - double ar2=csarr[2*iang], ai2=csarr[2*iang+1]; - iang+=l; if (iang>=ip) iang-=ip; - double ar3=csarr[2*iang], ai3=csarr[2*iang+1]; - iang+=l; if (iang>=ip) iang-=ip; - double ar4=csarr[2*iang], ai4=csarr[2*iang+1]; - for (size_t ik=0; ik=ip) iang-=ip; - double ar1=csarr[2*iang], ai1=csarr[2*iang+1]; - iang+=l; if (iang>=ip) iang-=ip; - double ar2=csarr[2*iang], ai2=csarr[2*iang+1]; - for (size_t ik=0; ik=ip) iang-=ip; - double ar=csarr[2*iang], ai=csarr[2*iang+1]; - for (size_t ik=0; ikip) iang-=ip; - double ar1=csarr[2*iang], ai1=csarr[2*iang+1]; - iang+=l; if(iang>ip) iang-=ip; - double ar2=csarr[2*iang], ai2=csarr[2*iang+1]; - iang+=l; if(iang>ip) iang-=ip; - double ar3=csarr[2*iang], ai3=csarr[2*iang+1]; - iang+=l; if(iang>ip) iang-=ip; - double ar4=csarr[2*iang], ai4=csarr[2*iang+1]; - for (size_t ik=0; ikip) iang-=ip; - double ar1=csarr[2*iang], ai1=csarr[2*iang+1]; - iang+=l; if(iang>ip) iang-=ip; - double ar2=csarr[2*iang], ai2=csarr[2*iang+1]; - for (size_t ik=0; ikip) iang-=ip; - double war=csarr[2*iang], wai=csarr[2*iang+1]; - for (size_t ik=0; iklength==1) return 0; - size_t n=plan->length; - size_t l1=n, nf=plan->nfct; - double *ch = RALLOC(double, n); - if (!ch) return -1; - double *p1=c, *p2=ch; - - for(size_t k1=0; k1fct[k].fct; - size_t ido=n / l1; - l1 /= ip; - if(ip==4) - radf4(ido, l1, p1, p2, plan->fct[k].tw); - else if(ip==2) - radf2(ido, l1, p1, p2, plan->fct[k].tw); - else if(ip==3) - radf3(ido, l1, p1, p2, plan->fct[k].tw); - else if(ip==5) - radf5(ido, l1, p1, p2, plan->fct[k].tw); - else - { - radfg(ido, ip, l1, p1, p2, plan->fct[k].tw, plan->fct[k].tws); - SWAP (p1,p2,double *); - } - SWAP (p1,p2,double *); - } - copy_and_norm(c,p1,n,fct); - DEALLOC(ch); - return 0; - } - -WARN_UNUSED_RESULT -static int rfftp_backward(rfftp_plan plan, double c[], double fct) - { - if (plan->length==1) return 0; - size_t 
n=plan->length; - size_t l1=1, nf=plan->nfct; - double *ch = RALLOC(double, n); - if (!ch) return -1; - double *p1=c, *p2=ch; - - for(size_t k=0; kfct[k].fct, - ido= n/(ip*l1); - if(ip==4) - radb4(ido, l1, p1, p2, plan->fct[k].tw); - else if(ip==2) - radb2(ido, l1, p1, p2, plan->fct[k].tw); - else if(ip==3) - radb3(ido, l1, p1, p2, plan->fct[k].tw); - else if(ip==5) - radb5(ido, l1, p1, p2, plan->fct[k].tw); - else - radbg(ido, ip, l1, p1, p2, plan->fct[k].tw, plan->fct[k].tws); - SWAP (p1,p2,double *); - l1*=ip; - } - copy_and_norm(c,p1,n,fct); - DEALLOC(ch); - return 0; - } - -WARN_UNUSED_RESULT -static int rfftp_factorize (rfftp_plan plan) - { - size_t length=plan->length; - size_t nfct=0; - while ((length%4)==0) - { if (nfct>=NFCT) return -1; plan->fct[nfct++].fct=4; length>>=2; } - if ((length%2)==0) - { - length>>=1; - // factor 2 should be at the front of the factor list - if (nfct>=NFCT) return -1; - plan->fct[nfct++].fct=2; - SWAP(plan->fct[0].fct, plan->fct[nfct-1].fct,size_t); - } - size_t maxl=(size_t)(sqrt((double)length))+1; - for (size_t divisor=3; (length>1)&&(divisor=NFCT) return -1; - plan->fct[nfct++].fct=divisor; - length/=divisor; - } - maxl=(size_t)(sqrt((double)length))+1; - } - if (length>1) plan->fct[nfct++].fct=length; - plan->nfct=nfct; - return 0; - } - -static size_t rfftp_twsize(rfftp_plan plan) - { - size_t twsize=0, l1=1; - for (size_t k=0; knfct; ++k) - { - size_t ip=plan->fct[k].fct, ido= plan->length/(l1*ip); - twsize+=(ip-1)*(ido-1); - if (ip>5) twsize+=2*ip; - l1*=ip; - } - return twsize; - } - -WARN_UNUSED_RESULT NOINLINE static int rfftp_comp_twiddle (rfftp_plan plan) - { - size_t length=plan->length; - double *twid = RALLOC(double, 2*length); - if (!twid) return -1; - sincos_2pibyn_half(length, twid); - size_t l1=1; - double *ptr=plan->mem; - for (size_t k=0; knfct; ++k) - { - size_t ip=plan->fct[k].fct, ido=length/(l1*ip); - if (knfct-1) // last factor doesn't need twiddles - { - plan->fct[k].tw=ptr; ptr+=(ip-1)*(ido-1); - 
for (size_t j=1; jfct[k].tw[(j-1)*(ido-1)+2*i-2] = twid[2*j*l1*i]; - plan->fct[k].tw[(j-1)*(ido-1)+2*i-1] = twid[2*j*l1*i+1]; - } - } - if (ip>5) // special factors required by *g functions - { - plan->fct[k].tws=ptr; ptr+=2*ip; - plan->fct[k].tws[0] = 1.; - plan->fct[k].tws[1] = 0.; - for (size_t i=1; i<=(ip>>1); ++i) - { - plan->fct[k].tws[2*i ] = twid[2*i*(length/ip)]; - plan->fct[k].tws[2*i+1] = twid[2*i*(length/ip)+1]; - plan->fct[k].tws[2*(ip-i) ] = twid[2*i*(length/ip)]; - plan->fct[k].tws[2*(ip-i)+1] = -twid[2*i*(length/ip)+1]; - } - } - l1*=ip; - } - DEALLOC(twid); - return 0; - } - -NOINLINE static rfftp_plan make_rfftp_plan (size_t length) - { - if (length==0) return NULL; - rfftp_plan plan = RALLOC(rfftp_plan_i,1); - if (!plan) return NULL; - plan->length=length; - plan->nfct=0; - plan->mem=NULL; - for (size_t i=0; ifct[i]=(rfftp_fctdata){0,0,0}; - if (length==1) return plan; - if (rfftp_factorize(plan)!=0) { DEALLOC(plan); return NULL; } - size_t tws=rfftp_twsize(plan); - if (tws != 0) { - plan->mem=RALLOC(double,tws); - if (!plan->mem) { DEALLOC(plan); return NULL; } - } - if (rfftp_comp_twiddle(plan)!=0) - { DEALLOC(plan->mem); DEALLOC(plan); return NULL; } - return plan; - } - -NOINLINE static void destroy_rfftp_plan (rfftp_plan plan) - { - DEALLOC(plan->mem); - DEALLOC(plan); - } - -typedef struct fftblue_plan_i - { - size_t n, n2; - cfftp_plan plan; - double *mem; - double *bk, *bkf; - } fftblue_plan_i; -typedef struct fftblue_plan_i * fftblue_plan; - -NOINLINE static fftblue_plan make_fftblue_plan (size_t length) - { - fftblue_plan plan = RALLOC(fftblue_plan_i,1); - if (!plan) return NULL; - plan->n = length; - plan->n2 = good_size(plan->n*2-1); - plan->mem = RALLOC(double, 2*plan->n+2*plan->n2); - if (!plan->mem) { DEALLOC(plan); return NULL; } - plan->bk = plan->mem; - plan->bkf = plan->bk+2*plan->n; - -/* initialize b_k */ - double *tmp = RALLOC(double,4*plan->n); - if (!tmp) { DEALLOC(plan->mem); DEALLOC(plan); return NULL; } - 
sincos_2pibyn(2*plan->n,tmp); - plan->bk[0] = 1; - plan->bk[1] = 0; - - size_t coeff=0; - for (size_t m=1; mn; ++m) - { - coeff+=2*m-1; - if (coeff>=2*plan->n) coeff-=2*plan->n; - plan->bk[2*m ] = tmp[2*coeff ]; - plan->bk[2*m+1] = tmp[2*coeff+1]; - } - - /* initialize the zero-padded, Fourier transformed b_k. Add normalisation. */ - double xn2 = 1./plan->n2; - plan->bkf[0] = plan->bk[0]*xn2; - plan->bkf[1] = plan->bk[1]*xn2; - for (size_t m=2; m<2*plan->n; m+=2) - { - plan->bkf[m] = plan->bkf[2*plan->n2-m] = plan->bk[m] *xn2; - plan->bkf[m+1] = plan->bkf[2*plan->n2-m+1] = plan->bk[m+1] *xn2; - } - for (size_t m=2*plan->n;m<=(2*plan->n2-2*plan->n+1);++m) - plan->bkf[m]=0.; - plan->plan=make_cfftp_plan(plan->n2); - if (!plan->plan) - { DEALLOC(tmp); DEALLOC(plan->mem); DEALLOC(plan); return NULL; } - if (cfftp_forward(plan->plan,plan->bkf,1.)!=0) - { DEALLOC(tmp); DEALLOC(plan->mem); DEALLOC(plan); return NULL; } - DEALLOC(tmp); - - return plan; - } - -NOINLINE static void destroy_fftblue_plan (fftblue_plan plan) - { - DEALLOC(plan->mem); - destroy_cfftp_plan(plan->plan); - DEALLOC(plan); - } - -NOINLINE WARN_UNUSED_RESULT -static int fftblue_fft(fftblue_plan plan, double c[], int isign, double fct) - { - size_t n=plan->n; - size_t n2=plan->n2; - double *bk = plan->bk; - double *bkf = plan->bkf; - double *akf = RALLOC(double, 2*n2); - if (!akf) return -1; - -/* initialize a_k and FFT it */ - if (isign>0) - for (size_t m=0; m<2*n; m+=2) - { - akf[m] = c[m]*bk[m] - c[m+1]*bk[m+1]; - akf[m+1] = c[m]*bk[m+1] + c[m+1]*bk[m]; - } - else - for (size_t m=0; m<2*n; m+=2) - { - akf[m] = c[m]*bk[m] + c[m+1]*bk[m+1]; - akf[m+1] =-c[m]*bk[m+1] + c[m+1]*bk[m]; - } - for (size_t m=2*n; m<2*n2; ++m) - akf[m]=0; - - if (cfftp_forward (plan->plan,akf,fct)!=0) - { DEALLOC(akf); return -1; } - -/* do the convolution */ - if (isign>0) - for (size_t m=0; m<2*n2; m+=2) - { - double im = -akf[m]*bkf[m+1] + akf[m+1]*bkf[m]; - akf[m ] = akf[m]*bkf[m] + akf[m+1]*bkf[m+1]; - akf[m+1] = im; - } 
- else - for (size_t m=0; m<2*n2; m+=2) - { - double im = akf[m]*bkf[m+1] + akf[m+1]*bkf[m]; - akf[m ] = akf[m]*bkf[m] - akf[m+1]*bkf[m+1]; - akf[m+1] = im; - } - -/* inverse FFT */ - if (cfftp_backward (plan->plan,akf,1.)!=0) - { DEALLOC(akf); return -1; } - -/* multiply by b_k */ - if (isign>0) - for (size_t m=0; m<2*n; m+=2) - { - c[m] = bk[m] *akf[m] - bk[m+1]*akf[m+1]; - c[m+1] = bk[m+1]*akf[m] + bk[m] *akf[m+1]; - } - else - for (size_t m=0; m<2*n; m+=2) - { - c[m] = bk[m] *akf[m] + bk[m+1]*akf[m+1]; - c[m+1] =-bk[m+1]*akf[m] + bk[m] *akf[m+1]; - } - DEALLOC(akf); - return 0; - } - -WARN_UNUSED_RESULT -static int cfftblue_backward(fftblue_plan plan, double c[], double fct) - { return fftblue_fft(plan,c,1,fct); } - -WARN_UNUSED_RESULT -static int cfftblue_forward(fftblue_plan plan, double c[], double fct) - { return fftblue_fft(plan,c,-1,fct); } - -WARN_UNUSED_RESULT -static int rfftblue_backward(fftblue_plan plan, double c[], double fct) - { - size_t n=plan->n; - double *tmp = RALLOC(double,2*n); - if (!tmp) return -1; - tmp[0]=c[0]; - tmp[1]=0.; - memcpy (tmp+2,c+1, (n-1)*sizeof(double)); - if ((n&1)==0) tmp[n+1]=0.; - for (size_t m=2; mn; - double *tmp = RALLOC(double,2*n); - if (!tmp) return -1; - for (size_t m=0; mblueplan=0; - plan->packplan=0; - if ((length<50) || (largest_prime_factor(length)<=sqrt(length))) - { - plan->packplan=make_cfftp_plan(length); - if (!plan->packplan) { DEALLOC(plan); return NULL; } - return plan; - } - double comp1 = cost_guess(length); - double comp2 = 2*cost_guess(good_size(2*length-1)); - comp2*=1.5; /* fudge factor that appears to give good overall performance */ - if (comp2blueplan=make_fftblue_plan(length); - if (!plan->blueplan) { DEALLOC(plan); return NULL; } - } - else - { - plan->packplan=make_cfftp_plan(length); - if (!plan->packplan) { DEALLOC(plan); return NULL; } - } - return plan; - } - -void destroy_cfft_plan (cfft_plan plan) - { - if (plan->blueplan) - destroy_fftblue_plan(plan->blueplan); - if 
(plan->packplan) - destroy_cfftp_plan(plan->packplan); - DEALLOC(plan); - } - -WARN_UNUSED_RESULT int cfft_backward(cfft_plan plan, double c[], double fct) - { - if (plan->packplan) - return cfftp_backward(plan->packplan,c,fct); - // if (plan->blueplan) - return cfftblue_backward(plan->blueplan,c,fct); - } - -WARN_UNUSED_RESULT int cfft_forward(cfft_plan plan, double c[], double fct) - { - if (plan->packplan) - return cfftp_forward(plan->packplan,c,fct); - // if (plan->blueplan) - return cfftblue_forward(plan->blueplan,c,fct); - } - -typedef struct rfft_plan_i - { - rfftp_plan packplan; - fftblue_plan blueplan; - } rfft_plan_i; - -rfft_plan make_rfft_plan (size_t length) - { - if (length==0) return NULL; - rfft_plan plan = RALLOC(rfft_plan_i,1); - if (!plan) return NULL; - plan->blueplan=0; - plan->packplan=0; - if ((length<50) || (largest_prime_factor(length)<=sqrt(length))) - { - plan->packplan=make_rfftp_plan(length); - if (!plan->packplan) { DEALLOC(plan); return NULL; } - return plan; - } - double comp1 = 0.5*cost_guess(length); - double comp2 = 2*cost_guess(good_size(2*length-1)); - comp2*=1.5; /* fudge factor that appears to give good overall performance */ - if (comp2blueplan=make_fftblue_plan(length); - if (!plan->blueplan) { DEALLOC(plan); return NULL; } - } - else - { - plan->packplan=make_rfftp_plan(length); - if (!plan->packplan) { DEALLOC(plan); return NULL; } - } - return plan; - } - -void destroy_rfft_plan (rfft_plan plan) - { - if (plan->blueplan) - destroy_fftblue_plan(plan->blueplan); - if (plan->packplan) - destroy_rfftp_plan(plan->packplan); - DEALLOC(plan); - } - -size_t rfft_length(rfft_plan plan) - { - if (plan->packplan) return plan->packplan->length; - return plan->blueplan->n; - } - -size_t cfft_length(cfft_plan plan) - { - if (plan->packplan) return plan->packplan->length; - return plan->blueplan->n; - } - -WARN_UNUSED_RESULT int rfft_backward(rfft_plan plan, double c[], double fct) - { - if (plan->packplan) - return 
rfftp_backward(plan->packplan,c,fct); - else // if (plan->blueplan) - return rfftblue_backward(plan->blueplan,c,fct); - } - -WARN_UNUSED_RESULT int rfft_forward(rfft_plan plan, double c[], double fct) - { - if (plan->packplan) - return rfftp_forward(plan->packplan,c,fct); - else // if (plan->blueplan) - return rfftblue_forward(plan->blueplan,c,fct); - } diff --git a/numpy/fft/pocketfft/pocketfft.h b/numpy/fft/pocketfft/pocketfft.h deleted file mode 100644 index 16a71d256228..000000000000 --- a/numpy/fft/pocketfft/pocketfft.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * This file is part of pocketfft. - * Licensed under a 3-clause BSD style license - see LICENSE.md - */ - -/*! \file pocketfft.h - * Public interface of the pocketfft library - * - * Copyright (C) 2008-2018 Max-Planck-Society - * \author Martin Reinecke - */ - -#ifndef POCKETFFT_H -#define POCKETFFT_H - -#include -#include "numpy/numpyconfig.h" // for NPY_VISIBILITY_HIDDEN - -struct cfft_plan_i; -typedef struct cfft_plan_i * cfft_plan; -NPY_VISIBILITY_HIDDEN cfft_plan make_cfft_plan (size_t length); -NPY_VISIBILITY_HIDDEN void destroy_cfft_plan (cfft_plan plan); -NPY_VISIBILITY_HIDDEN int cfft_backward(cfft_plan plan, double c[], double fct); -NPY_VISIBILITY_HIDDEN int cfft_forward(cfft_plan plan, double c[], double fct); -NPY_VISIBILITY_HIDDEN size_t cfft_length(cfft_plan plan); - -struct rfft_plan_i; -typedef struct rfft_plan_i * rfft_plan; -NPY_VISIBILITY_HIDDEN rfft_plan make_rfft_plan (size_t length); -NPY_VISIBILITY_HIDDEN void destroy_rfft_plan (rfft_plan plan); -NPY_VISIBILITY_HIDDEN int rfft_backward(rfft_plan plan, double c[], double fct); -NPY_VISIBILITY_HIDDEN int rfft_forward(rfft_plan plan, double c[], double fct); -NPY_VISIBILITY_HIDDEN size_t rfft_length(rfft_plan plan); - -#endif diff --git a/numpy/fft/tests/test_pocketfft.py b/numpy/fft/tests/test_pocketfft.py index 9a3074ad22d2..e9f15754a369 100644 --- a/numpy/fft/tests/test_pocketfft.py +++ b/numpy/fft/tests/test_pocketfft.py @@ -33,28 
+33,35 @@ def test_identity(self): assert_allclose(np.fft.irfft(np.fft.rfft(xr[0:i]), i), xr[0:i], atol=1e-12) - def test_identity_long_short(self): + @pytest.mark.parametrize("dtype", [np.single, np.double, np.longdouble]) + def test_identity_long_short(self, dtype): # Test with explicitly given number of points, both for n # smaller and for n larger than the input size. maxlen = 16 - x = random(maxlen) + 1j*random(maxlen) + atol = 4 * np.spacing(np.array(1., dtype=dtype)) + x = random(maxlen).astype(dtype) + 1j*random(maxlen).astype(dtype) xx = np.concatenate([x, np.zeros_like(x)]) - xr = random(maxlen) + xr = random(maxlen).astype(dtype) xxr = np.concatenate([xr, np.zeros_like(xr)]) for i in range(1, maxlen*2): - assert_allclose(np.fft.ifft(np.fft.fft(x, n=i), n=i), - xx[0:i], atol=1e-12) - assert_allclose(np.fft.irfft(np.fft.rfft(xr, n=i), n=i), - xxr[0:i], atol=1e-12) - - def test_identity_long_short_reversed(self): + check_c = np.fft.ifft(np.fft.fft(x, n=i), n=i) + assert check_c.real.dtype == dtype + assert_allclose(check_c, xx[0:i], atol=atol, rtol=0) + check_r = np.fft.irfft(np.fft.rfft(xr, n=i), n=i) + assert check_r.dtype == dtype + assert_allclose(check_r, xxr[0:i], atol=atol, rtol=0) + + @pytest.mark.parametrize("dtype", [np.single, np.double, np.longdouble]) + def test_identity_long_short_reversed(self, dtype): # Also test explicitly given number of points in reversed order. 
maxlen = 16 - x = random(maxlen) + 1j*random(maxlen) + atol = 4 * np.spacing(np.array(1., dtype=dtype)) + x = random(maxlen).astype(dtype) + 1j*random(maxlen).astype(dtype) xx = np.concatenate([x, np.zeros_like(x)]) for i in range(1, maxlen*2): - assert_allclose(np.fft.fft(np.fft.ifft(x, n=i), n=i), - xx[0:i], atol=1e-12) + check_via_c = np.fft.fft(np.fft.ifft(x, n=i), n=i) + assert check_via_c.dtype == x.dtype + assert_allclose(check_via_c, xx[0:i], atol=atol, rtol=0) # For irfft, we can neither recover the imaginary part of # the first element, nor the imaginary part of the last # element if npts is even. So, set to 0 for the comparison. @@ -64,8 +71,9 @@ def test_identity_long_short_reversed(self): if i % 2 == 0: y.imag[n-1:] = 0 yy = np.concatenate([y, np.zeros_like(y)]) - assert_allclose(np.fft.rfft(np.fft.irfft(x, n=i), n=i), - yy[0:n], atol=1e-12) + check_via_r = np.fft.rfft(np.fft.irfft(x, n=i), n=i) + assert check_via_r.dtype == x.dtype + assert_allclose(check_via_r, yy[0:n], atol=atol, rtol=0) def test_fft(self): x = random(30) + 1j*random(30) @@ -143,7 +151,7 @@ def test_fft_inplace_out(self, axis): assert_array_equal(y3[:, 15:], y[:, 15:]) # In-place with n > nin; rest should be unchanged. 
y4 = y.copy() - y4_sel = y[:10] if axis == 0 else y4[:, :10] + y4_sel = y4[:10] if axis == 0 else y4[:, :10] out4 = y4[:15] if axis == 0 else y4[:, :15] expected4 = np.fft.fft(y4_sel, n=15, axis=axis) result4 = np.fft.fft(y4_sel, n=15, axis=axis, out=out4) @@ -381,15 +389,6 @@ def test_all_1d_norm_preserving(self): assert_allclose(x_norm, np.linalg.norm(tmp), atol=1e-6) - @pytest.mark.parametrize("dtype", [np.half, np.single, np.double, - np.longdouble]) - def test_dtypes(self, dtype): - # make sure that all input precisions are accepted and internally - # converted to 64bit - x = random(30).astype(dtype) - assert_allclose(np.fft.ifft(np.fft.fft(x)), x, atol=1e-6) - assert_allclose(np.fft.irfft(np.fft.rfft(x)), x, atol=1e-6) - @pytest.mark.parametrize("axes", [(0, 1), (0, 2), None]) @pytest.mark.parametrize("dtype", (complex, float)) @pytest.mark.parametrize("transpose", (True, False))