Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

ENH: Move dispatch-able umath fast-loops to the new dispatcher #16396

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 60 additions & 25 deletions numpy/core/code_generators/generate_umath.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,11 @@ class TypeDescription:
simd: list
Available SIMD ufunc loops, dispatched at runtime in specified order
Currently only supported for simple types (see make_arrays)
dispatch: list
Available dispatch-able ufunc loops, given as dispatch source names (file name without the '.dispatch.c*' extension), dispatched at runtime in specified order
Currently only supported for simple types (see make_arrays)
"""
def __init__(self, type, f=None, in_=None, out=None, astype=None, simd=None):
def __init__(self, type, f=None, in_=None, out=None, astype=None, simd=None, dispatch=None):
self.type = type
self.func_data = f
if astype is None:
Expand All @@ -62,6 +65,15 @@ def __init__(self, type, f=None, in_=None, out=None, astype=None, simd=None):
out = out.replace('P', type)
self.out = out
self.simd = simd
"""
TODO:
- remove 'SIMD' after integrating the rest of the SIMD loops
- support astype; this can be done by covering each func with NPYV intrinsics.
Also, the SIMD kernels of PyUFunc_*_As_* must handle the remaining scalars as vectors,
since func_data only takes one pointer for each type.
A lot of work, I know, but it will leave a good memory.
"""
self.dispatch = dispatch

def finish_signature(self, nin, nout):
if self.in_ is None:
Expand All @@ -86,7 +98,7 @@ def build_func_data(types, f):
func_data = [_fdata_map.get(t, '%s') % (f,) for t in types]
return func_data

def TD(types, f=None, astype=None, in_=None, out=None, simd=None):
def TD(types, f=None, astype=None, in_=None, out=None, simd=None, dispatch=None):
if f is not None:
if isinstance(f, str):
func_data = build_func_data(types, f)
Expand Down Expand Up @@ -115,7 +127,14 @@ def TD(types, f=None, astype=None, in_=None, out=None, simd=None):
simdt = [k for k, v in simd if t in v]
else:
simdt = []
tds.append(TypeDescription(t, f=fd, in_=i, out=o, astype=astype, simd=simdt))
# [(dispatch file name without extension '.dispatch.c*', list of types)]
if dispatch:
dispt = [k for k, v in dispatch if t in v]
else:
dispt = []
tds.append(TypeDescription(
t, f=fd, in_=i, out=o, astype=astype, simd=simdt, dispatch=dispt
))
return tds

class Ufunc:
Expand Down Expand Up @@ -269,7 +288,8 @@ def english_upper(s):
Ufunc(2, 1, Zero,
docstrings.get('numpy.core.umath.add'),
'PyUFunc_AdditionTypeResolver',
TD(notimes_or_obj, simd=[('avx512f', cmplxvec),('avx2', ints)]),
TD(notimes_or_obj, simd=[('avx512f', cmplxvec)],
dispatch=[('loops_fast', ints)]),
[TypeDescription('M', FullTypeDescr, 'Mm', 'M'),
TypeDescription('m', FullTypeDescr, 'mm', 'm'),
TypeDescription('M', FullTypeDescr, 'mM', 'M'),
Expand All @@ -280,7 +300,8 @@ def english_upper(s):
Ufunc(2, 1, None, # Zero is only a unit to the right, not the left
docstrings.get('numpy.core.umath.subtract'),
'PyUFunc_SubtractionTypeResolver',
TD(ints + inexact, simd=[('avx512f', cmplxvec),('avx2', ints)]),
TD(ints + inexact, simd=[('avx512f', cmplxvec)],
dispatch=[('loops_fast', ints)]),
[TypeDescription('M', FullTypeDescr, 'Mm', 'M'),
TypeDescription('m', FullTypeDescr, 'mm', 'm'),
TypeDescription('M', FullTypeDescr, 'MM', 'm'),
Expand All @@ -291,7 +312,8 @@ def english_upper(s):
Ufunc(2, 1, One,
docstrings.get('numpy.core.umath.multiply'),
'PyUFunc_MultiplicationTypeResolver',
TD(notimes_or_obj, simd=[('avx512f', cmplxvec),('avx2', ints)]),
TD(notimes_or_obj, simd=[('avx512f', cmplxvec)],
dispatch=[('loops_fast', ints)]),
[TypeDescription('m', FullTypeDescr, 'mq', 'm'),
TypeDescription('m', FullTypeDescr, 'qm', 'm'),
TypeDescription('m', FullTypeDescr, 'md', 'm'),
Expand Down Expand Up @@ -326,7 +348,8 @@ def english_upper(s):
Ufunc(1, 1, None,
docstrings.get('numpy.core.umath.conjugate'),
None,
TD(ints+flts+cmplx, simd=[('avx2', ints), ('avx512f', cmplxvec)]),
TD(ints+flts+cmplx, simd=[('avx512f', cmplxvec)],
dispatch=[('loops_fast', ints)]),
TD(P, f='conjugate'),
),
'fmod':
Expand All @@ -341,14 +364,16 @@ def english_upper(s):
Ufunc(1, 1, None,
docstrings.get('numpy.core.umath.square'),
None,
TD(ints+inexact, simd=[('avx2', ints), ('fma', 'fd'), ('avx512f', 'FDfd')]),
TD(ints+inexact, simd=[('fma', 'fd'), ('avx512f', 'FDfd')],
dispatch=[('loops_fast', ints)]),
TD(O, f='Py_square'),
),
'reciprocal':
Ufunc(1, 1, None,
docstrings.get('numpy.core.umath.reciprocal'),
None,
TD(ints+inexact, simd=[('avx2', ints), ('fma', 'fd'), ('avx512f','fd')]),
TD(ints+inexact, simd=[('fma', 'fd'), ('avx512f','fd')],
dispatch=[('loops_fast', ints)]),
TD(O, f='Py_reciprocal'),
),
# This is no longer used as numpy.ones_like, however it is
Expand Down Expand Up @@ -392,7 +417,7 @@ def english_upper(s):
Ufunc(1, 1, None,
docstrings.get('numpy.core.umath.negative'),
'PyUFunc_NegativeTypeResolver',
TD(ints+flts+timedeltaonly, simd=[('avx2', ints)]),
TD(ints+flts+timedeltaonly, dispatch=[('loops_fast', ints)]),
TD(cmplx, f='neg'),
TD(O, f='PyNumber_Negative'),
),
Expand All @@ -414,71 +439,71 @@ def english_upper(s):
Ufunc(2, 1, None,
docstrings.get('numpy.core.umath.greater'),
'PyUFunc_SimpleBinaryComparisonTypeResolver',
TD(all, out='?', simd=[('avx2', ints)]),
TD(all, out='?', dispatch=[('loops_fast', ints)]),
[TypeDescription('O', FullTypeDescr, 'OO', 'O')],
TD('O', out='?'),
),
'greater_equal':
Ufunc(2, 1, None,
docstrings.get('numpy.core.umath.greater_equal'),
'PyUFunc_SimpleBinaryComparisonTypeResolver',
TD(all, out='?', simd=[('avx2', ints)]),
TD(all, out='?', dispatch=[('loops_fast', ints)]),
[TypeDescription('O', FullTypeDescr, 'OO', 'O')],
TD('O', out='?'),
),
'less':
Ufunc(2, 1, None,
docstrings.get('numpy.core.umath.less'),
'PyUFunc_SimpleBinaryComparisonTypeResolver',
TD(all, out='?', simd=[('avx2', ints)]),
TD(all, out='?', dispatch=[('loops_fast', ints)]),
[TypeDescription('O', FullTypeDescr, 'OO', 'O')],
TD('O', out='?'),
),
'less_equal':
Ufunc(2, 1, None,
docstrings.get('numpy.core.umath.less_equal'),
'PyUFunc_SimpleBinaryComparisonTypeResolver',
TD(all, out='?', simd=[('avx2', ints)]),
TD(all, out='?', dispatch=[('loops_fast', ints)]),
[TypeDescription('O', FullTypeDescr, 'OO', 'O')],
TD('O', out='?'),
),
'equal':
Ufunc(2, 1, None,
docstrings.get('numpy.core.umath.equal'),
'PyUFunc_SimpleBinaryComparisonTypeResolver',
TD(all, out='?', simd=[('avx2', ints)]),
TD(all, out='?', dispatch=[('loops_fast', ints)]),
[TypeDescription('O', FullTypeDescr, 'OO', 'O')],
TD('O', out='?'),
),
'not_equal':
Ufunc(2, 1, None,
docstrings.get('numpy.core.umath.not_equal'),
'PyUFunc_SimpleBinaryComparisonTypeResolver',
TD(all, out='?', simd=[('avx2', ints)]),
TD(all, out='?', dispatch=[('loops_fast', ints)]),
[TypeDescription('O', FullTypeDescr, 'OO', 'O')],
TD('O', out='?'),
),
'logical_and':
Ufunc(2, 1, True_,
docstrings.get('numpy.core.umath.logical_and'),
'PyUFunc_SimpleBinaryComparisonTypeResolver',
TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)]),
TD(nodatetime_or_obj, out='?', dispatch=[('loops_fast', ints)]),
TD(O, f='npy_ObjectLogicalAnd'),
TD(O, f='npy_ObjectLogicalAnd', out='?'),
),
'logical_not':
Ufunc(1, 1, None,
docstrings.get('numpy.core.umath.logical_not'),
None,
TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)]),
TD(nodatetime_or_obj, out='?', dispatch=[('loops_fast', ints)]),
TD(O, f='npy_ObjectLogicalNot'),
TD(O, f='npy_ObjectLogicalNot', out='?'),
),
'logical_or':
Ufunc(2, 1, False_,
docstrings.get('numpy.core.umath.logical_or'),
'PyUFunc_SimpleBinaryComparisonTypeResolver',
TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)]),
TD(nodatetime_or_obj, out='?', dispatch=[('loops_fast', ints)]),
TD(O, f='npy_ObjectLogicalOr'),
TD(O, f='npy_ObjectLogicalOr', out='?'),
),
Expand Down Expand Up @@ -540,42 +565,42 @@ def english_upper(s):
Ufunc(2, 1, AllOnes,
docstrings.get('numpy.core.umath.bitwise_and'),
None,
TD(bints, simd=[('avx2', ints)]),
TD(bints, dispatch=[('loops_fast', ints)]),
TD(O, f='PyNumber_And'),
),
'bitwise_or':
Ufunc(2, 1, Zero,
docstrings.get('numpy.core.umath.bitwise_or'),
None,
TD(bints, simd=[('avx2', ints)]),
TD(bints, dispatch=[('loops_fast', ints)]),
TD(O, f='PyNumber_Or'),
),
'bitwise_xor':
Ufunc(2, 1, Zero,
docstrings.get('numpy.core.umath.bitwise_xor'),
None,
TD(bints, simd=[('avx2', ints)]),
TD(bints, dispatch=[('loops_fast', ints)]),
TD(O, f='PyNumber_Xor'),
),
'invert':
Ufunc(1, 1, None,
docstrings.get('numpy.core.umath.invert'),
None,
TD(bints, simd=[('avx2', ints)]),
TD(bints, dispatch=[('loops_fast', ints)]),
TD(O, f='PyNumber_Invert'),
),
'left_shift':
Ufunc(2, 1, None,
docstrings.get('numpy.core.umath.left_shift'),
None,
TD(ints, simd=[('avx2', ints)]),
TD(ints, dispatch=[('loops_fast', ints)]),
TD(O, f='PyNumber_Lshift'),
),
'right_shift':
Ufunc(2, 1, None,
docstrings.get('numpy.core.umath.right_shift'),
None,
TD(ints, simd=[('avx2', ints)]),
TD(ints, dispatch=[('loops_fast', ints)]),
TD(O, f='PyNumber_Rshift'),
),
'heaviside':
Expand Down Expand Up @@ -1024,6 +1049,16 @@ def make_arrays(funcdict):
ISA=vt.upper(), isa=vt,
fname=name, type=tname, idx=k
))
if t.dispatch is not None:
for dname in t.dispatch:
code2list.append(textwrap.dedent("""\
#ifndef NPY_DISABLE_OPTIMIZATION
#include "{dname}.dispatch.h"
#endif
NPY_CPU_DISPATCH_CALL_XB({name}_functions[{k}] = {tname}_{name})
""").format(
dname=dname, name=name, tname=tname, k=k
))
else:
funclist.append('NULL')
try:
Expand Down
1 change: 1 addition & 0 deletions numpy/core/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -906,6 +906,7 @@ def generate_umath_c(ext, build_dir):
join('src', 'umath', 'scalarmath.c.src'),
join('src', 'umath', 'ufunc_type_resolution.c'),
join('src', 'umath', 'override.c'),
join('src', 'umath', 'loops_fast.dispatch.c.src'),
]

umath_deps = [
Expand Down
Loading