3232 */
3333#define PW_BLOCKSIZE 128
3434
35-
36- /*
37- * largest simd vector size in bytes numpy supports
38- * it is currently an extremely large value as it is only used for memory
39- * overlap checks
40- */
41- #ifndef NPY_MAX_SIMD_SIZE
42- #define NPY_MAX_SIMD_SIZE 1024
43- #endif
44-
4535/** Provides the various *_LOOP macros */
4636#include "fast_loop_macros.h"
4737
@@ -474,74 +464,15 @@ NPY_NO_EXPORT void
474464}
475465
476466/**begin repeat1
477- * #isa = , _avx2#
478- * #CHK = 1, defined(HAVE_ATTRIBUTE_TARGET_AVX2)#
479- * #ATTR = , NPY_GCC_TARGET_AVX2#
480- */
481-
482- #if @CHK @
483- NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR @ void
484- @TYPE @_square @isa @(char * * args , npy_intp const * dimensions , npy_intp const * steps , void * NPY_UNUSED (data ))
485- {
486- UNARY_LOOP_FAST (@type @, @type @, * out = in * in );
487- }
488- #endif
489-
490- #if @CHK @
491- NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR @ void
492- @TYPE @_reciprocal @isa @(char * * args , npy_intp const * dimensions , npy_intp const * steps , void * NPY_UNUSED (data ))
493- {
494- UNARY_LOOP_FAST (@type @, @type @, * out = 1.0 / in );
495- }
496- #endif
497-
498- #if @CHK @
499- NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR @ void
500- @TYPE @_conjugate @isa @(char * * args , npy_intp const * dimensions , npy_intp const * steps , void * NPY_UNUSED (func ))
501- {
502- UNARY_LOOP_FAST (@type @, @type @, * out = in );
503- }
504- #endif
505-
506- #if @CHK @
507- NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR @ void
508- @TYPE @_logical_not @isa @(char * * args , npy_intp const * dimensions , npy_intp const * steps , void * NPY_UNUSED (func ))
509- {
510- UNARY_LOOP_FAST (@type @, npy_bool , * out = !in );
511- }
512- #endif
513-
514- #if @CHK @
515- NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR @ void
516- @TYPE @_invert @isa @(char * * args , npy_intp const * dimensions , npy_intp const * steps , void * NPY_UNUSED (func ))
517- {
518- UNARY_LOOP_FAST (@type @, @type @, * out = ~in );
519- }
520- #endif
521-
522- /**begin repeat2
523467 * Arithmetic
524468 * #kind = add, subtract, multiply, bitwise_and, bitwise_or, bitwise_xor#
525469 * #OP = +, -, *, &, |, ^#
526470 */
527471
528- #if @CHK @
529- NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR @ void
530- @TYPE @_ @kind @@isa @(char * * args , npy_intp const * dimensions ,
531- npy_intp const * steps , void * NPY_UNUSED (func ))
532- {
533- if (IS_BINARY_REDUCE ) {
534- BINARY_REDUCE_LOOP_FAST (@type @, io1 @OP @= in2 );
535- }
536- else {
537- BINARY_LOOP_FAST (@type @, @type @, * out = in1 @OP @ in2 );
538- }
539- }
540-
541- NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR @ int
542- @TYPE @_ @kind @@isa @_indexed (PyArrayMethod_Context * NPY_UNUSED (context ),
543- char * * args , npy_intp const * dimensions , npy_intp const * steps ,
544- void * NPY_UNUSED (func ))
472+ NPY_NO_EXPORT NPY_GCC_OPT_3 int
473+ @TYPE @_ @kind @_indexed (PyArrayMethod_Context * NPY_UNUSED (context ),
474+ char * * args , npy_intp const * dimensions , npy_intp const * steps ,
475+ void * NPY_UNUSED (func ))
545476{
546477 char * ip1 = args [0 ];
547478 char * indx = args [1 ];
@@ -556,86 +487,6 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ int
556487 }
557488 return 0 ;
558489}
559-
560- #endif
561-
562- /**end repeat2**/
563-
564- /*
565- * Arithmetic bit shift operations.
566- *
567- * Intel hardware masks bit shift values, so large shifts wrap around
568- * and can produce surprising results. The special handling ensures that
569- * behavior is independent of compiler or hardware.
570- * TODO: We could implement consistent behavior for negative shift counts,
571- * which are undefined behavior in C.
572- */
573-
574- #define INT_left_shift_needs_clear_floatstatus
575- #define UINT_left_shift_needs_clear_floatstatus
576-
577- #if @CHK @
578- NPY_NO_EXPORT NPY_GCC_OPT_3 void
579- @TYPE @_left_shift @isa @(char * * args , npy_intp const * dimensions , npy_intp const * steps ,
580- void * NPY_UNUSED (func ))
581- {
582- BINARY_LOOP_FAST (@type @, @type @, * out = npy_lshift @c @(in1 , in2 ));
583-
584- #ifdef @TYPE @_left_shift_needs_clear_floatstatus
585- // For some reason, our macOS CI sets an "invalid" flag here, but only
586- // for some types.
587- npy_clear_floatstatus_barrier ( (char * )dimensions );
588- #endif
589- }
590- #endif
591-
592- #undef INT_left_shift_needs_clear_floatstatus
593- #undef UINT_left_shift_needs_clear_floatstatus
594-
595- #if @CHK @
596- NPY_NO_EXPORT
597- #ifndef NPY_DO_NOT_OPTIMIZE_ @TYPE @_right_shift
598- NPY_GCC_OPT_3
599- #endif
600- void
601- @TYPE @_right_shift @isa @(char * * args , npy_intp const * dimensions , npy_intp const * steps ,
602- void * NPY_UNUSED (func ))
603- {
604- BINARY_LOOP_FAST (@type @, @type @, * out = npy_rshift @c @(in1 , in2 ));
605- }
606- #endif
607-
608- /**begin repeat2
609- * #kind = logical_and, logical_or#
610- * #OP = &&, ||#
611- */
612-
613- #if @CHK @
614- NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR @ void
615- @TYPE @_ @kind @@isa @(char * * args , npy_intp const * dimensions , npy_intp const * steps , void * NPY_UNUSED (func ))
616- {
617- /*
618- * gcc vectorization of this is not good (PR60575) but manual integer
619- * vectorization is too tedious to be worthwhile
620- */
621- BINARY_LOOP_FAST (@type @, npy_bool , * out = in1 @OP @ in2 );
622- }
623- #endif
624-
625- /**end repeat2**/
626-
627- #if @CHK @
628- NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR @ void
629- @TYPE @_logical_xor @isa @(char * * args , npy_intp const * dimensions , npy_intp const * steps , void * NPY_UNUSED (func ))
630- {
631- BINARY_LOOP {
632- const int t1 = !!* (@type @ * )ip1 ;
633- const int t2 = !!* (@type @ * )ip2 ;
634- * ((npy_bool * )op1 ) = (t1 != t2 );
635- }
636- }
637- #endif
638-
639490/**end repeat1**/
640491
641492NPY_NO_EXPORT void
@@ -1714,7 +1565,7 @@ HALF_@kind@_indexed(void *NPY_UNUSED(context),
17141565 const float v = npy_half_to_float (* (npy_half * )value );
17151566 * indexed = npy_float_to_half (npy_half_to_float (* indexed ) @OP @ v );
17161567 }
1717- return 0 ;
1568+ return 0 ;
17181569}
17191570/**end repeat**/
17201571
0 commit comments