Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 45b6a3f

Browse files
committed
Add support for fast math compiler flags when building ArrayFire
1 parent a4bb0a5 commit 45b6a3f

14 files changed

+72
-12
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ option(AF_WITH_STATIC_MKL "Link against static Intel MKL libraries" OFF)
9292
option(AF_WITH_STATIC_CUDA_NUMERIC_LIBS "Link libafcuda with static numeric libraries(cublas, cufft, etc.)" OFF)
9393
option(AF_WITH_SPDLOG_HEADER_ONLY "Build ArrayFire with header only version of spdlog" OFF)
9494
option(AF_WITH_FMT_HEADER_ONLY "Build ArrayFire with header only version of fmt" OFF)
95+
option(AF_WITH_FAST_MATH "Use lower precision but high performance numeric optimizations" OFF)
9596

9697
if(AF_WITH_STATIC_CUDA_NUMERIC_LIBS)
9798
option(AF_WITH_PRUNE_STATIC_CUDA_NUMERIC_LIBS "Prune CUDA static libraries to reduce binary size.(WARNING: May break some libs on older CUDA toolkits for some compute arch)" OFF)

CMakeModules/InternalUtils.cmake

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,13 @@ if(WIN32)
2525
check_cxx_compiler_flag(/permissive- cxx_compliance)
2626
endif()
2727

28+
check_cxx_compiler_flag(-ffast-math has_cxx_fast_math)
29+
check_cxx_compiler_flag("-fp-model fast" has_cxx_fp_model)
30+
check_cxx_compiler_flag(-fno-errno-math has_cxx_no_errno_math)
31+
check_cxx_compiler_flag(-fno-trapping-math has_cxx_no_trapping_math)
32+
check_cxx_compiler_flag(-fno-signed-zeros has_cxx_no_signed_zeros)
33+
check_cxx_compiler_flag(-mno-ieee-fp has_cxx_no_ieee_fp)
34+
2835
function(arrayfire_set_default_cxx_flags target)
2936
target_compile_options(${target}
3037
PRIVATE
@@ -51,7 +58,19 @@ function(arrayfire_set_default_cxx_flags target)
5158
# ignored attribute warnings in the OpenCL
5259
# headers
5360
$<$<BOOL:${has_ignored_attributes_flag}>:-Wno-ignored-attributes>
54-
$<$<BOOL:${has_all_warnings_flag}>:-Wall>>
61+
$<$<BOOL:${has_all_warnings_flag}>:-Wall>
62+
63+
$<$<BOOL:${AF_WITH_FAST_MATH}>:
64+
$<$<BOOL:${has_cxx_fast_math}>:-ffast-math>
65+
$<$<BOOL:${has_cxx_no_errno_math}>:-fno-errno-math>
66+
$<$<BOOL:${has_cxx_no_trapping_math}>:-fno-trapping-math>
67+
$<$<BOOL:${has_cxx_no_signed_zeros}>:-fno-signed-zeros>
68+
$<$<BOOL:${has_cxx_no_ieee_fp}>:-mno-ieee-fp>
69+
>
70+
71+
$<$<NOT:$<BOOL:${AF_WITH_FAST_MATH}>>:
72+
$<$<BOOL:${has_cxx_fp_model}>:-fp-model precise>>
73+
>
5574
)
5675

5776
target_compile_definitions(${target}
@@ -65,6 +84,7 @@ function(arrayfire_set_default_cxx_flags target)
6584

6685
$<$<BOOL:${AF_WITH_LOGGING}>: AF_WITH_LOGGING>
6786
$<$<BOOL:${AF_CACHE_KERNELS_TO_DISK}>: AF_CACHE_KERNELS_TO_DISK>
87+
$<$<BOOL:${AF_WITH_FAST_MATH}>: AF_WITH_FAST_MATH>
6888
)
6989
endfunction()
7090

test/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,10 @@ target_include_directories(arrayfire_test
124124
# The tautological-constant-compare warning is always thrown for std::nan
125125
# and std::info calls. It's unnecessarily verbose.
126126
target_compile_options(arrayfire_test
127+
PUBLIC
128+
# Intel compilers use fast math by default and ignore special floating point
129+
# values like NaNs and Infs.
130+
$<$<BOOL:${has_cxx_fp_model}>:-fp-model precise>
127131
PRIVATE
128132
$<$<BOOL:${has_tautological_constant_compare_flag}>:-Wno-tautological-constant-compare>
129133
$<$<CXX_COMPILER_ID:MSVC>: /bigobj
@@ -137,6 +141,8 @@ if(WIN32)
137141
endif()
138142

139143
target_compile_definitions(arrayfire_test
144+
PUBLIC
145+
$<$<BOOL:${AF_WITH_FAST_MATH}>:AF_WITH_FAST_MATH>
140146
PRIVATE
141147
TEST_RESULT_IMAGE_DIR="${CMAKE_BINARY_DIR}/test/"
142148
USE_MTX)

test/approx1.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -777,6 +777,9 @@ TEST(Approx1, CPPUniformInvalidStepSize) {
777777
// specified by the user, ArrayFire will assume a regular grid with a
778778
// starting index of 0 and a step value of 1.
779779
TEST(Approx1, CPPInfCheck) {
780+
#ifdef __INTEL_LLVM_COMPILER
781+
SKIP_IF_FAST_MATH_ENABLED();
782+
#endif
780783
array sampled(seq(0.0, 5.0, 0.5));
781784
sampled(0) = af::Inf;
782785
seq xo(0.0, 2.0, 0.25);
@@ -799,6 +802,9 @@ TEST(Approx1, CPPInfCheck) {
799802
}
800803

801804
TEST(Approx1, CPPUniformInfCheck) {
805+
#ifdef __INTEL_LLVM_COMPILER
806+
SKIP_IF_FAST_MATH_ENABLED();
807+
#endif
802808
array sampled(seq(10.0, 50.0, 10.0));
803809
sampled(0) = af::Inf;
804810
seq xo(0.0, 8.0, 2.0);

test/half.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ TEST(Half, arith) {
8787

8888
TEST(Half, isInf) {
8989
SUPPORTED_TYPE_CHECK(af_half);
90+
SKIP_IF_FAST_MATH_ENABLED();
9091
half_float::half hinf = std::numeric_limits<half_float::half>::infinity();
9192

9293
vector<half_float::half> input(2, half_float::half(0));
@@ -105,6 +106,7 @@ TEST(Half, isInf) {
105106

106107
TEST(Half, isNan) {
107108
SUPPORTED_TYPE_CHECK(af_half);
109+
SKIP_IF_FAST_MATH_ENABLED();
108110
half_float::half hnan = std::numeric_limits<half_float::half>::quiet_NaN();
109111

110112
vector<half_float::half> input(2, half_float::half(0));

test/imageio.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -289,17 +289,17 @@ TEST(ImageIO, SaveImage16CPP) {
289289
dim4 dims(16, 24, 3);
290290

291291
array input = randu(dims, u16);
292-
array input_255 = (input / 257).as(u16);
292+
array input_255 = floor(input.as(f32) / 257);
293293

294294
std::string testname = getTestName() + "_" + getBackendName();
295295
std::string imagename = "saveImage16CPP_" + testname + ".png";
296296

297297
saveImage(imagename.c_str(), input);
298298

299299
array img = loadImage(imagename.c_str(), true);
300-
ASSERT_EQ(img.type(), f32); // loadImage should always return float
301300

302-
ASSERT_FALSE(anyTrue<bool>(abs(img - input_255)));
301+
ASSERT_EQ(img.type(), f32); // loadImage should always return float
302+
ASSERT_IMAGES_NEAR(input_255, img, 0.001);
303303
}
304304

305305
////////////////////////////////////////////////////////////////////////////////

test/ireduce.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ TEST(IndexedReduce, MaxReduceDimensionHasSingleValue) {
192192
}
193193

194194
TEST(IndexedReduce, MinNaN) {
195+
SKIP_IF_FAST_MATH_ENABLED();
195196
float test_data[] = {1.f, NAN, 5.f, 0.1f, NAN, -0.5f, NAN, 0.f};
196197
int rows = 4;
197198
int cols = 2;
@@ -218,6 +219,7 @@ TEST(IndexedReduce, MinNaN) {
218219
}
219220

220221
TEST(IndexedReduce, MaxNaN) {
222+
SKIP_IF_FAST_MATH_ENABLED();
221223
float test_data[] = {1.f, NAN, 5.f, 0.1f, NAN, -0.5f, NAN, 0.f};
222224
int rows = 4;
223225
int cols = 2;
@@ -244,6 +246,7 @@ TEST(IndexedReduce, MaxNaN) {
244246
}
245247

246248
TEST(IndexedReduce, MinCplxNaN) {
249+
SKIP_IF_FAST_MATH_ENABLED();
247250
float real_wnan_data[] = {0.005f, NAN, -6.3f, NAN, -0.5f,
248251
NAN, NAN, 0.2f, -1205.4f, 8.9f};
249252

@@ -279,6 +282,7 @@ TEST(IndexedReduce, MinCplxNaN) {
279282
}
280283

281284
TEST(IndexedReduce, MaxCplxNaN) {
285+
SKIP_IF_FAST_MATH_ENABLED();
282286
float real_wnan_data[] = {0.005f, NAN, -6.3f, NAN, -0.5f,
283287
NAN, NAN, 0.2f, -1205.4f, 8.9f};
284288

test/meanvar.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ class MeanVarTyped : public ::testing::TestWithParam<meanvar_test<T>> {
131131
ASSERT_VEC_ARRAY_NEAR(test.variance_, outDim, var, 0.5f);
132132
} else if (is_same_type<float, outType<T>>::value ||
133133
is_same_type<cfloat, outType<T>>::value) {
134-
ASSERT_VEC_ARRAY_NEAR(test.mean_, outDim, mean, 0.001f);
134+
ASSERT_VEC_ARRAY_NEAR(test.mean_, outDim, mean, 0.0016f);
135135
ASSERT_VEC_ARRAY_NEAR(test.variance_, outDim, var, 0.2f);
136136
} else {
137137
ASSERT_VEC_ARRAY_NEAR(test.mean_, outDim, mean, 0.00001f);
@@ -171,7 +171,7 @@ class MeanVarTyped : public ::testing::TestWithParam<meanvar_test<T>> {
171171
ASSERT_VEC_ARRAY_NEAR(test.variance_, outDim, var, 0.5f);
172172
} else if (is_same_type<float, outType<T>>::value ||
173173
is_same_type<cfloat, outType<T>>::value) {
174-
ASSERT_VEC_ARRAY_NEAR(test.mean_, outDim, mean, 0.001f);
174+
ASSERT_VEC_ARRAY_NEAR(test.mean_, outDim, mean, 0.0016f);
175175
ASSERT_VEC_ARRAY_NEAR(test.variance_, outDim, var, 0.2f);
176176
} else {
177177
ASSERT_VEC_ARRAY_NEAR(test.mean_, outDim, mean, 0.00001f);

test/median.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,20 +93,21 @@ void median_test(int nx, int ny = 1, int nz = 1, int nw = 1) {
9393

9494
if (sa.dims(dim) % 2 == 1) {
9595
mSeq[dim] = mSeq[dim] - 1.0;
96+
sa = sa.as((af_dtype)dtype_traits<To>::af_type);
9697
verify = sa(mSeq[0], mSeq[1], mSeq[2], mSeq[3]);
9798
} else {
9899
dim_t sdim[4] = {0};
99100
sdim[dim] = 1;
100101
sa = sa.as((af_dtype)dtype_traits<To>::af_type);
101102
array sas = shift(sa, sdim[0], sdim[1], sdim[2], sdim[3]);
102-
verify = ((sa + sas) / 2)(mSeq[0], mSeq[1], mSeq[2], mSeq[3]);
103+
verify = ((sa + sas) / To(2))(mSeq[0], mSeq[1], mSeq[2], mSeq[3]);
103104
}
104105

105106
// Test Part
106107
array out = median(a, dim);
107108

108109
ASSERT_EQ(out.dims() == verify.dims(), true);
109-
ASSERT_NEAR(0, sum<double>(abs(out - verify)), 1e-5);
110+
ASSERT_ARRAYS_EQ(verify, out);
110111
}
111112

112113
#define MEDIAN_FLAT(To, Ti) \

test/reduce.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -779,6 +779,7 @@ TEST(ReduceByKey, countReduceByKey) {
779779
}
780780

781781
TEST(ReduceByKey, ReduceByKeyNans) {
782+
SKIP_IF_FAST_MATH_ENABLED();
782783
const static int testSz = 8;
783784
const int testKeys[testSz] = {0, 2, 2, 9, 5, 5, 5, 8};
784785
const float testVals[testSz] = {0, 7, NAN, 6, 2, 5, 3, 4};
@@ -1072,6 +1073,7 @@ TYPED_TEST(Reduce, Test_Any_Global) {
10721073
}
10731074

10741075
TEST(MinMax, MinMaxNaN) {
1076+
SKIP_IF_FAST_MATH_ENABLED();
10751077
const int num = 10000;
10761078
array A = randu(num);
10771079
A(where(A < 0.25)) = NaN;
@@ -1095,6 +1097,7 @@ TEST(MinMax, MinMaxNaN) {
10951097
}
10961098

10971099
TEST(MinMax, MinCplxNaN) {
1100+
SKIP_IF_FAST_MATH_ENABLED();
10981101
float real_wnan_data[] = {0.005f, NAN, -6.3f, NAN, -0.5f,
10991102
NAN, NAN, 0.2f, -1205.4f, 8.9f};
11001103

@@ -1122,6 +1125,7 @@ TEST(MinMax, MinCplxNaN) {
11221125
}
11231126

11241127
TEST(MinMax, MaxCplxNaN) {
1128+
SKIP_IF_FAST_MATH_ENABLED();
11251129
// 4th element is unusually large to cover the case where
11261130
// one part holds the largest value among the array,
11271131
// and the other part is NaN.
@@ -1158,6 +1162,7 @@ TEST(MinMax, MaxCplxNaN) {
11581162
}
11591163

11601164
TEST(Count, NaN) {
1165+
SKIP_IF_FAST_MATH_ENABLED();
11611166
const int num = 10000;
11621167
array A = round(5 * randu(num));
11631168
array B = A;
@@ -1168,6 +1173,7 @@ TEST(Count, NaN) {
11681173
}
11691174

11701175
TEST(Sum, NaN) {
1176+
SKIP_IF_FAST_MATH_ENABLED();
11711177
const int num = 10000;
11721178
array A = randu(num);
11731179
A(where(A < 0.25)) = NaN;
@@ -1187,6 +1193,7 @@ TEST(Sum, NaN) {
11871193
}
11881194

11891195
TEST(Product, NaN) {
1196+
SKIP_IF_FAST_MATH_ENABLED();
11901197
const int num = 5;
11911198
array A = randu(num);
11921199
A(2) = NaN;
@@ -1206,6 +1213,7 @@ TEST(Product, NaN) {
12061213
}
12071214

12081215
TEST(AnyAll, NaN) {
1216+
SKIP_IF_FAST_MATH_ENABLED();
12091217
const int num = 10000;
12101218
array A = (randu(num) > 0.5).as(f32);
12111219
array B = A;
@@ -2263,6 +2271,7 @@ TYPED_TEST(Reduce, Test_Any_Global_Array) {
22632271

22642272

22652273
TEST(Reduce, Test_Sum_Global_Array_nanval) {
2274+
SKIP_IF_FAST_MATH_ENABLED();
22662275
const int num = 100000;
22672276
array a = af::randn(num, 2, 34, 4);
22682277
a(1, 0, 0, 0) = NAN;

0 commit comments

Comments
 (0)