45 changes: 45 additions & 0 deletions modules/dnn/perf/perf_layer.cpp
@@ -975,4 +975,49 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_Softmax, Combine(
/* withCann= */ false) // only test on CPU
));

using Layer_Elementwise = TestBaseWithParam<tuple<std::vector<int>, std::string, tuple<Backend, Target>>>;
PERF_TEST_P_(Layer_Elementwise, elementwise) {
std::vector<int> input_shape = get<0>(GetParam());
std::string op = get<1>(GetParam());
int backend_id = get<0>(get<2>(GetParam()));
int target_id = get<1>(get<2>(GetParam()));

Mat input(input_shape, CV_32F);
randn(input, 0.f, 1.f);

LayerParams lp;
lp.type = op;
lp.name = "TestLayer";

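// Build a one-layer network containing only the elementwise op under test.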
Net net;
net.addLayerToPrev(lp.name, lp.type, lp);

// Warmup
{
net.setInput(input);
net.setPreferableBackend(backend_id);
net.setPreferableTarget(target_id);
Mat out = net.forward();
}

TEST_CYCLE() {
net.forward();
}

SANITY_CHECK_NOTHING();
}

INSTANTIATE_TEST_CASE_P(/**/, Layer_Elementwise, testing::Combine(
testing::Values(std::vector<int>{1, 50, 3072}),
testing::Values(std::string{"Gelu"}),
dnnBackendsAndTargets(/* withInferenceEngine= */ true,
/* withHalide= */ false,
/* withCpuOCV= */ true,
/* withVkCom= */ false,
/* withCUDA= */ true,
/* withNgraph= */ true,
/* withWebnn= */ false,
/* withCann= */ false)
));

} // namespace
131 changes: 118 additions & 13 deletions modules/dnn/src/layers/elementwise_layers.cpp
@@ -813,20 +813,82 @@ struct BaseDefaultFunctor : public BaseFunctor
static const char* const ocl_kernel_name;
};

struct GeluFunctor : public BaseDefaultFunctor<GeluFunctor>
{
typedef GeluLayer Layer;
namespace {
// Refer to v_erf in modules/core/include/opencv2/core/hal/intrin_math.hpp
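// The constants below are the coefficients of the Abramowitz-Stegun polynomial approximation of erf (formula 7.1.26).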
constexpr float c_erf_coef0 = 0.3275911f;
constexpr float c_erf_coef1 = 1.061405429f;
constexpr float c_erf_coef2 = -1.453152027f;
constexpr float c_erf_coef3 = 1.421413741f;
constexpr float c_erf_coef4 = -0.284496736f;
constexpr float c_erf_coef5 = 0.254829592f;

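// Scalar fallback matching v_erf: erf(v) ~= sign(v) * (1 - t * poly(t) * exp(-v*v)), with t = 1 / (1 + c_erf_coef0 * |v|).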
inline float erf_approx(float v) {
float t = 1.f / fmaf(fabsf(v), c_erf_coef0, 1.f);
float r = fmaf(c_erf_coef1, t, c_erf_coef2);
r = fmaf(r, t, c_erf_coef3);
r = fmaf(r, t, c_erf_coef4);
r = fmaf(r, t, c_erf_coef5);
r = 1.f - r * t * expf(-v * v);
return std::copysignf(r, v);
}
}  // namespace

explicit GeluFunctor() {}
struct GeluFunctor : public BaseFunctor {
using Layer = GeluLayer;
int vlanes;

bool supportBackend(int backendId, int)
{
return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
explicit GeluFunctor() {
#if (CV_SIMD || CV_SIMD_SCALABLE)
vlanes = VTraits<v_float32>::vlanes();
#else
vlanes = 1;
#endif
}

inline float calculate(float x) const
{
return 0.5f * x * (1.0f + erf(x * M_SQRT1_2));
bool supportBackend(int backendId, int) {
return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
}

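// Forward pass over channels [cn0, cn1): vectorized GELU on full SIMD lanes, scalar tail for the remainder.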
void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const {
CV_UNUSED(stripeStart);
for (int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize) {
int i = 0;
#if (CV_SIMD || CV_SIMD_SCALABLE)
// 0.5f * x * (1.0f + erf(x * M_SQRT1_2));
v_float32 half = vx_setall_f32(0.5f),
one = vx_setall_f32(1.0f),
reciprocal_sqrt2 = vx_setall_f32(M_SQRT1_2);
for (; i <= len - vlanes; i += vlanes) {
if (i + vlanes > len) {
if (i == 0 || i == len) {
break;
}
i = len - vlanes;
}
v_float32 x0 = vx_load(srcptr + i);

// t = x * M_SQRT1_2
v_float32 t0 = v_mul(reciprocal_sqrt2, x0);

// t = 1.0f + erf(t)
t0 = v_add(one, v_erf(t0));

// x = 0.5 * x
x0 = v_mul(half, x0);

// x = x * t
x0 = v_mul(x0, t0);

vx_store(dstptr + i, x0);
}
#endif
// 0.5f * x * (1.0f + erf(x * M_SQRT1_2));
for( ; i < len; i++ )
{
float x = srcptr[i];
dstptr[i] = 0.5f * x * (1.0f + erf_approx(x * M_SQRT1_2));
}
}
}

#ifdef HAVE_CUDA
@@ -836,12 +898,55 @@ struct GeluFunctor : public BaseDefaultFunctor<GeluFunctor>
}
#endif

#ifdef HAVE_OPENCL
bool initKernel(ocl::Kernel &ker, const UMat &src) const
{
String buildopt = oclGetTMacro(src);

if (!ker.create("GeluForward", ocl::dnn::activations_oclsrc, buildopt))
return false;

return true;
}

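// OpenCL path: run the GeluForward kernel on each input blob, one work-item per element.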
bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;

inps.getUMatVector(inputs);
outs.getUMatVector(outputs);

for (size_t i = 0; i < inputs.size(); i++)
{
UMat& src = inputs[i];
UMat& dst = outputs[i];
CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset);

ocl::Kernel kernel;
CV_Assert(initKernel(kernel, src));
kernel.set(0, (int)src.total());
kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));

size_t gSize = src.total();
CV_Assert(kernel.run(1, &gSize, NULL, false));
}

return true;
}
#endif

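// OpenVINO path: map the layer directly onto the standard nGraph Gelu op.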
#ifdef HAVE_DNN_NGRAPH
std::shared_ptr<ngraph::Node> initNgraphAPI(const ngraph::Output<ngraph::Node>& node)
{
return std::make_shared<ov::op::v0::Gelu>(node);
}
#endif // HAVE_DNN_NGRAPH

int64 getFLOPSPerElement() const { return 100; }
};

template<>
const char* const BaseDefaultFunctor<GeluFunctor>::ocl_kernel_name = "GeluForward";

namespace GeluApproximationConstants
{
static constexpr float sqrt_2_pi = 0.7978845834732056f;
4 changes: 4 additions & 0 deletions modules/dnn/src/opencl/activations.cl
@@ -48,6 +48,10 @@
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#endif

#if !defined(M_SQRT1_2)
#define M_SQRT1_2 0.707106781186547524400844362104849039 /* 1/sqrt(2) */
#endif

__kernel void ReLUForward(const int count, __global const T* in, __global T* out
#ifndef RELU_NO_SLOPE
, KERNEL_ARG_DTYPE negative_slope
44 changes: 44 additions & 0 deletions modules/dnn/test/test_onnx_conformance.cpp
@@ -282,6 +282,14 @@ static const TestCase testConformanceConfig[] = {
{"test_gathernd_example_float32", 2, 1},
{"test_gathernd_example_int32", 2, 1},
{"test_gathernd_example_int32_batch_dim1", 2, 1},
{"test_gelu_default_1", 1, 1},
{"test_gelu_default_1_expanded", 1, 1},
{"test_gelu_default_2", 1, 1},
{"test_gelu_default_2_expanded", 1, 1},
{"test_gelu_tanh_1", 1, 1},
{"test_gelu_tanh_1_expanded", 1, 1},
{"test_gelu_tanh_2", 1, 1},
{"test_gelu_tanh_2_expanded", 1, 1},
{"test_gemm_all_attributes", 3, 1},
{"test_gemm_alpha", 3, 1},
{"test_gemm_beta", 3, 1},
@@ -1123,6 +1131,19 @@ TEST_P(Test_ONNX_conformance, Layer_Test)
if (name == "test_pow") {
default_lInf = 0.00013; // Expected: (normInf) <= (lInf), actual: 0.00012207 vs 0.0001
}
if (name == "test_gelu_tanh_1") {
default_l1 = 0.00011; // Expected: (normL1) <= (l1), actual: 0.000101805 vs 1e-05
default_lInf = 0.00016; // Expected: (normInf) <= (lInf), actual: 0.000152707 vs 0.0001
}
if (name == "test_gelu_tanh_2") {
if (target == DNN_TARGET_OPENCL_FP16) {
default_l1 = 0.00016; // Expected: (normL1) <= (l1), actual: 0.000157223 vs 9e-05
default_lInf = 0.0016; // Expected: (normInf) <= (lInf), actual: 0.00153041 vs 0.0005
} else {
default_l1 = 9e-5; // Expected: (normL1) <= (l1), actual: 8.80073e-05 vs 1e-05
default_lInf = 0.0005; // Expected: (normInf) <= (lInf), actual: 0.000455521 vs 0.0001
}
}
}
#ifdef HAVE_HALIDE
else if (backend == DNN_BACKEND_HALIDE)
@@ -1146,6 +1167,15 @@ TEST_P(Test_ONNX_conformance, Layer_Test)
{
applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE);
}

if (name == "test_gelu_tanh_1") {
default_l1 = 0.00011; // Expected: (normL1) <= (l1), actual: 0.000101805 vs 1e-05
default_lInf = 0.00016; // Expected: (normInf) <= (lInf), actual: 0.000152707 vs 0.0001
}
if (name == "test_gelu_tanh_2") {
default_l1 = 9e-5; // Expected: (normL1) <= (l1), actual: 8.80073e-05 vs 1e-05
default_lInf = 0.0005; // Expected: (normInf) <= (lInf), actual: 0.000455521 vs 0.0001
}
}
#endif
#ifdef HAVE_CUDA
@@ -1159,6 +1189,20 @@ TEST_P(Test_ONNX_conformance, Layer_Test)
{
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE);
}

if (name == "test_gelu_tanh_1") {
default_l1 = 0.00011; // Expected: (normL1) <= (l1), actual: 0.000101815 vs 1e-05
default_lInf = 0.00016; // Expected: (normInf) <= (lInf), actual: 0.000152737 vs 0.0001
}
if (name == "test_gelu_tanh_2") {
if (target == DNN_TARGET_CUDA_FP16) {
default_l1 = 0.00023; // Expected: (normL1) <= (l1), actual: 0.000220591 vs 9e-05
default_lInf = 0.0023; // Expected: (normInf) <= (lInf), actual: 0.00220466 vs 0.0005
} else {
default_l1 = 9e-5; // Expected: (normL1) <= (l1), actual: 8.80127e-05 vs 1e-05
default_lInf = 0.0005; // Expected: (normInf) <= (lInf), actual: 0.000455445 vs 0.0001
}
}
}
#endif
else
@@ -688,6 +688,36 @@ CASE(test_gathernd_example_int32)
// no filter
CASE(test_gathernd_example_int32_batch_dim1)
// no filter
CASE(test_gelu_default_1)
// no filter
CASE(test_gelu_default_1_expanded)
// no filter
CASE(test_gelu_default_2)
// no filter
CASE(test_gelu_default_2_expanded)
// no filter
CASE(test_gelu_tanh_1)
if (target == DNN_TARGET_CPU) {
default_l1 = 0.00011; // Expected: (normL1) <= (l1), actual: 0.000101805 vs 1e-05
default_lInf = 0.00016; // Expected: (normInf) <= (lInf), actual: 0.000152707 vs 0.0001
}
if (target == DNN_TARGET_OPENCL) {
default_l1 = 0.00011; // Expected: (normL1) <= (l1), actual: 0.000101815 vs 1e-05
default_lInf = 0.00016; // Expected: (normInf) <= (lInf), actual: 0.000152737 vs 0.0001
}
CASE(test_gelu_tanh_1_expanded)
// no filter
CASE(test_gelu_tanh_2)
if (target == DNN_TARGET_CPU) {
default_l1 = 9e-5; // Expected: (normL1) <= (l1), actual: 8.80057e-05 vs 1e-05
default_lInf = 0.00046; // Expected: (normInf) <= (lInf), actual: 0.000455521 vs 0.0001
}
if (target == DNN_TARGET_OPENCL) {
default_l1 = 9e-5; // Expected: (normL1) <= (l1), actual: 8.80144e-05 vs 1e-05
default_lInf = 0.00046; // Expected: (normInf) <= (lInf), actual: 0.000455445 vs 0.0001
}
CASE(test_gelu_tanh_2_expanded)
// no filter
CASE(test_gemm_all_attributes)
// no filter
CASE(test_gemm_alpha)
@@ -117,6 +117,10 @@
"test_gathernd_example_float32",
"test_gathernd_example_int32",
"test_gathernd_example_int32_batch_dim1",
"test_gelu_default_1_expanded", // parser: no corresponding layer for CastLike
"test_gelu_default_2_expanded", // parser: no corresponding layer for CastLike
"test_gelu_tanh_1_expanded", // parser: no corresponding layer for CastLike
"test_gelu_tanh_2_expanded", // parser: no corresponding layer for CastLike
"test_gemm_all_attributes",
"test_gemm_alpha",
"test_gemm_beta",
9 changes: 9 additions & 0 deletions modules/dnn/test/test_onnx_importer.cpp
@@ -3149,6 +3149,15 @@ TEST_P(Test_ONNX_nets, ViT_B_32) {
l1 = 0.008;
lInf = 0.04;
}
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
if (target == DNN_TARGET_CPU) {
l1 = 4.4e-5; // Expected: (normL1) <= (l1), actual: 4.31208e-05 vs 1e-05
lInf = 0.0002; // Expected: (normInf) <= (lInf), actual: 0.000194907 vs 0.0001
} else if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) {
l1 = 0.0092; // Expected: (normL1) <= (l1), actual: 0.00918349 vs 4.4e-05
lInf = 0.056; // Expected: (normInf) <= (lInf), actual: 0.0556431 vs 0.0002
}
}

normAssert(ref, out, "ViTB_32", l1, lInf);
}