-
Notifications
You must be signed in to change notification settings - Fork 56
Expand file tree
/
Copy pathDEL_DOT_VEC_2D.hpp
More file actions
157 lines (135 loc) · 4.62 KB
/
DEL_DOT_VEC_2D.hpp
File metadata and controls
157 lines (135 loc) · 4.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Copyright (c) Lawrence Livermore National Security, LLC and other
// RAJA Project Developers. See top-level LICENSE and COPYRIGHT
// files for dates and other details. No copyright assignment is required
// to contribute to RAJA Performance Suite.
//
// SPDX-License-Identifier: (BSD-3-Clause)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
///
/// DEL_DOT_VEC_2D kernel reference implementation:
///
/// for (Index_type ii = ibegin; ii < iend; ++ii ) {
/// Index_type i = real_zones[ii];
///
/// Real_type xi = half * ( x1[i] + x2[i] - x3[i] - x4[i] ) ;
/// Real_type xj = half * ( x2[i] + x3[i] - x4[i] - x1[i] ) ;
///
/// Real_type yi = half * ( y1[i] + y2[i] - y3[i] - y4[i] ) ;
/// Real_type yj = half * ( y2[i] + y3[i] - y4[i] - y1[i] ) ;
///
/// Real_type fxi = half * ( fx1[i] + fx2[i] - fx3[i] - fx4[i] ) ;
/// Real_type fxj = half * ( fx2[i] + fx3[i] - fx4[i] - fx1[i] ) ;
///
/// Real_type fyi = half * ( fy1[i] + fy2[i] - fy3[i] - fy4[i] ) ;
/// Real_type fyj = half * ( fy2[i] + fy3[i] - fy4[i] - fy1[i] ) ;
///
/// Real_type rarea = 1.0 / ( xi * yj - xj * yi + ptiny ) ;
///
/// Real_type dfxdx = rarea * ( fxi * yj - fxj * yi ) ;
///
/// Real_type dfydy = rarea * ( fyj * xi - fyi * xj ) ;
///
/// Real_type affine = ( fy1[i] + fy2[i] + fy3[i] + fy4[i] ) /
/// ( y1[i] + y2[i] + y3[i] + y4[i] ) ;
///
/// div[i] = dfxdx + dfydy + affine ;
/// }
///
#ifndef RAJAPerf_Apps_DEL_DOT_VEC_2D_HPP
#define RAJAPerf_Apps_DEL_DOT_VEC_2D_HPP
// Declares the local working set used by the DEL_DOT_VEC_2D kernel macros.
// Expansion site must provide the members m_x, m_y, m_xdot, m_ydot, m_div,
// m_ptiny, m_half, m_domain and m_real_zones, plus the NDSET2D macro.
// NOTE(review): NDSET2D is defined elsewhere; presumably it points the four
// node pointers at offsets of the base array using m_domain->jp -- confirm
// against its definition.
#define DEL_DOT_VEC_2D_DATA_SETUP \
  /* Zone index list and scalar constants. */ \
  Index_ptr real_zones = m_real_zones; \
  const Real_type half = m_half; \
  const Real_type ptiny = m_ptiny; \
  \
  /* x field and its four node pointers. */ \
  Real_ptr x = m_x; \
  Real_ptr x1; \
  Real_ptr x2; \
  Real_ptr x3; \
  Real_ptr x4; \
  NDSET2D(m_domain->jp, x, x1, x2, x3, x4) ; \
  \
  /* y field and its four node pointers. */ \
  Real_ptr y = m_y; \
  Real_ptr y1; \
  Real_ptr y2; \
  Real_ptr y3; \
  Real_ptr y4; \
  NDSET2D(m_domain->jp, y, y1, y2, y3, y4) ; \
  \
  /* xdot field; its node pointers are named fx1..fx4. */ \
  Real_ptr xdot = m_xdot; \
  Real_ptr fx1; \
  Real_ptr fx2; \
  Real_ptr fx3; \
  Real_ptr fx4; \
  NDSET2D(m_domain->jp, xdot, fx1, fx2, fx3, fx4) ; \
  \
  /* ydot field; its node pointers are named fy1..fy4. */ \
  Real_ptr ydot = m_ydot; \
  Real_ptr fy1; \
  Real_ptr fy2; \
  Real_ptr fy3; \
  Real_ptr fy4; \
  NDSET2D(m_domain->jp, ydot, fy1, fy2, fy3, fy4) ; \
  \
  /* Output array written by DEL_DOT_VEC_2D_BODY. */ \
  Real_ptr div = m_div;
// Translates the loop counter `ii` (which must be in scope at the expansion
// site) into the zone index `i` by looking it up in `real_zones`, the alias
// declared by DEL_DOT_VEC_2D_DATA_SETUP. DEL_DOT_VEC_2D_BODY then indexes
// every array with `i`.
#define DEL_DOT_VEC_2D_BODY_INDEX \
Index_type i = real_zones[ii];
// Per-zone kernel body. Expects `i`, `half`, `ptiny`, the node pointers
// x1..x4, y1..y4, fx1..fx4, fy1..fy4 and the output array `div` to be in
// scope, and stores one value to div[i]. The arithmetic is kept in the same
// operand order as the reference implementation at the top of this file.
#define DEL_DOT_VEC_2D_BODY \
  /* Half-differences of the x node values. */ \
  const Real_type xi = half * ( x1[i] + x2[i] - x3[i] - x4[i] ) ; \
  const Real_type xj = half * ( x2[i] + x3[i] - x4[i] - x1[i] ) ; \
  \
  /* Half-differences of the y node values. */ \
  const Real_type yi = half * ( y1[i] + y2[i] - y3[i] - y4[i] ) ; \
  const Real_type yj = half * ( y2[i] + y3[i] - y4[i] - y1[i] ) ; \
  \
  /* Same combinations applied to the fx node values. */ \
  const Real_type fxi = half * ( fx1[i] + fx2[i] - fx3[i] - fx4[i] ) ; \
  const Real_type fxj = half * ( fx2[i] + fx3[i] - fx4[i] - fx1[i] ) ; \
  \
  /* Same combinations applied to the fy node values. */ \
  const Real_type fyi = half * ( fy1[i] + fy2[i] - fy3[i] - fy4[i] ) ; \
  const Real_type fyj = half * ( fy2[i] + fy3[i] - fy4[i] - fy1[i] ) ; \
  \
  /* Reciprocal of (xi*yj - xj*yi), with ptiny added to the denominator. */ \
  const Real_type rarea = 1.0 / ( xi * yj - xj * yi + ptiny ) ; \
  \
  const Real_type dfxdx = rarea * ( fxi * yj - fxj * yi ) ; \
  const Real_type dfydy = rarea * ( fyj * xi - fyi * xj ) ; \
  \
  /* Ratio of the summed fy node values to the summed y node values. */ \
  const Real_type affine = ( fy1[i] + fy2[i] + fy3[i] + fy4[i] ) / \
                           ( y1[i] + y2[i] + y3[i] + y4[i] ) ; \
  \
  div[i] = dfxdx + dfydy + affine ;
#include "common/KernelBase.hpp"
// Declaration of the DEL_DOT_VEC_2D kernel class. Only declarations live
// here; every member function is defined elsewhere.
namespace rajaperf
{
// Used only as a const reference parameter below, so a forward declaration
// is sufficient.
class RunParams;
namespace apps
{
// Forward-declared only; DEL_DOT_VEC_2D holds one through std::unique_ptr.
class ADomain;
// Kernel object for DEL_DOT_VEC_2D, derived from KernelBase. Its data members
// are the ones the DEL_DOT_VEC_2D_DATA_SETUP macro in this header aliases
// into local variables.
class DEL_DOT_VEC_2D : public KernelBase
{
public:
// Constructed from the run parameters.
DEL_DOT_VEC_2D(const RunParams& params);
// Declared here and defined out of line: because m_domain is a
// std::unique_ptr to the forward-declared ADomain, the destructor has to be
// defined where ADomain is a complete type.
~DEL_DOT_VEC_2D();
// NOTE(review): presumably the functions below mirror virtual hooks of
// KernelBase -- confirm; none of them is marked override in this header.
void setSize(Index_type target_size, Index_type target_reps);
void setUp(VariantID vid, size_t tune_idx);
void updateChecksum(VariantID vid, size_t tune_idx);
void tearDown(VariantID vid, size_t tune_idx);
// One tuning-definition function per back end named in the identifier
// (Seq, OpenMP, OpenMPTarget, Cuda, Hip, Sycl).
void defineSeqVariantTunings();
void defineOpenMPVariantTunings();
void defineOpenMPTargetVariantTunings();
void defineCudaVariantTunings();
void defineHipVariantTunings();
void defineSyclVariantTunings();
// Run functions for the back ends that take only a variant id.
void runSeqVariant(VariantID vid);
void runOpenMPVariant(VariantID vid);
void runOpenMPTargetVariant(VariantID vid);
// Cuda, Hip and Sycl run functions take their block / work-group size as a
// compile-time template argument.
template < size_t block_size >
void runCudaVariantImpl(VariantID vid);
template < size_t block_size >
void runHipVariantImpl(VariantID vid);
template < size_t work_group_size >
void runSyclVariantImpl(VariantID vid);
private:
// Default block size (256) and the block-size list type built from it.
static const size_t default_gpu_block_size = 256;
using gpu_block_sizes_type = integer::make_gpu_block_size_list_type<default_gpu_block_size>;
// Arrays aliased as x, y, xdot, ydot and div by DEL_DOT_VEC_2D_DATA_SETUP;
// div is the array DEL_DOT_VEC_2D_BODY writes to.
Real_ptr m_x;
Real_ptr m_y;
Real_ptr m_xdot;
Real_ptr m_ydot;
Real_ptr m_div;
// Scalars aliased as ptiny (added to the denominator of rarea) and half
// (multiplier of the node combinations) in the kernel body.
Real_type m_ptiny;
Real_type m_half;
// Domain object; DEL_DOT_VEC_2D_DATA_SETUP passes m_domain->jp to NDSET2D.
// NOTE(review): std::unique_ptr needs <memory>, which this header does not
// include directly; presumably it arrives via common/KernelBase.hpp -- confirm.
std::unique_ptr<ADomain> m_domain;
// Zone index list, aliased as real_zones and indexed by the loop counter in
// DEL_DOT_VEC_2D_BODY_INDEX. Raw pointer: ownership is not visible here.
Index_type* m_real_zones;
// Not referenced by the macros in this header.
// NOTE(review): presumably the allocated length of the arrays above -- confirm
// in the implementation file.
Index_type m_array_length;
};
} // end namespace apps
} // end namespace rajaperf
#endif // closing endif for header file include guard