Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f199212

Browse files
xwang233 and claude authored
Fix lerp overload ambiguity with std::lerp under C++20 (#1985)
PyTorch commit ad56ff73b751 ("[2/12] Upgrade cpp_extension and cpp_builder to C++20", pytorch/pytorch#176659) changed the default C++ standard from C++17 to C++20 for extensions built via torch.utils.cpp_extension.

Under C++20, std::lerp from <cmath> is visible alongside the custom lerp(float, float, float) defined in this file. When the third argument is c10::BFloat16 (implicitly convertible to float), the compiler finds two equally valid overload candidates and fails with "more than one instance of overloaded function matches".

Rename the custom lerp to _lerp to eliminate the ambiguity.

Signed-off-by: Xiao Wang <[email protected]>
Co-authored-by: Claude Opus 4.6 <[email protected]>
1 parent dbe421e commit f199212

1 file changed

Lines changed: 10 additions & 9 deletions

File tree

apex/contrib/csrc/optimizers/multi_tensor_distopt_adam_kernel.cu

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ __device__ __forceinline__ void load_store(T* dst, const T* src, int dst_offset
2727
}
2828

2929
// (1-t)*x + t*y
30-
__device__ __forceinline__ float lerp(float t, float x, float y) {
30+
// Note: Named _lerp to avoid ambiguity with std::lerp under C++20.
31+
__device__ __forceinline__ float _lerp(float t, float x, float y) {
3132
// See https://developer.nvidia.com/blog/lerp-faster-cuda/
3233
return fma(t, y, fma(-t, x, x));
3334
}
@@ -53,8 +54,8 @@ struct DistAdamFunctor {
5354
#pragma unroll
5455
for (int ii = 0; ii < ILP; ii++) {
5556
float scaled_grad = (g[ii] * grad_scale) + (weight_decay * p[ii]);
56-
float next_m = lerp(beta1, scaled_grad, m[ii]);
57-
float next_v = lerp(beta2, scaled_grad * scaled_grad, v[ii]);
57+
float next_m = _lerp(beta1, scaled_grad, m[ii]);
58+
float next_v = _lerp(beta2, scaled_grad * scaled_grad, v[ii]);
5859
float next_m_unbiased = next_m / beta1_correction;
5960
float next_v_unbiased = next_v / beta2_correction;
6061
float denom = sqrtf(next_v_unbiased) + eps;
@@ -67,8 +68,8 @@ struct DistAdamFunctor {
6768
#pragma unroll
6869
for (int ii = 0; ii < ILP; ii++) {
6970
float scaled_grad = g[ii] * grad_scale;
70-
float next_m = lerp(beta1, scaled_grad, m[ii]);
71-
float next_v = lerp(beta2, scaled_grad * scaled_grad, v[ii]);
71+
float next_m = _lerp(beta1, scaled_grad, m[ii]);
72+
float next_v = _lerp(beta2, scaled_grad * scaled_grad, v[ii]);
7273
float next_m_unbiased = next_m / beta1_correction;
7374
float next_v_unbiased = next_v / beta2_correction;
7475
float denom = sqrtf(next_v_unbiased) + eps;
@@ -183,8 +184,8 @@ struct DistAdamCapturableFunctor {
183184
#pragma unroll
184185
for (int ii = 0; ii < ILP; ii++) {
185186
float scaled_grad = (g[ii] * grad_scale) + (weight_decay * p[ii]);
186-
float next_m = lerp(beta1, scaled_grad, m[ii]);
187-
float next_v = lerp(beta2, scaled_grad * scaled_grad, v[ii]);
187+
float next_m = _lerp(beta1, scaled_grad, m[ii]);
188+
float next_v = _lerp(beta2, scaled_grad * scaled_grad, v[ii]);
188189
float next_m_unbiased = next_m / beta1_correction;
189190
float next_v_unbiased = next_v / beta2_correction;
190191
float denom = sqrtf(next_v_unbiased) + eps;
@@ -197,8 +198,8 @@ struct DistAdamCapturableFunctor {
197198
#pragma unroll
198199
for (int ii = 0; ii < ILP; ii++) {
199200
float scaled_grad = g[ii] * grad_scale;
200-
float next_m = lerp(beta1, scaled_grad, m[ii]);
201-
float next_v = lerp(beta2, scaled_grad * scaled_grad, v[ii]);
201+
float next_m = _lerp(beta1, scaled_grad, m[ii]);
202+
float next_v = _lerp(beta2, scaled_grad * scaled_grad, v[ii]);
202203
float next_m_unbiased = next_m / beta1_correction;
203204
float next_v_unbiased = next_v / beta2_correction;
204205
float denom = sqrtf(next_v_unbiased) + eps;

0 commit comments

Comments
 (0)