Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 73ba453

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 5d03c51 commit 73ba453

4 files changed

Lines changed: 24 additions & 23 deletions

File tree

csrc/multi_tensor_apply.cuh

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,10 @@
33
#include <ATen/cuda/CUDAContext.h>
44
#include <ATen/cuda/Exceptions.h>
55
#include <assert.h>
6-
#include <climits>
76
#include <c10/cuda/CUDAGuard.h>
87

8+
#include <climits>
9+
910
// #include <iostream>
1011

1112
// This header is the one-stop shop for all your multi-tensor apply needs.

csrc/multi_tensor_l2norm_kernel.cu

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -321,10 +321,9 @@ std::tuple<at::Tensor, at::Tensor> multi_tensor_l2norm_cuda(int chunk_size, at::
321321
per_tensor ? output_per_tensor.data_ptr<float>() : nullptr, per_tensor,
322322
max_chunks_per_tensor);
323323
} else {
324-
multi_tensor_apply<1>(BLOCK_SIZE, chunk_size, noop_flag, tensor_lists,
325-
L2NormFunctor<scalar_t_0, int32_t>(), output.data_ptr<float>(),
326-
per_tensor ? output_per_tensor.data_ptr<float>() : nullptr, per_tensor,
327-
max_chunks_per_tensor);
324+
multi_tensor_apply<1>(BLOCK_SIZE, chunk_size, noop_flag, tensor_lists, L2NormFunctor<scalar_t_0, int32_t>(),
325+
output.data_ptr<float>(), per_tensor ? output_per_tensor.data_ptr<float>() : nullptr,
326+
per_tensor, max_chunks_per_tensor);
328327
})
329328

330329
AT_CUDA_CHECK(cudaGetLastError());
@@ -428,16 +427,17 @@ void multi_tensor_norm_out_cuda(int chunk_size, at::Tensor noop_flag, std::vecto
428427
output_per_tensor = at::zeros({ntensors * max_chunks_per_tensor}, float_options);
429428

430429
if (norm_type == 0) {
431-
DISPATCH_FLOAT_AND_HALF(tensor_lists[0][0].scalar_type(), 0, "multi_tensor_maxnorm_cuda",
432-
if (requires_64bit_indexing) {
433-
multi_tensor_apply<1>((int64_t)BLOCK_SIZE, (int64_t)chunk_size, noop_flag, tensor_lists,
434-
MaxNormFunctor<scalar_t_0, int64_t>(), output.data_ptr<float>(),
435-
output_per_tensor.data_ptr<float>(), true, max_chunks_per_tensor);
436-
} else {
437-
multi_tensor_apply<1>(BLOCK_SIZE, chunk_size, noop_flag, tensor_lists,
438-
MaxNormFunctor<scalar_t_0, int32_t>(), output.data_ptr<float>(),
439-
output_per_tensor.data_ptr<float>(), true, max_chunks_per_tensor);
440-
})
430+
DISPATCH_FLOAT_AND_HALF(
431+
tensor_lists[0][0].scalar_type(), 0, "multi_tensor_maxnorm_cuda",
432+
if (requires_64bit_indexing) {
433+
multi_tensor_apply<1>((int64_t)BLOCK_SIZE, (int64_t)chunk_size, noop_flag, tensor_lists,
434+
MaxNormFunctor<scalar_t_0, int64_t>(), output.data_ptr<float>(),
435+
output_per_tensor.data_ptr<float>(), true, max_chunks_per_tensor);
436+
} else {
437+
multi_tensor_apply<1>(BLOCK_SIZE, chunk_size, noop_flag, tensor_lists, MaxNormFunctor<scalar_t_0, int32_t>(),
438+
output.data_ptr<float>(), output_per_tensor.data_ptr<float>(), true,
439+
max_chunks_per_tensor);
440+
})
441441
} else {
442442
DISPATCH_FLOAT_HALF_AND_BFLOAT(
443443
tensor_lists[0][0].scalar_type(), 0, "multi_tensor_l2norm_cuda",
@@ -446,9 +446,9 @@ void multi_tensor_norm_out_cuda(int chunk_size, at::Tensor noop_flag, std::vecto
446446
L2NormFunctor<scalar_t_0, int64_t>(), output.data_ptr<float>(),
447447
output_per_tensor.data_ptr<float>(), true, max_chunks_per_tensor);
448448
} else {
449-
multi_tensor_apply<1>(BLOCK_SIZE, chunk_size, noop_flag, tensor_lists,
450-
L2NormFunctor<scalar_t_0, int32_t>(), output.data_ptr<float>(),
451-
output_per_tensor.data_ptr<float>(), true, max_chunks_per_tensor);
449+
multi_tensor_apply<1>(BLOCK_SIZE, chunk_size, noop_flag, tensor_lists, L2NormFunctor<scalar_t_0, int32_t>(),
450+
output.data_ptr<float>(), output_per_tensor.data_ptr<float>(), true,
451+
max_chunks_per_tensor);
452452
})
453453
}
454454
AT_CUDA_CHECK(cudaGetLastError());

csrc/multi_tensor_l2norm_kernel_mp.cu

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -152,10 +152,9 @@ std::tuple<at::Tensor, at::Tensor> multi_tensor_l2norm_mp_cuda(int chunk_size, a
152152
per_tensor ? output_per_tensor.data_ptr<float>() : nullptr, per_tensor,
153153
max_chunks_per_tensor);
154154
} else {
155-
multi_tensor_apply<1>(BLOCK_SIZE, chunk_size, noop_flag, tensor_lists,
156-
L2NormFunctor<scalar_t_0, int32_t>(), output.data_ptr<float>(),
157-
per_tensor ? output_per_tensor.data_ptr<float>() : nullptr, per_tensor,
158-
max_chunks_per_tensor);
155+
multi_tensor_apply<1>(BLOCK_SIZE, chunk_size, noop_flag, tensor_lists, L2NormFunctor<scalar_t_0, int32_t>(),
156+
output.data_ptr<float>(), per_tensor ? output_per_tensor.data_ptr<float>() : nullptr,
157+
per_tensor, max_chunks_per_tensor);
159158
})
160159

161160
AT_CUDA_CHECK(cudaGetLastError());

tests/L0/run_optimizers/test_large_tensor_l2norm.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
INT32_MAX = 2_147_483_647
1717
LARGE_NUMEL = INT32_MAX + 1
1818

19+
1920
@unittest.skipIf(not HAS_APEX, "`apex` is not found.")
2021
class LargeTensorL2NormTest(unittest.TestCase):
2122
def setUp(self):
@@ -77,4 +78,4 @@ def test_multi_tensor_l2norm_scale_large_tensor(self):
7778

7879

7980
if __name__ == "__main__":
80-
unittest.main()
81+
unittest.main()

0 commit comments

Comments
 (0)