From 01a91970c2e9ba9484d034d46e6cf5a54b789c0c Mon Sep 17 00:00:00 2001
From: rusty1s
Date: Tue, 28 Jan 2025 11:49:23 +0000
Subject: [PATCH 1/6] update

---
 pyg_lib/csrc/classes/cuda/hash_map_impl.cu | 69 ++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 pyg_lib/csrc/classes/cuda/hash_map_impl.cu

diff --git a/pyg_lib/csrc/classes/cuda/hash_map_impl.cu b/pyg_lib/csrc/classes/cuda/hash_map_impl.cu
new file mode 100644
index 000000000..101cdaf05
--- /dev/null
+++ b/pyg_lib/csrc/classes/cuda/hash_map_impl.cu
@@ -0,0 +1,69 @@
+#include
+#include
+
+#include "../hash_map_impl.h"
+
+namespace pyg {
+namespace classes {
+
+namespace {
+
+template <typename KeyType>
+struct CUDAHashMapImpl : HashMapImpl {
+ public:
+  using ValueType = int64_t;
+
+  CUDAHashMapImpl(const at::Tensor& key) {
+    KeyType constexpr empty_key_sentinel = -1;  // TODO
+    ValueType constexpr empty_value_sentinel = -1;
+
+    map_ = std::make_unique<cuco::static_map<KeyType, ValueType>>(
+        2 * key.numel(),  // loader_factor = 0.5
+        cuco::empty_key{empty_key_sentinel},
+        cuco::empty_value{empty_value_sentinel});
+
+    const auto options =
+        at::TensorOptions().device(key.device()).dtype(at::kLong);
+    const auto value = at::arange(key.numel(), options);
+    const auto key_data = key.data_ptr<KeyType>();
+    const auto value_data = value.data_ptr<ValueType>();
+
+    map_->insert(key_data, value_data, key.numel());
+  }
+
+  at::Tensor get(const at::Tensor& query) override {
+    const auto options =
+        at::TensorOptions().device(query.device()).dtype(at::kLong);
+    const auto out = at::empty({query.numel()}, options);
+    const auto query_data = query.data_ptr<KeyType>();
+    auto out_data = out.data_ptr();
+
+    map_->find(query_data, out_data, query.numel());
+
+    return out;
+  }
+
+ private:
+  std::unique_ptr<cuco::static_map<KeyType, ValueType>> map_;
+};
+
+// template struct CUDAHashMapImpl;
+// template struct CUDAHashMapImpl;
+// template struct CUDAHashMapImpl;
+// template struct CUDAHashMapImpl;
+// template struct CUDAHashMapImpl;
+// template struct CUDAHashMapImpl;
+// template struct CUDAHashMapImpl;
+// template struct CUDAHashMapImpl;
+
+struct CUDAHashMap : torch::CustomClassHolder {
+ public:
+  CUDAHashMap(const at::Tensor& key) {}
+
+  at::Tensor get(const at::Tensor& query) { return query; }
+};
+
+}  // namespace
+
+}  // namespace classes
+}  // namespace pyg

From a3f446c61b0b35993d1ff5e5385ac9760f95176b Mon Sep 17 00:00:00 2001
From: rusty1s
Date: Wed, 29 Jan 2025 14:48:17 +0000
Subject: [PATCH 2/6] update

---
 pyg_lib/csrc/classes/cuda/hash_map_impl.cu | 104 ++++++++++++++++-----
 test/csrc/classes/test_hash_map.cpp        |   1 +
 2 files changed, 84 insertions(+), 21 deletions(-)

diff --git a/pyg_lib/csrc/classes/cuda/hash_map_impl.cu b/pyg_lib/csrc/classes/cuda/hash_map_impl.cu
index 101cdaf05..55232db6a 100644
--- a/pyg_lib/csrc/classes/cuda/hash_map_impl.cu
+++ b/pyg_lib/csrc/classes/cuda/hash_map_impl.cu
@@ -1,69 +1,131 @@
 #include
+#include
 #include
-
-#include "../hash_map_impl.h"
 
 namespace pyg {
 namespace classes {
 
 namespace {
 
+#define DISPATCH_CASE_KEY(...)                          \
+  AT_DISPATCH_CASE(at::ScalarType::Short, __VA_ARGS__)  \
+  AT_DISPATCH_CASE(at::ScalarType::Int, __VA_ARGS__)    \
+  AT_DISPATCH_CASE(at::ScalarType::Long, __VA_ARGS__)
+
+#define DISPATCH_KEY(TYPE, NAME, ...) \
+  AT_DISPATCH_SWITCH(TYPE, NAME, DISPATCH_CASE_KEY(__VA_ARGS__))
+
+struct HashMapImpl {
+  virtual ~HashMapImpl() = default;
+  virtual at::Tensor get(const at::Tensor& query) = 0;
+  virtual at::Tensor keys() = 0;
+};
+
 template <typename KeyType>
 struct CUDAHashMapImpl : HashMapImpl {
  public:
   using ValueType = int64_t;
 
   CUDAHashMapImpl(const at::Tensor& key) {
-    KeyType constexpr empty_key_sentinel = -1;  // TODO
+    KeyType constexpr empty_key_sentinel = std::numeric_limits<KeyType>::min();
     ValueType constexpr empty_value_sentinel = -1;
 
     map_ = std::make_unique<cuco::static_map<KeyType, ValueType>>(
-        2 * key.numel(),  // loader_factor = 0.5
+        2 * key.numel(),  // load_factor = 0.5
         cuco::empty_key{empty_key_sentinel},
         cuco::empty_value{empty_value_sentinel});
 
+    const auto key_data = key.data_ptr<KeyType>();
     const auto options =
-        at::TensorOptions().device(key.device()).dtype(at::kLong);
+        key.options().dtype(c10::CppTypeToScalarType<ValueType>::value);
     const auto value = at::arange(key.numel(), options);
-    const auto key_data = key.data_ptr<KeyType>();
     const auto value_data = value.data_ptr<ValueType>();
+    const auto zipped =
+        thrust::make_zip_iterator(thrust::make_tuple(key_data, value_data));
 
-    map_->insert(key_data, value_data, key.numel());
+    map_->insert(zipped, zipped + key.numel());
   }
 
   at::Tensor get(const at::Tensor& query) override {
     const auto options =
-        at::TensorOptions().device(query.device()).dtype(at::kLong);
+        query.options().dtype(c10::CppTypeToScalarType<ValueType>::value);
     const auto out = at::empty({query.numel()}, options);
     const auto query_data = query.data_ptr<KeyType>();
-    auto out_data = out.data_ptr();
+    auto out_data = out.data_ptr<ValueType>();
 
-    map_->find(query_data, out_data, query.numel());
+    map_->find(query_data, query_data + query.numel(), out_data);
 
     return out;
   }
 
+  at::Tensor keys() override {
+    // TODO This will not work in multi-GPU scenarios.
+    const auto options = at::TensorOptions().device(at::DeviceType::CUDA);
+    const auto size = static_cast<int64_t>(map_->size());
+    const auto key = at::empty(
+        {size}, options.dtype(c10::CppTypeToScalarType<KeyType>::value));
+    const auto value = at::empty(
+        {size}, options.dtype(c10::CppTypeToScalarType<ValueType>::value));
+    auto key_data = key.data_ptr<KeyType>();
+    auto value_data = value.data_ptr<ValueType>();
+
+    map_->retrieve_all(key_data, value_data);
+
+    return key.index_select(0, value.argsort());
+  }
+
  private:
   std::unique_ptr<cuco::static_map<KeyType, ValueType>> map_;
 };
 
-// template struct CUDAHashMapImpl;
-// template struct CUDAHashMapImpl;
-// template struct CUDAHashMapImpl;
-// template struct CUDAHashMapImpl;
-// template struct CUDAHashMapImpl;
-// template struct CUDAHashMapImpl;
-// template struct CUDAHashMapImpl;
-// template struct CUDAHashMapImpl;
-
 struct CUDAHashMap : torch::CustomClassHolder {
  public:
-  CUDAHashMap(const at::Tensor& key) {}
+  CUDAHashMap(const at::Tensor& key) {
+    at::TensorArg key_arg{key, "key", 0};
+    at::CheckedFrom c{"CUDAHashMap.init"};
+    at::checkDeviceType(c, key, at::DeviceType::CUDA);
+    at::checkDim(c, key_arg, 1);
+    at::checkContiguous(c, key_arg);
+
+    DISPATCH_KEY(key.scalar_type(), "cuda_hash_map_init", [&] {
+      map_ = std::make_unique<CUDAHashMapImpl<scalar_t>>(key);
+    });
+  }
 
-  at::Tensor get(const at::Tensor& query) { return query; }
+  at::Tensor get(const at::Tensor& query) {
+    at::TensorArg query_arg{query, "query", 0};
+    at::CheckedFrom c{"CUDAHashMap.get"};
+    at::checkDeviceType(c, query, at::DeviceType::CUDA);
+    at::checkDim(c, query_arg, 1);
+    at::checkContiguous(c, query_arg);
+
+    return map_->get(query);
+  }
+
+  at::Tensor keys() { return map_->keys(); }
+
+ private:
+  std::unique_ptr<HashMapImpl> map_;
 };
 
 }  // namespace
 
+TORCH_LIBRARY_FRAGMENT(pyg, m) {
+  m.class_<CUDAHashMap>("CUDAHashMap")
+      .def(torch::init<at::Tensor&>())
+      .def("get", &CUDAHashMap::get)
+      .def("keys", &CUDAHashMap::keys)
+      .def_pickle(
+          // __getstate__
+          [](const c10::intrusive_ptr<CUDAHashMap>& self) -> at::Tensor {
+            return self->keys();
+          },
+          // __setstate__
+          [](const at::Tensor& state) -> c10::intrusive_ptr<CUDAHashMap> {
+            return c10::make_intrusive<CUDAHashMap>(state);
+          });
+}
+
 }  // namespace classes
 }  // namespace pyg
diff --git a/test/csrc/classes/test_hash_map.cpp b/test/csrc/classes/test_hash_map.cpp
index 813fbe46a..a0109332d 100644
--- a/test/csrc/classes/test_hash_map.cpp
+++ b/test/csrc/classes/test_hash_map.cpp
@@ -1,5 +1,6 @@
 #include
 #include
+#include
 
 #include "pyg_lib/csrc/classes/hash_map.h"
 

From ba3fc7d2e1ab98560a29c5ed9045d3d20830391c Mon Sep 17 00:00:00 2001
From: rusty1s
Date: Wed, 29 Jan 2025 16:16:16 +0000
Subject: [PATCH 3/6] update

---
 test/csrc/classes/test_hash_map.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/csrc/classes/test_hash_map.cpp b/test/csrc/classes/test_hash_map.cpp
index a0109332d..813fbe46a 100644
--- a/test/csrc/classes/test_hash_map.cpp
+++ b/test/csrc/classes/test_hash_map.cpp
@@ -1,6 +1,5 @@
 #include
 #include
-#include
 
 #include "pyg_lib/csrc/classes/hash_map.h"
 

From 2fc606c1cf4d35cdd25dbfa6ce4a9a3dd26c85d2 Mon Sep 17 00:00:00 2001
From: rusty1s
Date: Sat, 1 Feb 2025 20:09:41 +0000
Subject: [PATCH 4/6] update

---
 pyg_lib/csrc/classes/cuda/hash_map_impl.cu | 24 ++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/pyg_lib/csrc/classes/cuda/hash_map_impl.cu b/pyg_lib/csrc/classes/cuda/hash_map_impl.cu
index 55232db6a..a8a2faa64 100644
--- a/pyg_lib/csrc/classes/cuda/hash_map_impl.cu
+++ b/pyg_lib/csrc/classes/cuda/hash_map_impl.cu
@@ -52,7 +52,7 @@ struct CUDAHashMapImpl : HashMapImpl {
         query.options().dtype(c10::CppTypeToScalarType<ValueType>::value);
     const auto out = at::empty({query.numel()}, options);
     const auto query_data = query.data_ptr<KeyType>();
-    auto out_data = out.data_ptr<ValueType>();
+    const auto out_data = out.data_ptr<ValueType>();
 
     map_->find(query_data, query_data + query.numel(), out_data);
 
     return out;
   }
@@ -61,14 +61,22 @@ struct CUDAHashMapImpl : HashMapImpl {
   at::Tensor keys() override {
     // TODO This will not work in multi-GPU scenarios.
-    const auto options = at::TensorOptions().device(at::DeviceType::CUDA);
+    const auto options = at::TensorOptions()
+                             .device(at::DeviceType::CUDA)
+                             .dtype(c10::CppTypeToScalarType<ValueType>::value);
     const auto size = static_cast<int64_t>(map_->size());
-    const auto key = at::empty(
-        {size}, options.dtype(c10::CppTypeToScalarType<KeyType>::value));
-    const auto value = at::empty(
-        {size}, options.dtype(c10::CppTypeToScalarType<ValueType>::value));
-    auto key_data = key.data_ptr<KeyType>();
-    auto value_data = value.data_ptr<ValueType>();
+
+    at::Tensor key;
+    if (std::is_same<KeyType, int16_t>::value) {
+      key = at::empty({size}, options.dtype(at::kShort));
+    } else if (std::is_same<KeyType, int32_t>::value) {
+      key = at::empty({size}, options.dtype(at::kInt));
+    } else {
+      key = at::empty({size}, options);
+    }
+    const auto value = at::empty({size}, options);
+    const auto key_data = key.data_ptr<KeyType>();
+    const auto value_data = value.data_ptr<ValueType>();
 
     map_->retrieve_all(key_data, value_data);
 
     return key.index_select(0, value.argsort());

From e658af1b9cdf233c0b7d685268706d5004ff690d Mon Sep 17 00:00:00 2001
From: rusty1s
Date: Sat, 1 Feb 2025 20:37:22 +0000
Subject: [PATCH 5/6] update

---
 CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ea26fefc3..5387aec43 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.15)
+cmake_minimum_required(VERSION 3.18)
 project(pyg)
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -43,6 +43,7 @@ if(WITH_CUDA)
   enable_language(CUDA)
   add_definitions(-DWITH_CUDA)
   set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr -allow-unsupported-compiler")
+  set(CMAKE_CUDA_ARCHITECTURES "60;70;75;80;86;90")
 
   if (NOT "$ENV{EXTERNAL_CUTLASS_INCLUDE_DIR}" STREQUAL "")
     include_directories($ENV{EXTERNAL_CUTLASS_INCLUDE_DIR})

From 08039b9ad852d8fa761cf3bbf4d9d9c0342b68ec Mon Sep 17 00:00:00 2001
From: rusty1s
Date: Sat, 1 Feb 2025 20:39:19 +0000
Subject: [PATCH 6/6] update

---
 .github/workflows/cuda/Linux-env.sh | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/cuda/Linux-env.sh b/.github/workflows/cuda/Linux-env.sh
index f519b347e..d048e6065 100755
--- a/.github/workflows/cuda/Linux-env.sh
+++ b/.github/workflows/cuda/Linux-env.sh
@@ -4,42 +4,42 @@ case ${1} in
   cu124)
     export FORCE_CUDA=1
     export PATH=/usr/local/cuda-12.4/bin:${PATH}
-    export TORCH_CUDA_ARCH_LIST="5.0+PTX;6.0;7.0;7.5;8.0;8.6;9.0"
+    export TORCH_CUDA_ARCH_LIST="6.0+PTX;7.0;7.5;8.0;8.6;9.0"
     ;;
   cu121)
     export FORCE_CUDA=1
     export PATH=/usr/local/cuda-12.1/bin:${PATH}
-    export TORCH_CUDA_ARCH_LIST="5.0+PTX;6.0;7.0;7.5;8.0;8.6;9.0"
+    export TORCH_CUDA_ARCH_LIST="6.0+PTX;7.0;7.5;8.0;8.6;9.0"
     ;;
   cu118)
     export FORCE_CUDA=1
     export PATH=/usr/local/cuda-11.8/bin:${PATH}
-    export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6;9.0"
+    export TORCH_CUDA_ARCH_LIST="6.0+PTX;7.0;7.5;8.0;8.6;9.0"
    ;;
   cu117)
     export FORCE_CUDA=1
     export PATH=/usr/local/cuda-11.7/bin:${PATH}
-    export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
+    export TORCH_CUDA_ARCH_LIST="6.0+PTX;7.0;7.5;8.0;8.6"
     ;;
   cu116)
     export FORCE_CUDA=1
     export PATH=/usr/local/cuda-11.6/bin:${PATH}
-    export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
+    export TORCH_CUDA_ARCH_LIST="6.0+PTX;7.0;7.5;8.0;8.6"
     ;;
   cu115)
     export FORCE_CUDA=1
     export PATH=/usr/local/cuda-11.5/bin:${PATH}
-    export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
+    export TORCH_CUDA_ARCH_LIST="6.0+PTX;7.0;7.5;8.0;8.6"
     ;;
   cu113)
     export FORCE_CUDA=1
     export PATH=/usr/local/cuda-11.3/bin:${PATH}
-    export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
+    export TORCH_CUDA_ARCH_LIST="6.0+PTX;7.0;7.5;8.0;8.6"
     ;;
   cu102)
     export FORCE_CUDA=1
     export PATH=/usr/local/cuda-10.2/bin:${PATH}
-    export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5"
+    export TORCH_CUDA_ARCH_LIST="6.0+PTX;7.0;7.5"
     ;;
   *)
     ;;
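
For readers who have not used cuco before, the standalone sketch below illustrates the build-and-query pattern that CUDAHashMapImpl wraps in the patches above: construct a cuco::static_map with sentinel key/value, bulk-insert zipped (key, value) pairs, and bulk-find a batch of queries. It is illustrative only and is not part of the patch series; the include paths, the host-side staging through thrust::device_vector, and the main() scaffolding are assumptions made to keep the example self-contained.

    // Minimal sketch of the cuco::static_map usage mirrored by CUDAHashMapImpl.
    // Illustrative only -- not part of the PR; header paths are assumed.
    #include <cuco/static_map.cuh>
    #include <thrust/device_vector.h>
    #include <thrust/iterator/zip_iterator.h>
    #include <thrust/sequence.h>
    #include <thrust/tuple.h>
    #include <cstdint>
    #include <limits>
    #include <vector>

    int main() {
      using Key = int64_t;
      using Value = int64_t;

      const std::vector<Key> h_keys{10, 20, 30, 40};
      thrust::device_vector<Key> keys(h_keys.begin(), h_keys.end());
      thrust::device_vector<Value> values(keys.size());
      thrust::sequence(values.begin(), values.end());  // 0, 1, 2, ... like at::arange

      // A capacity of 2x the key count corresponds to the load factor of 0.5
      // chosen via `2 * key.numel()` in CUDAHashMapImpl.
      cuco::static_map<Key, Value> map{
          2 * keys.size(),
          cuco::empty_key<Key>{std::numeric_limits<Key>::min()},
          cuco::empty_value<Value>{-1}};

      // Bulk insert of (key, value) pairs through a zip iterator, as in the patch.
      const auto zipped = thrust::make_zip_iterator(
          thrust::make_tuple(keys.begin(), values.begin()));
      map.insert(zipped, zipped + keys.size());

      // Bulk lookup; keys that were never inserted map to the empty-value sentinel (-1).
      const std::vector<Key> h_queries{30, 10, 99};
      thrust::device_vector<Key> queries(h_queries.begin(), h_queries.end());
      thrust::device_vector<Value> out(queries.size());
      map.find(queries.begin(), queries.end(), out.begin());
      return 0;
    }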