diff --git a/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp b/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp index 1e6e0d29e6da3..8a6466456e5aa 100644 --- a/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp +++ b/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp @@ -1102,9 +1102,12 @@ bool ProcessGroupNCCL::useNonblocking() { useNonblocking_ = nbEnv; } // 3rd priority: automatically use nonblocking if we are in eager init mode - else if (getBoundDeviceId()) { - useNonblocking_ = true; - } + // Note: this automatic selection is disabled in torch 2.7.1 to work around a + // hang in NCCL 2.26 in non-blocking mode. We can revisit if NCCL fixes the + // bug. See https://github.com/pytorch/pytorch/issues/153960 + // else if (getBoundDeviceId()) { + // useNonblocking_ = true; + // } // 4th priority: otherwise, nonblocking = false to preserve old behavior else { useNonblocking_ = false;