From 1fc812689c7bdeefaabdb06aba4e993210986334 Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Tue, 9 Oct 2018 14:00:35 -0700 Subject: [PATCH 01/23] View op outputs are not registered as views when !GradMode::enabled() --- test/test_autograd.py | 40 +++++++++------- tools/autograd/gen_autograd.py | 1 + tools/autograd/gen_variable_type.py | 3 +- torch/csrc/autograd/VariableTypeUtils.h | 11 +++-- torch/csrc/autograd/variable.cpp | 6 --- torch/csrc/autograd/variable.h | 64 +++++++++++++++++++++---- 6 files changed, 90 insertions(+), 35 deletions(-) diff --git a/test/test_autograd.py b/test/test_autograd.py index b42ab95993a9e..49cd858228859 100644 --- a/test/test_autograd.py +++ b/test/test_autograd.py @@ -727,28 +727,36 @@ def scope(): def test_no_grad(self): x = torch.ones(5, 5, requires_grad=True) - y = Variable(torch.ones(5, 5) * 4) - with torch.no_grad(): - w = x + y + y = torch.ones(5, 5) * 4 - @torch.no_grad() def adder(x, y): return x + y - z = adder(x, y) + def viewer(x, y): + return x[1] - self.assertFalse(w.requires_grad) - self.assertRaises(RuntimeError, lambda: w.backward(torch.ones(5, 5))) - self.assertIsNone(w.grad_fn) - self.assertFalse(z.requires_grad) - self.assertRaises(RuntimeError, lambda: z.backward(torch.ones(5, 5))) - self.assertIsNone(z.grad_fn) + for binary_op in (adder, viewer): + with torch.no_grad(): + w = binary_op(x, y) - # test nested decorator and with-statement on no_grad - with torch.no_grad(): - self.assertFalse(torch.is_grad_enabled()) - w = adder(x, y) - self.assertFalse(torch.is_grad_enabled()) + @torch.no_grad() + def decorated(x, y): + return binary_op(x, y) + + z = decorated(x, y) + + self.assertFalse(w.requires_grad) + self.assertRaises(RuntimeError, lambda: w.backward(torch.ones(5, 5))) + self.assertIsNone(w.grad_fn) + self.assertFalse(z.requires_grad) + self.assertRaises(RuntimeError, lambda: z.backward(torch.ones(5, 5))) + self.assertIsNone(z.grad_fn) + + # test nested decorator and with-statement on no_grad + with 
torch.no_grad(): + self.assertFalse(torch.is_grad_enabled()) + w = binary_op(x, y) + self.assertFalse(torch.is_grad_enabled()) def test_no_grad_python_function(self): """Python Functions should respect grad mode.""" diff --git a/tools/autograd/gen_autograd.py b/tools/autograd/gen_autograd.py index af62f0ad48903..e23d0b881b035 100644 --- a/tools/autograd/gen_autograd.py +++ b/tools/autograd/gen_autograd.py @@ -14,6 +14,7 @@ from collections import defaultdict from .utils import YamlLoader, split_name_params +# See NOTE [ Autograd Variable Views ] in variable.h for details. VIEW_FUNCTIONS = { 'alias', 'as_strided', 'diagonal', 'expand', 'narrow', 'permute', 'select', 'slice', 'squeeze', 't', 'transpose', 'unfold', 'unsqueeze', 'view', 'unbind', diff --git a/tools/autograd/gen_variable_type.py b/tools/autograd/gen_variable_type.py index 26a2a9539efbc..a2162585ec8e6 100644 --- a/tools/autograd/gen_variable_type.py +++ b/tools/autograd/gen_variable_type.py @@ -458,7 +458,8 @@ def wrap_output(call): if 'Tensor' not in declaration['return_type']: return call elif is_view: - return 'as_view(self, {})'.format(call) + # See NOTE [ Autograd Variable Views ] in variable.h for details. + return 'as_view(self, {}, GradMode::is_enabled())'.format(call) else: return 'as_variable({})'.format(call) diff --git a/torch/csrc/autograd/VariableTypeUtils.h b/torch/csrc/autograd/VariableTypeUtils.h index d28149e128257..7f94cd0747e3d 100644 --- a/torch/csrc/autograd/VariableTypeUtils.h +++ b/torch/csrc/autograd/VariableTypeUtils.h @@ -103,21 +103,24 @@ template inline variable_list flatten_tensor_args(Args&&... ar return out; // RVO } -inline Tensor as_view(const Tensor & base, Tensor tensor) { +// See NOTE [ Autograd Variable Views ] for details. 
+inline Tensor as_view(const Tensor & base, Tensor tensor, bool potential_history_tracking = true) { auto base_var = Variable(base); if (base_var.is_view()) { base_var = base_var.base(); } - return make_variable_view(std::move(base_var), std::move(tensor)); + return make_variable_view(std::move(base_var), std::move(tensor), potential_history_tracking); } -inline std::vector as_view(const Tensor & base, std::vector tensors) { +// See NOTE [ Autograd Variable Views ] for details. +inline std::vector as_view(const Tensor & base, std::vector tensors, + bool potential_history_tracking = true) { auto base_var = Variable(base); if (base_var.is_view()) { base_var = base_var.base(); } for(Tensor &tensor : tensors) { - tensor = make_variable_view(base_var, std::move(tensor)); + tensor = make_variable_view(base_var, std::move(tensor), potential_history_tracking); } return tensors; } diff --git a/torch/csrc/autograd/variable.cpp b/torch/csrc/autograd/variable.cpp index c36a52a71a279..025b64a23b137 100644 --- a/torch/csrc/autograd/variable.cpp +++ b/torch/csrc/autograd/variable.cpp @@ -115,12 +115,6 @@ std::shared_ptr Variable::Impl::get_grad_accumulator() { return result; } -Variable Variable::Impl::detach() const { - auto detached = make_variable(data_, /*requires_grad=*/false); - detached.set_version_counter(version_counter_); - return detached; -} - void Variable::Impl::detach_() { if (is_view_) { AT_ERROR("Can't detach views in-place. Use detach() instead"); diff --git a/torch/csrc/autograd/variable.h b/torch/csrc/autograd/variable.h index 804be552392d8..ef519fff2b7c7 100644 --- a/torch/csrc/autograd/variable.h +++ b/torch/csrc/autograd/variable.h @@ -63,6 +63,7 @@ struct Function; /// meaningful for `Variable` relations that are relevant to autograd. For /// example, if you hide your code from autograd using `.data`, the `Variable`s /// will not be registered as having view relations, even if they share storage. +/// See NOTE [ Autograd Variable Views ] for more details. 
/// /// /// Interface @@ -92,9 +93,13 @@ struct TORCH_API Variable : public at::Tensor { /// Creates a `Variable` that is a *view* of another (*base*) variable. /// The `gradient_edge` is an optional (gradient_function, input_number) pair. + /// `potential_history_tracking` is a bool that specifies whether this view + /// relation should be considered by autograd. + /// See NOTE [ Autograd Variable Views ] for details. friend Variable make_variable_view( Variable base, at::Tensor data, + bool potential_history_tracking, Edge gradient_edge); /// Creates a `Variable` from the given `Tensor`. `requires_grad` should be @@ -330,7 +335,6 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { return grad_; } - Variable detach() const; void detach_(); void set_data(Tensor new_data); @@ -375,10 +379,45 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { // Variable::ViewImpl //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// A Variable that is a view on another Variable. The base and view share the -/// same version_counter. The grad_fn field of the Variable may become stale -/// due to in-place modifications of the shared data. Accesses should go -/// through get_grad_fn(). All other fields are always valid. +/// NOTE [ Autograd Variable Views ] +/// +/// A Variable that is a view of another Variable (called base Variable), i.e., +/// they share storage, **and** may potentially record gradient flow between the +/// two Variables. Even if the base currently does not require grad, it is still +/// important to record this view relation to support operations like: +/// +/// # Have: +/// # base.requires_grad = False +/// # var.requires_grad = True +/// base[1] = var +/// torch.autograd.grad(base.sum(), var) <- should return an all ones tensor +/// +/// Above example is effectively base[1].copy_(var). To support this, in the +/// rebase_history of base[1], we need to get the update the grad_fn of the base +/// Variable. 
Therefore, we still record the view relation between base and +/// base[1] even though they don't require gradients at creation time. +/// +/// Another similar example is: +/// +/// # Have: +/// # base.requires_grad = False +/// # var.requires_grad = True +/// base.copy_(var) +/// torch.autograd.grad(base[1].sum(), var) <- should return an all ones tensor +/// +/// +/// In a view relation, the base and view Variables share the same +/// version_counter. The grad_fn field of the Variable may become stale due to +/// in-place modifications of the shared data. Accesses should go through +/// get_grad_fn(). All other fields are always valid. +/// +/// NB: Some views will never require gradient history tracking, and will not be +/// counted as views (i.e., having is_view() = true and using ViewImpl). +/// Instead, they will be usual Variables and just sharing the version +// counters with the base Variables. Some examples are: +/// 1. views created from .detach(), +/// 2. views created when GradMode::enabled() = false. +/// Relevant logic is implemented in make_variable_view. struct TORCH_API Variable::ViewImpl : public Variable::Impl { ViewImpl(Variable base, at::Tensor data, Edge gradient_edge); @@ -414,13 +453,22 @@ struct TORCH_API Variable::ViewImpl : public Variable::Impl { // Factory Functions //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// See NOTE [ Autograd Variable Views ] for details. 
inline Variable make_variable_view( Variable base, at::Tensor data, + bool potential_history_tracking = true, Edge gradient_edge = Edge()) { if (data.defined()) { - return Variable(c10::make_intrusive( - std::move(base), std::move(data), std::move(gradient_edge))); + if (potential_history_tracking) { + return Variable(c10::make_intrusive( + std::move(base), std::move(data), std::move(gradient_edge))); + } else { + auto var = Variable(c10::make_intrusive( + std::move(data), false, std::move(gradient_edge))); + var.set_version_counter(base.version_counter()); + return var; + } } return Variable(); } @@ -500,7 +548,7 @@ inline std::shared_ptr Variable::grad_accumulator() const { } inline Variable Variable::detach() const { - return get()->detach(); + return make_variable_view(*this, get()->data_, /*potential_history_tracking=*/false); } inline void Variable::detach_() { From 37bd215f62a72e74c0d5d28e55ad1414c1e30ce4 Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Tue, 9 Oct 2018 14:45:34 -0700 Subject: [PATCH 02/23] potential_history_tracking -> potentially_tracks_history --- torch/csrc/autograd/VariableTypeUtils.h | 8 ++++---- torch/csrc/autograd/variable.h | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/torch/csrc/autograd/VariableTypeUtils.h b/torch/csrc/autograd/VariableTypeUtils.h index 7f94cd0747e3d..6dda7c69774f7 100644 --- a/torch/csrc/autograd/VariableTypeUtils.h +++ b/torch/csrc/autograd/VariableTypeUtils.h @@ -104,23 +104,23 @@ template inline variable_list flatten_tensor_args(Args&&... ar } // See NOTE [ Autograd Variable Views ] for details. 
-inline Tensor as_view(const Tensor & base, Tensor tensor, bool potential_history_tracking = true) { +inline Tensor as_view(const Tensor & base, Tensor tensor, bool potentially_tracks_history = true) { auto base_var = Variable(base); if (base_var.is_view()) { base_var = base_var.base(); } - return make_variable_view(std::move(base_var), std::move(tensor), potential_history_tracking); + return make_variable_view(std::move(base_var), std::move(tensor), potentially_tracks_history); } // See NOTE [ Autograd Variable Views ] for details. inline std::vector as_view(const Tensor & base, std::vector tensors, - bool potential_history_tracking = true) { + bool potentially_tracks_history = true) { auto base_var = Variable(base); if (base_var.is_view()) { base_var = base_var.base(); } for(Tensor &tensor : tensors) { - tensor = make_variable_view(base_var, std::move(tensor), potential_history_tracking); + tensor = make_variable_view(base_var, std::move(tensor), potentially_tracks_history); } return tensors; } diff --git a/torch/csrc/autograd/variable.h b/torch/csrc/autograd/variable.h index ef519fff2b7c7..8e9dad995a488 100644 --- a/torch/csrc/autograd/variable.h +++ b/torch/csrc/autograd/variable.h @@ -93,13 +93,13 @@ struct TORCH_API Variable : public at::Tensor { /// Creates a `Variable` that is a *view* of another (*base*) variable. /// The `gradient_edge` is an optional (gradient_function, input_number) pair. - /// `potential_history_tracking` is a bool that specifies whether this view + /// `potentially_tracks_history` is a bool that specifies whether this view /// relation should be considered by autograd. /// See NOTE [ Autograd Variable Views ] for details. friend Variable make_variable_view( Variable base, at::Tensor data, - bool potential_history_tracking, + bool potentially_tracks_history, Edge gradient_edge); /// Creates a `Variable` from the given `Tensor`. 
`requires_grad` should be @@ -457,10 +457,10 @@ struct TORCH_API Variable::ViewImpl : public Variable::Impl { inline Variable make_variable_view( Variable base, at::Tensor data, - bool potential_history_tracking = true, + bool potentially_tracks_history = true, Edge gradient_edge = Edge()) { if (data.defined()) { - if (potential_history_tracking) { + if (potentially_tracks_history) { return Variable(c10::make_intrusive( std::move(base), std::move(data), std::move(gradient_edge))); } else { @@ -548,7 +548,7 @@ inline std::shared_ptr Variable::grad_accumulator() const { } inline Variable Variable::detach() const { - return make_variable_view(*this, get()->data_, /*potential_history_tracking=*/false); + return make_variable_view(*this, get()->data_, /*potentially_tracks_history=*/false); } inline void Variable::detach_() { From 44a279e977b1e1060f412ee1b431d714aebd4b1e Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Tue, 9 Oct 2018 21:37:06 -0700 Subject: [PATCH 03/23] make the note clearer --- torch/csrc/autograd/variable.h | 42 ++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/torch/csrc/autograd/variable.h b/torch/csrc/autograd/variable.h index 8e9dad995a488..9fe2004f69ca6 100644 --- a/torch/csrc/autograd/variable.h +++ b/torch/csrc/autograd/variable.h @@ -381,10 +381,12 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { /// NOTE [ Autograd Variable Views ] /// -/// A Variable that is a view of another Variable (called base Variable), i.e., -/// they share storage, **and** may potentially record gradient flow between the -/// two Variables. Even if the base currently does not require grad, it is still -/// important to record this view relation to support operations like: +/// Many operations return Variable that shares storage with an input Variable. +/// The returned Varaible is called a **view** Variable on the input **base** +/// Variable. 
Variable::ViewImple is created to support gradient tracking of +/// potential **in-place** operations on either of these two Variables. Note +/// that even if the base currently does not require grad, it is still important +/// to record this view relation to support operations like: /// /// # Have: /// # base.requires_grad = False @@ -393,31 +395,37 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { /// torch.autograd.grad(base.sum(), var) <- should return an all ones tensor /// /// Above example is effectively base[1].copy_(var). To support this, in the -/// rebase_history of base[1], we need to get the update the grad_fn of the base -/// Variable. Therefore, we still record the view relation between base and -/// base[1] even though they don't require gradients at creation time. +/// rebase_history of base[1], we need to update the grad_fn of base. Therefore, +/// we still record the view relation between base and base[1] even though they +/// don't require gradients at creation time. /// -/// Another similar example is: +/// A similar example but with in-place operation on base is: /// /// # Have: /// # base.requires_grad = False /// # var.requires_grad = True +/// view = base[1] /// base.copy_(var) -/// torch.autograd.grad(base[1].sum(), var) <- should return an all ones tensor -/// +/// torch.autograd.grad(view.sum(), var) <- should return a tensor with +/// var[1] filled with all ones and +/// zeros everywhere else /// /// In a view relation, the base and view Variables share the same /// version_counter. The grad_fn field of the Variable may become stale due to /// in-place modifications of the shared data. Accesses should go through /// get_grad_fn(). All other fields are always valid. /// -/// NB: Some views will never require gradient history tracking, and will not be -/// counted as views (i.e., having is_view() = true and using ViewImpl). 
-/// Instead, they will be usual Variables and just sharing the version -// counters with the base Variables. Some examples are: -/// 1. views created from .detach(), -/// 2. views created when GradMode::enabled() = false. -/// Relevant logic is implemented in make_variable_view. +/// Such view Variables have is_view() = true and use ViewImpl. +/// +/// However, some outputs, although sharing storage, will **never** require +/// gradient history tracking, and thus will not register the above view +/// relation in autograd using ViewImpl. Instead, they will be usual Variables +/// and just share the version counters with the base Variables. +/// Some examples are: +/// 1. Variables created from .detach(), +/// 2. Variables created when GradMode::enabled() = false. +/// Relevant logic is implemented in make_variable_view below, and +/// wrap_output of gen_variable_type.py. struct TORCH_API Variable::ViewImpl : public Variable::Impl { ViewImpl(Variable base, at::Tensor data, Edge gradient_edge); From 2d76c7926d49a13743b69595d828bbd0b2fac4c4 Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Tue, 9 Oct 2018 21:52:24 -0700 Subject: [PATCH 04/23] update note --- tools/autograd/gen_variable_type.py | 2 ++ torch/csrc/autograd/variable.h | 19 +++++++++++-------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/tools/autograd/gen_variable_type.py b/tools/autograd/gen_variable_type.py index a2162585ec8e6..39d3fc16dd12a 100644 --- a/tools/autograd/gen_variable_type.py +++ b/tools/autograd/gen_variable_type.py @@ -458,6 +458,8 @@ def wrap_output(call): if 'Tensor' not in declaration['return_type']: return call elif is_view: + # If `GradMode::is_enabled()` is False, this is a non-differentiable + # view. Gradients should not flow through. # See NOTE [ Autograd Variable Views ] in variable.h for details. 
return 'as_view(self, {}, GradMode::is_enabled())'.format(call) else: diff --git a/torch/csrc/autograd/variable.h b/torch/csrc/autograd/variable.h index 9fe2004f69ca6..62461358e5176 100644 --- a/torch/csrc/autograd/variable.h +++ b/torch/csrc/autograd/variable.h @@ -61,8 +61,9 @@ struct Function; /// `Variable`. You can determine whether `Variable` is in fact a view by /// probing its `is_view()` method. Note that the *view* semantics are only /// meaningful for `Variable` relations that are relevant to autograd. For -/// example, if you hide your code from autograd using `.data`, the `Variable`s -/// will not be registered as having view relations, even if they share storage. +/// example, if you hide your code from autograd using `.no_grad()`, the +/// `Variable`s will not be registered as having view relations, even if they +/// share storage. /// See NOTE [ Autograd Variable Views ] for more details. /// /// @@ -417,15 +418,17 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { /// /// Such view Variables have is_view() = true and use ViewImpl. /// -/// However, some outputs, although sharing storage, will **never** require -/// gradient history tracking, and thus will not register the above view -/// relation in autograd using ViewImpl. Instead, they will be usual Variables -/// and just share the version counters with the base Variables. +/// However, outputs of some functions, although sharing storage with inputs, +/// will **never** require gradient history tracking, and thus will not register +/// the above view relation in autograd using ViewImpl. Instead, they will be +/// usual Variables and just share the version counters with the base Variables. /// Some examples are: /// 1. Variables created from .detach(), /// 2. Variables created when GradMode::enabled() = false. -/// Relevant logic is implemented in make_variable_view below, and -/// wrap_output of gen_variable_type.py. 
+/// We call these non-differentiable views as the gradients do not flow through +/// the view relation. +/// Relevant logic for non-differentiable views is implemented in +/// make_variable_view below, and wrap_output of gen_variable_type.py. struct TORCH_API Variable::ViewImpl : public Variable::Impl { ViewImpl(Variable base, at::Tensor data, Edge gradient_edge); From 96d38b84c58cee03169f9e04e2b80a07327ed223 Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Tue, 9 Oct 2018 22:18:14 -0700 Subject: [PATCH 05/23] more comments --- torch/csrc/autograd/variable.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/torch/csrc/autograd/variable.h b/torch/csrc/autograd/variable.h index 62461358e5176..86af0b5e7b177 100644 --- a/torch/csrc/autograd/variable.h +++ b/torch/csrc/autograd/variable.h @@ -472,9 +472,11 @@ inline Variable make_variable_view( Edge gradient_edge = Edge()) { if (data.defined()) { if (potentially_tracks_history) { + /// Differentiable view. Track history with ViewImpl. return Variable(c10::make_intrusive( std::move(base), std::move(data), std::move(gradient_edge))); } else { + /// Non-differentiable view. Just share version counter. 
auto var = Variable(c10::make_intrusive( std::move(data), false, std::move(gradient_edge))); var.set_version_counter(base.version_counter()); From c91b9b7808f08595de45c86d3df12e87c59aae02 Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Wed, 10 Oct 2018 16:43:24 -0700 Subject: [PATCH 06/23] diff and nondiff views --- torch/csrc/autograd/VariableTypeUtils.h | 8 +-- torch/csrc/autograd/variable.cpp | 10 +-- torch/csrc/autograd/variable.h | 87 +++++++++++++++---------- 3 files changed, 60 insertions(+), 45 deletions(-) diff --git a/torch/csrc/autograd/VariableTypeUtils.h b/torch/csrc/autograd/VariableTypeUtils.h index 6dda7c69774f7..ef2e9b230aee3 100644 --- a/torch/csrc/autograd/VariableTypeUtils.h +++ b/torch/csrc/autograd/VariableTypeUtils.h @@ -104,23 +104,23 @@ template inline variable_list flatten_tensor_args(Args&&... ar } // See NOTE [ Autograd Variable Views ] for details. -inline Tensor as_view(const Tensor & base, Tensor tensor, bool potentially_tracks_history = true) { +inline Tensor as_view(const Tensor & base, Tensor tensor, bool is_differentiable = true) { auto base_var = Variable(base); if (base_var.is_view()) { base_var = base_var.base(); } - return make_variable_view(std::move(base_var), std::move(tensor), potentially_tracks_history); + return make_variable_view(std::move(base_var), std::move(tensor), is_differentiable); } // See NOTE [ Autograd Variable Views ] for details. 
inline std::vector as_view(const Tensor & base, std::vector tensors, - bool potentially_tracks_history = true) { + bool is_differentiable = true) { auto base_var = Variable(base); if (base_var.is_view()) { base_var = base_var.base(); } for(Tensor &tensor : tensors) { - tensor = make_variable_view(base_var, std::move(tensor), potentially_tracks_history); + tensor = make_variable_view(base_var, std::move(tensor), is_differentiable); } return tensors; } diff --git a/torch/csrc/autograd/variable.cpp b/torch/csrc/autograd/variable.cpp index 025b64a23b137..74a58f8454bd9 100644 --- a/torch/csrc/autograd/variable.cpp +++ b/torch/csrc/autograd/variable.cpp @@ -166,7 +166,7 @@ void Variable::Impl::release_resources() { hooks_.clear(); } -Variable::ViewImpl::ViewImpl(Variable base, at::Tensor data, Edge gradient_edge) +Variable::DifferentiableViewImpl::DifferentiableViewImpl(Variable base, at::Tensor data, Edge gradient_edge) : Variable::Impl(std::move(data), false, std::move(gradient_edge)), base_(std::move(base)) { AT_CHECK(base_.defined(), "base is undefined"); @@ -178,7 +178,7 @@ Variable::ViewImpl::ViewImpl(Variable base, at::Tensor data, Edge gradient_edge) attr_version = version_counter_.current_version(); } -std::shared_ptr& Variable::ViewImpl::get_grad_fn() { +std::shared_ptr& Variable::DifferentiableViewImpl::get_grad_fn() { std::lock_guard lock(mutex_); if (!grad_fn_ && !base_.requires_grad()) { return grad_fn_; @@ -202,7 +202,7 @@ std::shared_ptr& Variable::ViewImpl::get_grad_fn() { return grad_fn_; } -void Variable::ViewImpl::rebase_history(Edge gradient_edge) { +void Variable::DifferentiableViewImpl::rebase_history(Edge gradient_edge) { AT_ASSERT(gradient_edge.input_nr == 0); AT_ASSERT(gradient_edge.function); AT_CHECK( @@ -215,7 +215,7 @@ void Variable::ViewImpl::rebase_history(Edge gradient_edge) { get_grad_fn(); // trigger an update to the view's grad_fn } -void Variable::ViewImpl::release_resources() { +void 
Variable::DifferentiableViewImpl::release_resources() { Variable::Impl::release_resources(); base_.reset(); } @@ -223,7 +223,7 @@ void Variable::ViewImpl::release_resources() { void Variable::rebase_history(Edge gradient_edge) { AT_ASSERT(gradient_edge.function != nullptr); if (is_view()) { - auto& impl = static_cast(*get()); + auto& impl = static_cast(*get()); impl.rebase_history(std::move(gradient_edge)); } else { set_gradient_edge(std::move(gradient_edge)); diff --git a/torch/csrc/autograd/variable.h b/torch/csrc/autograd/variable.h index 86af0b5e7b177..19e25cd6af215 100644 --- a/torch/csrc/autograd/variable.h +++ b/torch/csrc/autograd/variable.h @@ -94,13 +94,13 @@ struct TORCH_API Variable : public at::Tensor { /// Creates a `Variable` that is a *view* of another (*base*) variable. /// The `gradient_edge` is an optional (gradient_function, input_number) pair. - /// `potentially_tracks_history` is a bool that specifies whether this view - /// relation should be considered by autograd. + /// `is_differentiable` is a bool that specifies whether this view is + /// differentiable, i.e., whether the relation should be tracked by autograd. /// See NOTE [ Autograd Variable Views ] for details. friend Variable make_variable_view( Variable base, at::Tensor data, - bool potentially_tracks_history, + bool is_differentiable, Edge gradient_edge); /// Creates a `Variable` from the given `Tensor`. `requires_grad` should be @@ -269,7 +269,7 @@ struct TORCH_API Variable : public at::Tensor { /// and the `get()` method which exposes it shall forever remain private and /// never be exposed to the public interface of this class. 
struct Impl; - struct ViewImpl; + struct DifferentiableViewImpl; // Private Methods //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -377,30 +377,38 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { }; //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -// Variable::ViewImpl +// Variable::DifferentiableViewImpl //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// NOTE [ Autograd Variable Views ] /// /// Many operations return Variable that shares storage with an input Variable. /// The returned Varaible is called a **view** Variable on the input **base** -/// Variable. Variable::ViewImple is created to support gradient tracking of -/// potential **in-place** operations on either of these two Variables. Note -/// that even if the base currently does not require grad, it is still important -/// to record this view relation to support operations like: +/// Variable. +/// +/// In PyTorch, we have two types of views: differentiable views, and +/// non-differentiable views. In either type, to support proper version +/// checking, the base and view Variables always share the same version_counter. +/// +/// +/// Differentiable Views +/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/// Differentiable views are the view variables you still want gradients to flow +/// back to the base variables. Out-of-place operations on views are quite +/// straightforward, but in-place ones on views are very tricky. Even if the +/// base variable may not require grad when we create the view, we still need to +/// track the view relation because future in-place ops may require back-proping +/// through it. 
We need to support autograd through +/// +/// (1) in-place operation on view, e.g., /// /// # Have: /// # base.requires_grad = False /// # var.requires_grad = True -/// base[1] = var +/// base[1] = var # i.e., base[1].copy_(var) /// torch.autograd.grad(base.sum(), var) <- should return an all ones tensor /// -/// Above example is effectively base[1].copy_(var). To support this, in the -/// rebase_history of base[1], we need to update the grad_fn of base. Therefore, -/// we still record the view relation between base and base[1] even though they -/// don't require gradients at creation time. -/// -/// A similar example but with in-place operation on base is: +/// (2) in-place operation on base after view is created, e.g., /// /// # Have: /// # base.requires_grad = False @@ -411,26 +419,33 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { /// var[1] filled with all ones and /// zeros everywhere else /// -/// In a view relation, the base and view Variables share the same -/// version_counter. The grad_fn field of the Variable may become stale due to -/// in-place modifications of the shared data. Accesses should go through -/// get_grad_fn(). All other fields are always valid. +/// Variable::DifferentiableViewImpl is created to support gradient tracking of +/// such **in-place** operations. In particular, +/// + if an in-place op is done on base, the grad_fn field of the view may +/// become stale. So accesses should go through get_grad_fn(), whcih +/// reconstruct an updated grad_fn if the version_counter had incremented. +/// All other fields are always valid. +/// + if an in-place op is done on view, in rebase_history() of view, which is +/// called after every in-place op in VariableType.cpp, the grad_fn of base +/// is updated. /// -/// Such view Variables have is_view() = true and use ViewImpl. 
/// -/// However, outputs of some functions, although sharing storage with inputs, -/// will **never** require gradient history tracking, and thus will not register -/// the above view relation in autograd using ViewImpl. Instead, they will be -/// usual Variables and just share the version counters with the base Variables. +/// Non-Differentiable Views +/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/// In certain cases, although function outputs share storage with inputs, they +/// will **never** require gradient history tracking. Instead of registering the +/// view relation via DifferentiableViewImpl in autograd, the views will be +/// using usual Varaible::Impl and just share the version counters with the base +/// Variables. /// Some examples are: -/// 1. Variables created from .detach(), -/// 2. Variables created when GradMode::enabled() = false. -/// We call these non-differentiable views as the gradients do not flow through -/// the view relation. +/// 1. Views created from .detach(), +/// 2. Views created when GradMode::enabled() = false. +/// These are called non-differentiable views as the gradients do not flow +/// through the view relation. /// Relevant logic for non-differentiable views is implemented in /// make_variable_view below, and wrap_output of gen_variable_type.py. -struct TORCH_API Variable::ViewImpl : public Variable::Impl { - ViewImpl(Variable base, at::Tensor data, Edge gradient_edge); +struct TORCH_API Variable::DifferentiableViewImpl : public Variable::Impl { + DifferentiableViewImpl(Variable base, at::Tensor data, Edge gradient_edge); /// Gets the up-to-date grad_fn. 
If the shared data or base was modified, we /// re-create the grad_fn to express the up-to-date view relationship between @@ -468,12 +483,12 @@ struct TORCH_API Variable::ViewImpl : public Variable::Impl { inline Variable make_variable_view( Variable base, at::Tensor data, - bool potentially_tracks_history = true, + bool is_differentiable = true, Edge gradient_edge = Edge()) { if (data.defined()) { - if (potentially_tracks_history) { - /// Differentiable view. Track history with ViewImpl. - return Variable(c10::make_intrusive( + if (is_differentiable) { + /// Differentiable view. Track history with DifferentiableViewImpl. + return Variable(c10::make_intrusive( std::move(base), std::move(data), std::move(gradient_edge))); } else { /// Non-differentiable view. Just share version counter. @@ -561,7 +576,7 @@ inline std::shared_ptr Variable::grad_accumulator() const { } inline Variable Variable::detach() const { - return make_variable_view(*this, get()->data_, /*potentially_tracks_history=*/false); + return make_variable_view(*this, get()->data_, /*is_differentiable=*/false); } inline void Variable::detach_() { From 90baa51269136d8075671488da2afe23624e7c76 Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Wed, 10 Oct 2018 20:48:39 -0700 Subject: [PATCH 07/23] more comments --- torch/csrc/autograd/functions/tensor.h | 4 ++++ torch/csrc/autograd/variable.h | 19 ++++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/torch/csrc/autograd/functions/tensor.h b/torch/csrc/autograd/functions/tensor.h index 101595368076e..56997ba6c3190 100644 --- a/torch/csrc/autograd/functions/tensor.h +++ b/torch/csrc/autograd/functions/tensor.h @@ -22,6 +22,10 @@ struct CopyBackwards : public Function { // Performs grad[idx] = fn(grad[idx]), but out-of-place. The slicing operation // grad[idx] is defined by the relative sizes, strides, and offset of base and // view. 
+// When an in-place operation is done on a differentiable view, the base's +// grad_fn is updated to become a `CopySlice` wrapping the backward of the +// in-place operation. +// See NOTE [ Autograd Variable Views ]. struct CopySlices : public Function { CopySlices( const Variable& base_var, diff --git a/torch/csrc/autograd/variable.h b/torch/csrc/autograd/variable.h index 19e25cd6af215..4deacb71b4640 100644 --- a/torch/csrc/autograd/variable.h +++ b/torch/csrc/autograd/variable.h @@ -388,19 +388,20 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { /// /// In PyTorch, we have two types of views: differentiable views, and /// non-differentiable views. In either type, to support proper version -/// checking, the base and view Variables always share the same version_counter. +/// checking, the base and view Variables must always share the same +/// version_counter. /// /// /// Differentiable Views /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// Differentiable views are the view variables you still want gradients to flow +/// Differentiable views are the view variables where you want gradients to flow /// back to the base variables. Out-of-place operations on views are quite -/// straightforward, but in-place ones on views are very tricky. Even if the -/// base variable may not require grad when we create the view, we still need to +/// straightforward, but in-place ones are very tricky. Even if the base +/// variable may not require grad when we create the view, we still need to /// track the view relation because future in-place ops may require back-proping -/// through it. We need to support autograd through +/// through it. 
For example, we need to support /// -/// (1) in-place operation on view, e.g., +/// (1) in-place operation on view like /// /// # Have: /// # base.requires_grad = False @@ -408,7 +409,7 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { /// base[1] = var # i.e., base[1].copy_(var) /// torch.autograd.grad(base.sum(), var) <- should return an all ones tensor /// -/// (2) in-place operation on base after view is created, e.g., +/// (2) in-place operation on base after view is created like /// /// # Have: /// # base.requires_grad = False @@ -422,8 +423,8 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { /// Variable::DifferentiableViewImpl is created to support gradient tracking of /// such **in-place** operations. In particular, /// + if an in-place op is done on base, the grad_fn field of the view may -/// become stale. So accesses should go through get_grad_fn(), whcih -/// reconstruct an updated grad_fn if the version_counter had incremented. +/// become stale. So accesses should always go through get_grad_fn(), which +/// reconstructs an updated grad_fn if the version_counter has incremented. /// All other fields are always valid. 
/// + if an in-place op is done on view, in rebase_history() of view, which is /// called after every in-place op in VariableType.cpp, the grad_fn of base From 5f4ba880b1f70f3fadf5a463185ee5d64a2ba3df Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Wed, 10 Oct 2018 21:29:09 -0700 Subject: [PATCH 08/23] rename note --- tools/autograd/gen_autograd.py | 2 +- tools/autograd/gen_variable_type.py | 2 +- torch/csrc/autograd/VariableTypeUtils.h | 4 ++-- torch/csrc/autograd/functions/tensor.h | 2 +- torch/csrc/autograd/variable.h | 8 ++++---- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/autograd/gen_autograd.py b/tools/autograd/gen_autograd.py index e23d0b881b035..4cf21ed7ad2ed 100644 --- a/tools/autograd/gen_autograd.py +++ b/tools/autograd/gen_autograd.py @@ -14,7 +14,7 @@ from collections import defaultdict from .utils import YamlLoader, split_name_params -# See NOTE [ Autograd Variable Views ] in variable.h for details. +# See NOTE [ Autograd View Variables ] in variable.h for details. VIEW_FUNCTIONS = { 'alias', 'as_strided', 'diagonal', 'expand', 'narrow', 'permute', 'select', 'slice', 'squeeze', 't', 'transpose', 'unfold', 'unsqueeze', 'view', 'unbind', diff --git a/tools/autograd/gen_variable_type.py b/tools/autograd/gen_variable_type.py index 39d3fc16dd12a..3f61f5341aed3 100644 --- a/tools/autograd/gen_variable_type.py +++ b/tools/autograd/gen_variable_type.py @@ -460,7 +460,7 @@ def wrap_output(call): elif is_view: # If `GradMode::is_enabled()` is False, this is a non-differentiable # view. Gradients should not flow through. - # See NOTE [ Autograd Variable Views ] in variable.h for details. + # See NOTE [ Autograd View Variables ] in variable.h for details. 
return 'as_view(self, {}, GradMode::is_enabled())'.format(call) else: return 'as_variable({})'.format(call) diff --git a/torch/csrc/autograd/VariableTypeUtils.h b/torch/csrc/autograd/VariableTypeUtils.h index ef2e9b230aee3..07cc420e42987 100644 --- a/torch/csrc/autograd/VariableTypeUtils.h +++ b/torch/csrc/autograd/VariableTypeUtils.h @@ -103,7 +103,7 @@ template inline variable_list flatten_tensor_args(Args&&... ar return out; // RVO } -// See NOTE [ Autograd Variable Views ] for details. +// See NOTE [ Autograd View Variables ] for details. inline Tensor as_view(const Tensor & base, Tensor tensor, bool is_differentiable = true) { auto base_var = Variable(base); if (base_var.is_view()) { @@ -112,7 +112,7 @@ inline Tensor as_view(const Tensor & base, Tensor tensor, bool is_differentiable return make_variable_view(std::move(base_var), std::move(tensor), is_differentiable); } -// See NOTE [ Autograd Variable Views ] for details. +// See NOTE [ Autograd View Variables ] for details. inline std::vector as_view(const Tensor & base, std::vector tensors, bool is_differentiable = true) { auto base_var = Variable(base); diff --git a/torch/csrc/autograd/functions/tensor.h b/torch/csrc/autograd/functions/tensor.h index 56997ba6c3190..11c447cb8bae3 100644 --- a/torch/csrc/autograd/functions/tensor.h +++ b/torch/csrc/autograd/functions/tensor.h @@ -25,7 +25,7 @@ struct CopyBackwards : public Function { // When an in-place operation is done on a differentiable view, the base's // grad_fn is updated to become a `CopySlice` wrapping the backward of the // in-place operation. -// See NOTE [ Autograd Variable Views ]. +// See NOTE [ Autograd View Variables ]. 
struct CopySlices : public Function { CopySlices( const Variable& base_var, diff --git a/torch/csrc/autograd/variable.h b/torch/csrc/autograd/variable.h index 4deacb71b4640..a82ccdfb8f7c3 100644 --- a/torch/csrc/autograd/variable.h +++ b/torch/csrc/autograd/variable.h @@ -64,7 +64,7 @@ struct Function; /// example, if you hide your code from autograd using `.no_grad()`, the /// `Variable`s will not be registered as having view relations, even if they /// share storage. -/// See NOTE [ Autograd Variable Views ] for more details. +/// See NOTE [ Autograd View Variables ] for more details. /// /// /// Interface @@ -96,7 +96,7 @@ struct TORCH_API Variable : public at::Tensor { /// The `gradient_edge` is an optional (gradient_function, input_number) pair. /// `is_differentiable` is a bool that specifies whether this view is /// differentiable, i.e., whether the relation should be tracked by autograd. - /// See NOTE [ Autograd Variable Views ] for details. + /// See NOTE [ Autograd View Variables ] for details. friend Variable make_variable_view( Variable base, at::Tensor data, @@ -380,7 +380,7 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { // Variable::DifferentiableViewImpl //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// NOTE [ Autograd Variable Views ] +/// NOTE [ Autograd View Variables ] /// /// Many operations return Variable that shares storage with an input Variable. /// The returned Varaible is called a **view** Variable on the input **base** @@ -480,7 +480,7 @@ struct TORCH_API Variable::DifferentiableViewImpl : public Variable::Impl { // Factory Functions //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -// See NOTE [ Autograd Variable Views ] for details. +// See NOTE [ Autograd View Variables ] for details. 
inline Variable make_variable_view( Variable base, at::Tensor data, From e6f79133a92102d930900089ad18eff0010138e3 Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Thu, 11 Oct 2018 12:05:36 -0700 Subject: [PATCH 09/23] fix typos --- torch/csrc/autograd/variable.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/torch/csrc/autograd/variable.h b/torch/csrc/autograd/variable.h index a82ccdfb8f7c3..4a4291dc661a9 100644 --- a/torch/csrc/autograd/variable.h +++ b/torch/csrc/autograd/variable.h @@ -383,7 +383,7 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { /// NOTE [ Autograd View Variables ] /// /// Many operations return Variable that shares storage with an input Variable. -/// The returned Varaible is called a **view** Variable on the input **base** +/// The returned Variable is called a **view** Variable on the input **base** /// Variable. /// /// In PyTorch, we have two types of views: differentiable views, and @@ -401,7 +401,7 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { /// track the view relation because future in-place ops may require back-proping /// through it. For example, we need to support /// -/// (1) in-place operation on view like +/// (1) in-place operation on view, e.g., /// /// # Have: /// # base.requires_grad = False @@ -409,7 +409,7 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { /// base[1] = var # i.e., base[1].copy_(var) /// torch.autograd.grad(base.sum(), var) <- should return an all ones tensor /// -/// (2) in-place operation on base after view is created like +/// (2) in-place operation on base after view is created, e.g., /// /// # Have: /// # base.requires_grad = False @@ -440,7 +440,7 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { /// Variables. /// Some examples are: /// 1. Views created from .detach(), -/// 2. Views created when GradMode::enabled() = false. +/// 2. Views created when GradMode::is_enabled() = false. 
/// These are called non-differentiable views as the gradients do not flow /// through the view relation. /// Relevant logic for non-differentiable views is implemented in From d31cda1ebf6c42ce03de2c6ba333c1acee008d7f Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Fri, 12 Oct 2018 09:56:27 -0700 Subject: [PATCH 10/23] typo --- torch/csrc/autograd/variable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch/csrc/autograd/variable.h b/torch/csrc/autograd/variable.h index 4a4291dc661a9..3b97435914477 100644 --- a/torch/csrc/autograd/variable.h +++ b/torch/csrc/autograd/variable.h @@ -436,7 +436,7 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { /// In certain cases, although function outputs share storage with inputs, they /// will **never** require gradient history tracking. Instead of registering the /// view relation via DifferentiableViewImpl in autograd, the views will be -/// using usual Varaible::Impl and just share the version counters with the base +/// using usual Variable::Impl and just share the version counters with the base /// Variables. /// Some examples are: /// 1. Views created from .detach(), From 41720423657063c973e21939c4b18e8566ec8a18 Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Fri, 3 Aug 2018 15:30:24 -0400 Subject: [PATCH 11/23] Get rid of HARDCODED_DIFFERENTIABLE_OUTPUTS. Replace it with `output_differentiability` in derivatives.yaml. Also relax the check that gradient formulas need to use all grad outputs. It is well possible that to compute a particular grad_input[i], only part of all grad_outputs are needed. add sparse get_values and make it back-prop-able Make get_values back-prop-able make indices and values view functions Make all sparse_coo ctors dispatch to a native function, _sparse_new_with_dims_and_tensor. Remove the dispatch mechanism on native_* native ctors, e.g., native_sparse_coo_tensor. Now all the code lives in functions like sparse_coo_tensor. 
Make sparse coo ctor a view function Make _newFlattenedIndices a native function Implement sparse_constructor_backward Get rid of NNZ optimization Move native/sparse/SparseUtils.h to SparseTensorUtils.h add getter docs make _set_coalesced a native fn and call it _coalesced_ sparseDims -> sparse_dim; denseDims -> dense_dim update test_print expect because I fixed _indices output to not have grad_fn now infer type first get_indices -> indices; get_values -> values purge options from sparse_coo_tensor with indices and values tensors Fix coalesced tests; update prints; use type dispatch for size only ctor Update note; support nondiff views; update prints workaround for sparse views and inplace ops --- aten/src/ATen/Declarations.cwrap | 4 +- aten/src/ATen/SparseTensorImpl.cpp | 22 +- aten/src/ATen/SparseTensorImpl.h | 93 +++--- aten/src/ATen/SparseTensorUtils.h | 109 +++++++ aten/src/ATen/core/Tensor.h | 14 +- aten/src/ATen/core/TensorMethods.h | 34 ++- aten/src/ATen/core/Type.h | 14 +- aten/src/ATen/core/aten_interned_strings.h | 11 +- aten/src/ATen/function_wrapper.py | 7 +- aten/src/ATen/native/LegacyBridge.cpp | 27 -- aten/src/ATen/native/TensorFactories.cpp | 4 +- aten/src/ATen/native/TensorShape.cpp | 56 ++-- aten/src/ATen/native/native_functions.yaml | 199 ++++++++++--- aten/src/ATen/native/sparse/SparseTensor.cpp | 273 ++++++++++-------- .../ATen/native/sparse/SparseTensorMath.cpp | 128 ++++---- aten/src/ATen/native/sparse/SparseUtils.h | 120 -------- .../sparse/cuda/SparseCUDAApplyUtils.cuh | 25 +- .../native/sparse/cuda/SparseCUDATensor.cpp | 21 +- .../native/sparse/cuda/SparseCUDATensor.cu | 19 +- .../sparse/cuda/SparseCUDATensorMath.cu | 67 +++-- aten/src/ATen/native_parse.py | 1 + aten/src/ATen/templates/TypeDefault.cpp | 1 + test/common_utils.py | 25 ++ test/expect/TestCudaSparse.test_print.expect | 48 ++- ...estCudaUncoalescedSparse.test_print.expect | 48 ++- test/expect/TestSparse.test_print.expect | 42 +-- .../TestUncoalescedSparse.test_print.expect | 
42 +-- test/test_autograd.py | 48 ++- test/test_sparse.py | 131 ++++----- tools/autograd/derivatives.yaml | 37 ++- tools/autograd/gen_autograd.py | 49 ++-- tools/autograd/gen_python_functions.py | 2 +- tools/autograd/gen_variable_type.py | 100 +++++-- tools/autograd/load_derivatives.py | 41 +-- tools/autograd/templates/Functions.cpp | 26 +- torch/_tensor_docs.py | 56 +++- torch/csrc/cuda/comm.cpp | 4 +- torch/csrc/utils/tensor_flatten.cpp | 7 +- torch/csrc/utils/tensor_new.cpp | 14 +- 39 files changed, 1161 insertions(+), 808 deletions(-) create mode 100644 aten/src/ATen/SparseTensorUtils.h delete mode 100644 aten/src/ATen/native/sparse/SparseUtils.h diff --git a/aten/src/ATen/Declarations.cwrap b/aten/src/ATen/Declarations.cwrap index 889aecfa84b04..aceec5106c56d 100644 --- a/aten/src/ATen/Declarations.cwrap +++ b/aten/src/ATen/Declarations.cwrap @@ -3266,7 +3266,9 @@ name: alias return: THTensor* cpu_half: True - variants: [function] + variants: + - method + - function options: - cname: newWithTensor arguments: diff --git a/aten/src/ATen/SparseTensorImpl.cpp b/aten/src/ATen/SparseTensorImpl.cpp index d7f04e2f261d7..1ffba3fd04952 100644 --- a/aten/src/ATen/SparseTensorImpl.cpp +++ b/aten/src/ATen/SparseTensorImpl.cpp @@ -22,18 +22,18 @@ namespace { // a scalar and have one element) // // Thus, an empty sparse tensor should be a 1-dimensional tensor of size [0]. -// Furthermore, we have dim == sparseDims + denseDims; since this is a sparse -// tensor, let us say that an empty sparse tensor has sparseDims == 1 and -// denseDims == 0. (There is a degree of freedom here, but given that this -// is a sparse dimension, it seems reasonable to demand that sparseDims > 0). +// Furthermore, we have dim == sparse_dim + dense_dim; since this is a sparse +// tensor, let us say that an empty sparse tensor has sparse_dim == 1 and +// dense_dim == 0. 
(There is a degree of freedom here, but given that this +// is a sparse dimension, it seems reasonable to demand that sparse_dim > 0). // // This means that we allocate a [1,0] size indices tensor and a [0] size // values tensor for such an empty tensor. SparseTensorImpl::SparseTensorImpl(at::TensorTypeId type_id, const caffe2::TypeMeta& data_type) : TensorImpl(type_id, data_type, nullptr, false) , size_{0} - , sparseDims_(1) - , denseDims_(0) + , sparse_dim_(1) + , dense_dim_(0) , indices_(at::empty({1, 0}, at::initialTensorOptions().device(sparseTensorIdToDeviceType(type_id)).dtype(ScalarType::Long))) , values_(at::empty({0}, at::initialTensorOptions().device(sparseTensorIdToDeviceType(type_id)).dtype(dataTypeToScalarType(data_type.id())))) {} @@ -67,7 +67,7 @@ void SparseTensorImpl::set_storage_offset(int64_t storage_offset) { } int64_t SparseTensorImpl::dim() const { - return sparseDims_ + denseDims_; + return sparse_dim_ + dense_dim_; } TensorImpl* SparseTensorImpl::maybe_zero_dim(bool condition_when_zero_dim) { AT_CHECK(condition_when_zero_dim == (dim() == 0), @@ -83,6 +83,8 @@ int64_t SparseTensorImpl::storage_offset() const { AT_ERROR("sparse tensors do not have storage"); } void SparseTensorImpl::set_indices_and_values_unsafe(const Tensor& indices, const Tensor& values) { + AT_ASSERT(!indices.is_variable() && !values.is_variable()); // They should be plain tensors! 
+ AT_CHECK(values.type().toSparse() == type(), "values type must match sparse tensor type"); AT_CHECK(indices.type().scalarType() == kLong, "indices must be an int64 tensor"); AT_CHECK(indices.type().backend() == values.type().backend(), "backend of indices (", indices.type().backend(), ") must match backend of values (", values.type().backend(), ")"); @@ -90,10 +92,10 @@ void SparseTensorImpl::set_indices_and_values_unsafe(const Tensor& indices, cons AT_CHECK(indices.dim() == 2, "indices must be nDim x nnz, but got: ", indices.sizes()); AT_CHECK(indices.size(1) == values.size(0), "indices and values must have same nnz, but got nnz from indices: ", indices.size(1), ", nnz from values: ", values.size(0)); - AT_CHECK(indices.size(0) == sparseDims_, "indices has incorrect first dimension, expected ", sparseDims_, ", got ", indices.size(0)); - AT_CHECK(values.dim() == denseDims_ + 1, "values has incorrect number of dimensions, expected ", denseDims_ + 1, ", got ", values.dim()); + AT_CHECK(indices.size(0) == sparse_dim_, "indices has incorrect first dimension, expected ", sparse_dim_, ", got ", indices.size(0)); + AT_CHECK(values.dim() == dense_dim_ + 1, "values has incorrect number of dimensions, expected ", dense_dim_ + 1, ", got ", values.dim()); - auto dense_size_original = sizes().slice(sparseDims_); + auto dense_size_original = sizes().slice(sparse_dim_); std::vector expected_values_size_vec = {values.size(0)}; expected_values_size_vec.insert(expected_values_size_vec.end(), dense_size_original.begin(), dense_size_original.end()); IntList expected_values_size(expected_values_size_vec); diff --git a/aten/src/ATen/SparseTensorImpl.h b/aten/src/ATen/SparseTensorImpl.h index 9a68f80a912b6..1a8fa8b29baea 100644 --- a/aten/src/ATen/SparseTensorImpl.h +++ b/aten/src/ATen/SparseTensorImpl.h @@ -9,18 +9,18 @@ struct CAFFE2_API SparseTensorImpl : public TensorImpl { // Stored in COO format, indices + values. 
// INVARIANTS: - // _sparseDims: range [0, len(shape)]; _sparseDims + _denseDims = len(shape) - // _denseDims : range [0, len(shape)]; _sparseDims + _denseDims = len(shape) - // _indices.shape: dimensionality: 2, shape: (_sparseDims, nnz) - // _values.shape: dimensionality: 1 + _denseDims. shape: (nnz, shape[_sparseDims:]) + // sparse_dim: range [0, len(shape)]; sparse_dim + dense_dim = len(shape) + // dense_dim : range [0, len(shape)]; sparse_dim + dense_dim = len(shape) + // _indices.shape: dimensionality: 2, shape: (sparse_dim, nnz) + // _values.shape: dimensionality: 1 + dense_dim. shape: (nnz, shape[sparse_dim:]) // The true size of the sparse tensor (e.g., if you called to_dense() // on it). When THTensor merges into TensorImpl, this field // should move to the parent class. std::vector size_; - int64_t sparseDims_ = 0; // number of sparse dimensions - int64_t denseDims_ = 0; // number of dense dimensions + int64_t sparse_dim_ = 0; // number of sparse dimensions + int64_t dense_dim_ = 0; // number of dense dimensions Tensor indices_; // always a LongTensor Tensor values_; @@ -39,8 +39,8 @@ struct CAFFE2_API SparseTensorImpl : public TensorImpl { explicit SparseTensorImpl(at::TensorTypeId, const caffe2::TypeMeta&); int64_t nnz() const { return values_.size(0); } - int64_t sparseDims() const { return sparseDims_; } - int64_t denseDims() const { return denseDims_; } + int64_t sparse_dim() const { return sparse_dim_; } + int64_t dense_dim() const { return dense_dim_; } bool coalesced() const { return coalesced_; } Tensor indices() const { return indices_; } Tensor values() const { return values_; } @@ -60,16 +60,16 @@ struct CAFFE2_API SparseTensorImpl : public TensorImpl { const Storage& storage() const override; int64_t storage_offset() const override; - // WARNING: This function does NOT preserve invariants of sparseDims/denseDims with + // WARNING: This function does NOT preserve invariants of sparse_dim/dense_dim with // respect to indices and values - void 
raw_resize_(int64_t sparseDims, int64_t denseDims, IntList size) { + void raw_resize_(int64_t sparse_dim, int64_t dense_dim, IntList size) { size_ = size.vec(); - sparseDims_ = sparseDims; - denseDims_ = denseDims; + sparse_dim_ = sparse_dim; + dense_dim_ = dense_dim; refresh_numel(); } - // NOTE: This function preserves invariants of sparseDims/denseDims with respect to + // NOTE: This function preserves invariants of sparse_dim/dense_dim with respect to // indices and values. // // NOTE: This function supports the following cases: @@ -91,36 +91,36 @@ struct CAFFE2_API SparseTensorImpl : public TensorImpl { // and for API consistency we don't support it). // 4. When we attempt to shrink the size of any of the sparse dimensions on a non-empty sparse tensor // (this could make some of the stored indices out-of-bound and thus unsafe). - void resize_(int64_t sparseDims, int64_t denseDims, IntList size) { - AT_CHECK(sparseDims + denseDims == size.size(), "number of dimensions must be sparseDims (", sparseDims, ") + denseDims (", denseDims, "), but got ", size.size()); + void resize_(int64_t sparse_dim, int64_t dense_dim, IntList size) { + AT_CHECK(sparse_dim + dense_dim == size.size(), "number of dimensions must be sparse_dim (", sparse_dim, ") + dense_dim (", dense_dim, "), but got ", size.size()); if (nnz() > 0) { auto alt_options_msg = "You could try the following options:\n\ -1. If you need an empty sparse tensor of this size, call `x=torch.sparse_coo_tensor(size)`.\n\ +1. If you need an empty sparse tensor of this size, call `x = torch.sparse_coo_tensor(size)`.\n\ 2. If you need to resize this tensor, you have the following options:\n\ 1. For both sparse and dense dimensions, keep the number of them constant and the size of them non-shrinking, and then try the same call again.\n\ 2. 
Or, create a new sparse tensor with the correct indices and values from this sparse tensor."; - AT_CHECK(sparseDims == sparseDims_, - "changing the number of sparse dimensions (from ", sparseDims_, " to ", sparseDims, ") on a non-empty sparse tensor is not supported.\n", alt_options_msg); + AT_CHECK(sparse_dim == sparse_dim_, + "changing the number of sparse dimensions (from ", sparse_dim_, " to ", sparse_dim, ") on a non-empty sparse tensor is not supported.\n", alt_options_msg); - AT_CHECK(denseDims == denseDims_, - "changing the number of dense dimensions (from ", denseDims_, " to ", denseDims, ") on a non-empty sparse tensor is not supported.\n", alt_options_msg); + AT_CHECK(dense_dim == dense_dim_, + "changing the number of dense dimensions (from ", dense_dim_, " to ", dense_dim, ") on a non-empty sparse tensor is not supported.\n", alt_options_msg); bool shrinking_sparse_dims = false; - bool shrinking_dense_dims = false; - auto sparse_size_original = sizes().slice(0, sparseDims); - auto sparse_size_new = size.slice(0, sparseDims); - for (int i = 0; i < sparseDims; i++) { + bool shrinking_dense_dim = false; + auto sparse_size_original = sizes().slice(0, sparse_dim); + auto sparse_size_new = size.slice(0, sparse_dim); + for (int i = 0; i < sparse_dim; i++) { if (sparse_size_new[i] < sparse_size_original[i]) { shrinking_sparse_dims = true; break; } } - auto dense_size_original = sizes().slice(sparseDims); - auto dense_size_new = size.slice(sparseDims); - for (int i = 0; i < denseDims; i++) { + auto dense_size_original = sizes().slice(sparse_dim); + auto dense_size_new = size.slice(sparse_dim); + for (int i = 0; i < dense_dim; i++) { if (dense_size_new[i] < dense_size_original[i]) { - shrinking_dense_dims = true; + shrinking_dense_dim = true; break; } } @@ -128,38 +128,36 @@ struct CAFFE2_API SparseTensorImpl : public TensorImpl { AT_CHECK(!shrinking_sparse_dims, "shrinking the size of sparse dimensions (from ", sparse_size_original, " to ", sparse_size_new, ") 
on a non-empty sparse tensor is not supported.\n", alt_options_msg); - AT_CHECK(!shrinking_dense_dims, + AT_CHECK(!shrinking_dense_dim, "shrinking the size of dense dimensions (from ", dense_size_original, " to ", dense_size_new, ") on a non-empty sparse tensor is not supported.\n", alt_options_msg); } - if ((!size.equals(size_)) || (sparseDims != sparseDims_) || (denseDims != denseDims_)) { - std::vector values_size = {values().size(0)}; - auto dense_size = size.slice(sparseDims); + if ((!size.equals(size_)) || (sparse_dim != sparse_dim_) || (dense_dim != dense_dim_)) { + auto nnz = values().size(0); + std::vector values_size = {nnz}; + auto dense_size = size.slice(sparse_dim); values_size.insert(values_size.end(), dense_size.begin(), dense_size.end()); values_.resize_(values_size); - - std::vector indices_size = indices().sizes().vec(); - indices_size[0] = sparseDims; - indices_.resize_(indices_size); + indices_.resize_({sparse_dim, nnz}); } size_ = size.vec(); - sparseDims_ = sparseDims; - denseDims_ = denseDims; + sparse_dim_ = sparse_dim; + dense_dim_ = dense_dim; refresh_numel(); } // NOTE: this function will resize the sparse tensor and also set `indices` and `values` to empty. 
- void resize_and_clear_(int64_t sparseDims, int64_t denseDims, IntList size) { - AT_CHECK(sparseDims + denseDims == size.size(), "number of dimensions must be sparseDims (", sparseDims, ") + denseDims (", denseDims, "), but got ", size.size()); + void resize_and_clear_(int64_t sparse_dim, int64_t dense_dim, IntList size) { + AT_CHECK(sparse_dim + dense_dim == size.size(), "number of dimensions must be sparse_dim (", sparse_dim, ") + dense_dim (", dense_dim, "), but got ", size.size()); size_ = size.vec(); - sparseDims_ = sparseDims; - denseDims_ = denseDims; + sparse_dim_ = sparse_dim; + dense_dim_ = dense_dim; - auto empty_indices = at::empty({sparseDims, 0}, indices().options()); + auto empty_indices = at::empty({sparse_dim, 0}, indices().options()); std::vector values_size = {0}; - auto dense_size = sizes().slice(sparseDims); + auto dense_size = sizes().slice(sparse_dim); values_size.insert(values_size.end(), dense_size.begin(), dense_size.end()); auto empty_values = at::empty(values_size, values().options()); set_indices_and_values_unsafe(empty_indices, empty_values); @@ -169,9 +167,10 @@ struct CAFFE2_API SparseTensorImpl : public TensorImpl { void set_coalesced(bool coalesced) { coalesced_ = coalesced; } // NOTE: this function is only used internally and not exposed to Python frontend - void set_nnz_and_narrow(int64_t nnz) { - indices_ = indices_.narrow(1, 0, nnz); - values_ = values_.narrow(0, 0, nnz); + void set_nnz_and_narrow(int64_t new_nnz) { + AT_ASSERT(new_nnz <= nnz()); + indices_ = indices_.narrow(1, 0, new_nnz); + values_ = values_.narrow(0, 0, new_nnz); } // Takes indices and values and directly puts them into the sparse tensor, no copy. 
diff --git a/aten/src/ATen/SparseTensorUtils.h b/aten/src/ATen/SparseTensorUtils.h new file mode 100644 index 0000000000000..7503b83c10b03 --- /dev/null +++ b/aten/src/ATen/SparseTensorUtils.h @@ -0,0 +1,109 @@ +#include +#include + +namespace at { namespace sparse { + +// Just for documentary purposes +using SparseTensor = Tensor; +using LongTensor = Tensor; +using IntTensor = Tensor; +using SparseType = Type; + +// This is an internal utility function for getting at the SparseTensorImpl, +// so that we can write sparse tensor specific accessors for special fields +// in SparseTensor. You should only use this for writing low level +// setters/getters for SparseTensorImpl fields; otherwise, you should use +// the low level setters/getters that were implemented using this. +// +// This may be called repeatedly, so make sure it's pretty cheap. +inline SparseTensorImpl* get_sparse_impl(const SparseTensor& self) { + AT_ASSERTM(!self.is_variable(), "_internal_get_SparseTensorImpl: should not be a variable"); + AT_ASSERTM(self.is_sparse(), "_internal_get_SparseTensorImpl: not a sparse tensor"); + return static_cast(self.unsafeGetTensorImpl()); +} + +// Port of the old THCSTensor_(checkGPU), but it doesn't really belong here +// because it is more general +// NB: I dropped kernelP2PEnabled support +// NB: This only works if the tensors are KNOWN to be CUDA. +// TODO: Generalize it so it works on CPU as well +inline bool check_device(ArrayRef ts) { + if (ts.empty()) { + return true; + } + const Tensor& ref_t = ts.front(); + int64_t curDevice = current_device(); + for (const Tensor& t : ts) { + if (t.get_device() != curDevice) return false; + } + return true; +} + +// Takes indices and values and directly puts them into the sparse tensor, no +// copy. 
This used to be called THSTensor_(_move) +inline void alias_into_sparse(const SparseTensor& self, const LongTensor& indices, const Tensor& values) { + get_sparse_impl(self)->set_indices_and_values_unsafe(indices, values); +} + +// Take indices and values and makes a (data) copy of them to put into the sparse +// indices/values. This used to be called THSTensor_(_set) +inline void copy_into_sparse(const SparseTensor& self, const LongTensor& indices, const Tensor& values, bool non_blocking) { + alias_into_sparse(self, self._indices().type().copy(indices, non_blocking), self._values().type().copy(values, non_blocking)); +} + +// TODO: put this into the public API +inline bool is_same_tensor(const Tensor& lhs, const Tensor& rhs) { + return lhs.unsafeGetTensorImpl() == rhs.unsafeGetTensorImpl(); +} + +inline bool is_same_density(const SparseTensor& self, const SparseTensor& src) { + return self.sparse_dim() == src.sparse_dim() && self.dense_dim() == src.dense_dim(); +} + +// Give us a new values tensor, with the same dimensionality +// as 'values' but with a new number of non-zero elements. +// TODO: Expose this for real in ATen, some day? +// NB: Doesn't preserve data. +inline Tensor new_values_with_size_of(const Tensor& values, int64_t nnz) { + std::vector size = values.sizes().vec(); + size[0] = nnz; + return at::empty(size, values.options()); +} + +// This helper function flattens a sparse indices tensor (a LongTensor) into a 1D +// indices tensor. E.g., +// input = [[2, 4, 0], +// [3, 1, 10]] +// full_size = [2, 12] +// output = [ 2 * 12 + 3, 4 * 12 + 1, 0 * 12 + 10 ] = [27, 49, 10] +// +// In other words, assuming that each `indices[i, :]` is a valid index to a +// tensor `t` of shape `full_size`. This returns the corresponding indices to +// the flattened tensor `t.reshape( prod(full_size[:indices.size(0)]), -1 )`. +// if forceClone is true, the result will forced to be a clone of self. 
+inline LongTensor flatten_indices(const Tensor& indices, IntList full_size, bool forceClone = false) { + int64_t sparse_dim = indices.size(0); + if (sparse_dim == 1) { + if (forceClone) { + return indices.squeeze(0).clone(); + } else { + return indices.squeeze(0); + } + } else { + std::vector indices_mult_cpu_vec(sparse_dim); + int64_t mult = 1; + for (int64_t i = sparse_dim - 1; i >= 0; i--) { + indices_mult_cpu_vec[i] = mult; + mult *= full_size[i]; + } + auto indices_mult_cpu = indices.type().toBackend(/*Dense*/at::Backend::CPU) + .tensorFromBlob(indices_mult_cpu_vec.data(), /*size=*/{sparse_dim, 1}); + auto indices_mult = indices_mult_cpu.to(indices.device(), /*non_blocking=*/true); + // Ideally we want matmul but matmul is slow on CPU Long and not implemented + // on CUDA Long. So mul is faster. + return indices.mul(indices_mult).sum(0); + } +} + + +}} // namespace at::sparse diff --git a/aten/src/ATen/core/Tensor.h b/aten/src/ATen/core/Tensor.h index 950ab1d6f9181..fa27c4453da91 100644 --- a/aten/src/ATen/core/Tensor.h +++ b/aten/src/ATen/core/Tensor.h @@ -404,6 +404,7 @@ class CAFFE2_API Tensor { Tensor & log_normal_(double mean=1, double std=2, Generator * generator=nullptr); Tensor & exponential_(double lambd=1, Generator * generator=nullptr); Tensor & geometric_(double p, Generator * generator=nullptr); + Tensor alias() const; Tensor abs() const; Tensor & abs_(); Tensor acos() const; @@ -621,17 +622,22 @@ class CAFFE2_API Tensor { Tensor & sub_(Scalar other, Scalar alpha=1); Tensor addmm(const Tensor & mat1, const Tensor & mat2, Scalar beta=1, Scalar alpha=1) const; Tensor & addmm_(const Tensor & mat1, const Tensor & mat2, Scalar beta=1, Scalar alpha=1); - Tensor & sparse_resize_(IntList size, int64_t sparseDims, int64_t denseDims); - Tensor & sparse_resize_and_clear_(IntList size, int64_t sparseDims, int64_t denseDims); + Tensor & sparse_resize_(IntList size, int64_t sparse_dim, int64_t dense_dim); + Tensor & sparse_resize_and_clear_(IntList size, 
int64_t sparse_dim, int64_t dense_dim); Tensor sparse_mask(SparseTensorRef mask) const; Tensor to_dense() const; - int64_t _sparseDims() const; - int64_t _denseDims() const; + int64_t sparse_dim() const; + int64_t _dimI() const; + int64_t dense_dim() const; + int64_t _dimV() const; int64_t _nnz() const; Tensor coalesce() const; bool is_coalesced() const; Tensor _indices() const; Tensor _values() const; + Tensor & _coalesced_(bool coalesced); + Tensor indices() const; + Tensor values() const; int64_t numel() const; std::vector unbind(int64_t dim=0) const; int64_t get_device() const; diff --git a/aten/src/ATen/core/TensorMethods.h b/aten/src/ATen/core/TensorMethods.h index ba5044963c0fa..27e5a718d1687 100644 --- a/aten/src/ATen/core/TensorMethods.h +++ b/aten/src/ATen/core/TensorMethods.h @@ -515,6 +515,9 @@ inline Tensor & Tensor::exponential_(double lambd, Generator * generator) { inline Tensor & Tensor::geometric_(double p, Generator * generator) { return type().geometric_(*this, p, generator); } +inline Tensor Tensor::alias() const { + return type().alias(*this); +} inline Tensor Tensor::abs() const { return type().abs(*this); } @@ -1166,11 +1169,11 @@ inline Tensor Tensor::addmm(const Tensor & mat1, const Tensor & mat2, Scalar bet inline Tensor & Tensor::addmm_(const Tensor & mat1, const Tensor & mat2, Scalar beta, Scalar alpha) { return type().addmm_(*this, mat1, mat2, beta, alpha); } -inline Tensor & Tensor::sparse_resize_(IntList size, int64_t sparseDims, int64_t denseDims) { - return type().sparse_resize_(*this, size, sparseDims, denseDims); +inline Tensor & Tensor::sparse_resize_(IntList size, int64_t sparse_dim, int64_t dense_dim) { + return type().sparse_resize_(*this, size, sparse_dim, dense_dim); } -inline Tensor & Tensor::sparse_resize_and_clear_(IntList size, int64_t sparseDims, int64_t denseDims) { - return type().sparse_resize_and_clear_(*this, size, sparseDims, denseDims); +inline Tensor & Tensor::sparse_resize_and_clear_(IntList size, int64_t 
sparse_dim, int64_t dense_dim) { + return type().sparse_resize_and_clear_(*this, size, sparse_dim, dense_dim); } inline Tensor Tensor::sparse_mask(SparseTensorRef mask) const { return type().sparse_mask(*this, mask); @@ -1178,11 +1181,17 @@ inline Tensor Tensor::sparse_mask(SparseTensorRef mask) const { inline Tensor Tensor::to_dense() const { return type().to_dense(*this); } -inline int64_t Tensor::_sparseDims() const { - return type()._sparseDims(*this); +inline int64_t Tensor::sparse_dim() const { + return type().sparse_dim(*this); +} +inline int64_t Tensor::_dimI() const { + return type()._dimI(*this); +} +inline int64_t Tensor::dense_dim() const { + return type().dense_dim(*this); } -inline int64_t Tensor::_denseDims() const { - return type()._denseDims(*this); +inline int64_t Tensor::_dimV() const { + return type()._dimV(*this); } inline int64_t Tensor::_nnz() const { return type()._nnz(*this); @@ -1199,6 +1208,15 @@ inline Tensor Tensor::_indices() const { inline Tensor Tensor::_values() const { return type()._values(*this); } +inline Tensor & Tensor::_coalesced_(bool coalesced) { + return type()._coalesced_(*this, coalesced); +} +inline Tensor Tensor::indices() const { + return type().indices(*this); +} +inline Tensor Tensor::values() const { + return type().values(*this); +} inline int64_t Tensor::numel() const { return type().numel(*this); } diff --git a/aten/src/ATen/core/Type.h b/aten/src/ATen/core/Type.h index 5f666cdcb61fc..6ad8913363aa4 100644 --- a/aten/src/ATen/core/Type.h +++ b/aten/src/ATen/core/Type.h @@ -367,6 +367,7 @@ struct CAFFE2_API Type { virtual Tensor & log_normal_(Tensor & self, double mean, double std, Generator * generator) const = 0; virtual Tensor & exponential_(Tensor & self, double lambd, Generator * generator) const = 0; virtual Tensor & geometric_(Tensor & self, double p, Generator * generator) const = 0; + virtual Tensor alias(const Tensor & self) const = 0; virtual Tensor abs(const Tensor & self) const = 0; virtual Tensor & 
abs_(Tensor & self) const = 0; virtual Tensor acos(const Tensor & self) const = 0; @@ -584,17 +585,22 @@ struct CAFFE2_API Type { virtual Tensor & sub_(Tensor & self, Scalar other, Scalar alpha) const = 0; virtual Tensor addmm(const Tensor & self, const Tensor & mat1, const Tensor & mat2, Scalar beta, Scalar alpha) const = 0; virtual Tensor & addmm_(Tensor & self, const Tensor & mat1, const Tensor & mat2, Scalar beta, Scalar alpha) const = 0; - virtual Tensor & sparse_resize_(Tensor & self, IntList size, int64_t sparseDims, int64_t denseDims) const = 0; - virtual Tensor & sparse_resize_and_clear_(Tensor & self, IntList size, int64_t sparseDims, int64_t denseDims) const = 0; + virtual Tensor & sparse_resize_(Tensor & self, IntList size, int64_t sparse_dim, int64_t dense_dim) const = 0; + virtual Tensor & sparse_resize_and_clear_(Tensor & self, IntList size, int64_t sparse_dim, int64_t dense_dim) const = 0; virtual Tensor sparse_mask(const Tensor & self, SparseTensorRef mask) const = 0; virtual Tensor to_dense(const Tensor & self) const = 0; - virtual int64_t _sparseDims(const Tensor & self) const = 0; - virtual int64_t _denseDims(const Tensor & self) const = 0; + virtual int64_t sparse_dim(const Tensor & self) const = 0; + virtual int64_t _dimI(const Tensor & self) const = 0; + virtual int64_t dense_dim(const Tensor & self) const = 0; + virtual int64_t _dimV(const Tensor & self) const = 0; virtual int64_t _nnz(const Tensor & self) const = 0; virtual Tensor coalesce(const Tensor & self) const = 0; virtual bool is_coalesced(const Tensor & self) const = 0; virtual Tensor _indices(const Tensor & self) const = 0; virtual Tensor _values(const Tensor & self) const = 0; + virtual Tensor & _coalesced_(Tensor & self, bool coalesced) const = 0; + virtual Tensor indices(const Tensor & self) const = 0; + virtual Tensor values(const Tensor & self) const = 0; virtual int64_t numel(const Tensor & self) const = 0; virtual std::vector unbind(const Tensor & self, int64_t dim) const = 
0; virtual int64_t get_device(const Tensor & self) const = 0; diff --git a/aten/src/ATen/core/aten_interned_strings.h b/aten/src/ATen/core/aten_interned_strings.h index e7f487f264f64..8045fe3458f0d 100644 --- a/aten/src/ATen/core/aten_interned_strings.h +++ b/aten/src/ATen/core/aten_interned_strings.h @@ -96,7 +96,6 @@ _(aten, _logspace) \ _(aten, _masked_scale) \ _(aten, _mm) \ _(aten, _mv) \ -_(aten, _native_sparse_coo_tensor_unsafe) \ _(aten, _nnz) \ _(aten, _pack_padded_sequence) \ _(aten, _pack_padded_sequence_backward) \ @@ -364,6 +363,7 @@ _(aten, index_copy) \ _(aten, index_fill) \ _(aten, index_put) \ _(aten, index_select) \ +_(aten, indices) \ _(aten, instance_norm) \ _(aten, inverse) \ _(aten, irfft) \ @@ -478,7 +478,6 @@ _(aten, native_get_device) \ _(aten, native_norm) \ _(aten, native_pow) \ _(aten, native_resize_as) \ -_(aten, native_sparse_coo_tensor) \ _(aten, native_tensor) \ _(aten, native_zero) \ _(aten, ne) \ @@ -594,6 +593,8 @@ _(aten, softshrink_backward) \ _(aten, softshrink_forward) \ _(aten, sort) \ _(aten, sparse_coo_tensor) \ +_(aten, sparse_coo_tensor_with_dims) \ +_(aten, sparse_coo_tensor_with_dims_and_tensors) \ _(aten, sparse_mask) \ _(aten, sparse_resize) \ _(aten, sparse_resize_and_clear) \ @@ -685,6 +686,7 @@ _(aten, upsample_nearest3d_forward) \ _(aten, upsample_trilinear3d) \ _(aten, upsample_trilinear3d_backward) \ _(aten, upsample_trilinear3d_forward) \ +_(aten, values) \ _(aten, var) \ _(aten, view) \ _(aten, view_as) \ @@ -737,7 +739,7 @@ _(attr, cudnn_enabled) \ _(attr, cx) \ _(attr, cy) \ _(attr, data) \ -_(attr, denseDims) \ +_(attr, dense_dim) \ _(attr, descending) \ _(attr, deterministic) \ _(attr, device) \ @@ -931,7 +933,8 @@ _(attr, some) \ _(attr, sorted) \ _(attr, source) \ _(attr, sparse) \ -_(attr, sparseDims) \ +_(attr, sparse_dim) \ +_(attr, sparse_dtype) \ _(attr, spatialScale) \ _(attr, split_size) \ _(attr, split_sizes) \ diff --git a/aten/src/ATen/function_wrapper.py b/aten/src/ATen/function_wrapper.py 
index 132c036b0358f..1d68fd365e175 100644 --- a/aten/src/ATen/function_wrapper.py +++ b/aten/src/ATen/function_wrapper.py @@ -499,6 +499,7 @@ def __getitem__(self, x): 'native_type_method_dispatch': str, # options should be List[FunctionOption] 'options': Any, + 'requires_tensor': bool, 'return_call': str, 'return_type': str, 'return': ReturnDecl, @@ -527,6 +528,7 @@ def __getitem__(self, x): ('returns', List[ReturnType]), ('inplace', bool), ('abstract', bool), + ('requires_tensor', bool), ('device_guard', bool), ('with_gil', bool), ('deprecated', bool), @@ -559,7 +561,8 @@ def is_mutable_formal_argument(argument, option): def check_methods_do_not_start_with_underscore(name, is_method): - if name in {'_local_scalar', '_values', '_indices', '_nnz', '_sparseDims', '_denseDims'}: + if name in {'_local_scalar', '_values', '_indices', '_nnz', '_dimI', + '_dimV', '_coalesced_'}: return if is_method and name.startswith('_') and not name.startswith('__'): message = "Function '{}' starts with a single underscore and is ".format(name) @@ -923,6 +926,7 @@ def process_option(option, output_options): inplace=option['inplace'], # See Note [Abstract ATen methods] abstract=abstract, + requires_tensor=option.get('requires_tensor', False), device_guard=option.get('device_guard', True), with_gil=option.get('with_gil', False), deprecated=option.get('deprecated', False) @@ -1160,6 +1164,7 @@ def find_formal(formal_name, formals): inplace=option['inplace'], # See Note [Abstract ATen methods] abstract=abstract, + requires_tensor=option.get('requires_tensor', False), device_guard=option.get('device_guard', True), with_gil=option.get('with_gil', False), deprecated=option['deprecated'], diff --git a/aten/src/ATen/native/LegacyBridge.cpp b/aten/src/ATen/native/LegacyBridge.cpp index 930832416db17..1537fcd2db8c8 100644 --- a/aten/src/ATen/native/LegacyBridge.cpp +++ b/aten/src/ATen/native/LegacyBridge.cpp @@ -134,33 +134,6 @@ Tensor& addmm_(Tensor& self, const Tensor& mat1, const Tensor& 
mat2, Scalar beta } } -Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values) { - return at::getType(values.options().layout(at::kSparse)).native_sparse_coo_tensor(indices, values); -} - -Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values, ArrayRef size) { - return at::getType(values.options().layout(at::kSparse)).native_sparse_coo_tensor(indices, values, size); -} - -Tensor sparse_coo_tensor(ArrayRef size, const TensorOptions& options) { - TensorOptions toptions = TensorOptions(options).layout(at::kSparse); - return at::getType(toptions).native_sparse_coo_tensor(size, toptions); -} - -Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values, const TensorOptions& options) { - TensorOptions toptions = options; - return at::getType(toptions.layout(at::kSparse)).native_sparse_coo_tensor(indices, values); -} - -Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values, ArrayRef size, const TensorOptions& options) { - TensorOptions toptions = options; - return at::getType(toptions.layout(at::kSparse)).native_sparse_coo_tensor(indices, values, size); -} - -Tensor _sparse_coo_tensor_unsafe(const Tensor& indices, const Tensor& values, ArrayRef size) { - return at::getType(values.options().layout(at::kSparse))._native_sparse_coo_tensor_unsafe(indices, values, size); -} - int64_t get_device(const Tensor& self) { if (_has_native(self)) { return native_get_device(self); diff --git a/aten/src/ATen/native/TensorFactories.cpp b/aten/src/ATen/native/TensorFactories.cpp index e9241c368c34b..4894c2cecc129 100644 --- a/aten/src/ATen/native/TensorFactories.cpp +++ b/aten/src/ATen/native/TensorFactories.cpp @@ -153,7 +153,7 @@ Tensor empty_like(const Tensor& self) { Tensor empty_like(const Tensor& self, const TensorOptions& options) { if (options.layout() == kSparse && self.type().is_sparse()) { auto res = at::empty({0}, options); // to be resized - res.sparse_resize_and_clear_(self.sizes(), self._sparseDims(), self._denseDims()); 
+ res.sparse_resize_and_clear_(self.sizes(), self.sparse_dim(), self.dense_dim()); return res; } return at::empty(self.sizes(), options); @@ -525,7 +525,7 @@ Tensor zeros_like(const Tensor& self) { Tensor zeros_like(const Tensor& self, const TensorOptions& options) { if (options.layout() == kSparse && self.type().is_sparse()) { auto res = at::empty({0}, options); // to be resized - res.sparse_resize_and_clear_(self.sizes(), self._sparseDims(), self._denseDims()); + res.sparse_resize_and_clear_(self.sizes(), self.sparse_dim(), self.dense_dim()); return res; } return native::zeros(self.sizes(), options); diff --git a/aten/src/ATen/native/TensorShape.cpp b/aten/src/ATen/native/TensorShape.cpp index 387bebce3932a..5f2b8af406b84 100644 --- a/aten/src/ATen/native/TensorShape.cpp +++ b/aten/src/ATen/native/TensorShape.cpp @@ -9,6 +9,9 @@ #include "ATen/WrapDimUtils.h" #include "c10/util/Exception.h" #include "c10/util/Optional.h" +#include +#include +#include namespace at { namespace native { @@ -163,7 +166,7 @@ Tensor &as_strided_(Tensor& self, IntList size, IntList stride) { return at::as_strided_(self, size, stride, self.storage_offset()); } -Tensor narrow_copy_sparse(const Tensor& self, int64_t dim, int64_t start, int64_t length){ +Tensor narrow_copy_sparse(const Tensor& self, int64_t dim, int64_t start, int64_t length) { int64_t allDim = self.dim(); int64_t end = start+length; AT_CHECK(allDim > 0, "narrow() cannot be applied to a 0-dim tensor."); @@ -172,30 +175,29 @@ Tensor narrow_copy_sparse(const Tensor& self, int64_t dim, int64_t start, int64_ AT_CHECK(start >= 0 && length >= 0 && end <= self.size(dim), "Invalid range to narrow. 
range(start, start+length) must be a subset of range(0, ", self.size(dim), ").") LongTensor indices = self._indices(); - int64_t sparseDims = self._sparseDims(); + int64_t sparse_dim = self.sparse_dim(); - std::vector newSizes = self.sizes().vec(); - newSizes[dim]=length; + std::vector new_sizes = self.sizes().vec(); + new_sizes[dim] = length; - Tensor newValues; - LongTensor newIndices; - if(dim < sparseDims){ + Tensor new_values; + LongTensor new_indices; + if (dim < sparse_dim) { Tensor mask = (indices[dim] >= start).__and__((indices[dim] < end)); - newIndices = indices.masked_select(mask).view({sparseDims, -1}); - newIndices[dim].add_(-start); + new_indices = indices.masked_select(mask).view({sparse_dim, -1}); + new_indices[dim].sub_(start); Tensor nzIndices = mask.nonzero().view(-1); - newValues = self._values().index_select(0, nzIndices); - }else{ + new_values = self._values().index_select(0, nzIndices); + } else { /* This means we are narrowing on a dense dim, which is in effect just a regular narrow on _values() */ - newIndices = indices; - int64_t ddim = dim - sparseDims + 1; - newValues = self._values().narrow_copy(ddim, start, length); + new_indices = indices; + int64_t dense_dim = dim - sparse_dim + 1; + new_values = self._values().narrow_copy(dense_dim, start, length); } - SparseTensor newTensor = at::sparse_coo_tensor(newIndices, newValues, newSizes, self.type().options()); - _get_sparse_impl(newTensor)->set_coalesced(self.is_coalesced()); - return newTensor; + auto newTensor = at::sparse_coo_tensor(new_indices, new_values, new_sizes); + return newTensor._coalesced_(self.is_coalesced()); } Tensor narrow_copy_dense(const Tensor& self, int64_t dim, int64_t start, int64_t length){ @@ -397,16 +399,16 @@ Tensor& stack_out(Tensor& result, TensorList tensors, int64_t dim) { } static inline Tensor & sparse_transpose_(Tensor & self, int64_t dim0, int64_t dim1) { - int64_t nsparseDims = self._sparseDims(); - AT_CHECK(dim0 < nsparseDims && dim1 < nsparseDims, + 
int64_t nsparse_dim = self.sparse_dim(); + AT_CHECK(dim0 < nsparse_dim && dim1 < nsparse_dim, "sparse transpose: transposed dimensions must be sparse ", - "Got sparseDims: ", nsparseDims, ", d0: ", dim0, ", d1: ", dim1); + "Got sparse_dim: ", nsparse_dim, ", d0: ", dim0, ", d1: ", dim1); if (self._indices().numel() == 0 && self._values().numel() == 0) { auto sizes = self.sizes().vec(); std::swap(sizes[dim0], sizes[dim1]); - _get_sparse_impl(self)->raw_resize_(self._sparseDims(), self._denseDims(), sizes); + at::sparse::get_sparse_impl(self)->raw_resize_(self.sparse_dim(), self.dense_dim(), sizes); } else { auto indices = self._indices(); auto row0 = indices.select(0, dim0); @@ -418,12 +420,12 @@ static inline Tensor & sparse_transpose_(Tensor & self, int64_t dim0, int64_t di row0.copy_(row1); row1.copy_(tmp); - _get_sparse_impl(self)->set_coalesced(false); + self._coalesced_(false); auto sizes = self.sizes().vec(); std::swap(sizes[dim0], sizes[dim1]); - _get_sparse_impl(self)->raw_resize_(self._indices().size(0), self._values().dim() - 1, sizes); + at::sparse::get_sparse_impl(self)->raw_resize_(self._indices().size(0), self._values().dim() - 1, sizes); } return self; } @@ -469,11 +471,11 @@ Tensor transpose(const Tensor & self, int64_t dim0, int64_t dim1) { static void check_t(const Tensor& self, const char *fn) { if (self.is_sparse()) { - int64_t sparseDims = self._sparseDims(); - int64_t denseDims = self._denseDims(); - AT_CHECK(sparseDims == 2 && denseDims == 0, + int64_t sparse_dim = self.sparse_dim(); + int64_t dense_dim = self.dense_dim(); + AT_CHECK(sparse_dim == 2 && dense_dim == 0, fn, " expects a tensor with 2 sparse and 0 dense dimensions, but got ", - sparseDims, " sparse and ", denseDims, " dense dimensions"); + sparse_dim, " sparse and ", dense_dim, " dense dimensions"); } else if (self.dim() != 2) { AT_ERROR(fn, " expects a 2D tensor, but self is ", self.dim(), "D"); } diff --git a/aten/src/ATen/native/native_functions.yaml 
b/aten/src/ATen/native/native_functions.yaml index 49c05ecdf6e78..9584c51746c62 100644 --- a/aten/src/ATen/native/native_functions.yaml +++ b/aten/src/ATen/native/native_functions.yaml @@ -1929,66 +1929,131 @@ - func: addmm_(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor variants: method -# NB: I have to decompose sparse_coo_tensor into two functions, because -# it has custom dispatch logic for which Type to dispatch on (we must -# use the sparse equivalent of the type of the SECOND argument). -# -# The actual dispatcher, native_sparse_coo_tensor, has all of its overloads -# removed so you don't accidentally trigger the default behavior, which -# is to infer Type based on the first argument (indices), which is ~never -# what you want. (I guess hypothetically it would work; you'd -# just only ever dispatch to CPULongTensor or CUDALongTensor, but that -# seems a bit too finely balanced.) - -- func: native_sparse_coo_tensor(IntList size, TensorOptions options) -> Tensor - variants: [] - dispatch: - SparseCPU: new_with_size_sparse - SparseCUDA: new_with_size_sparse -- func: native_sparse_coo_tensor(IndexTensor indices, Tensor values) -> Tensor - variants: [] - dispatch: - SparseCPU: new_with_tensor_sparse - SparseCUDA: new_with_tensor_sparse +# NOTE [ Sparse: autograd and API ] +# +# Goals: 1. Flexible API for users to write custom sparse ops +# 2. ctor and member accessor with autograd support +# +# To achieve 1, we need to provide a set of *dangerous* APIs (dangerous in the +# sense that misusing them will break sparse tensor invariant and may result in +# unexpected behavior, e.g., crash). These methods are all prefixed with +# underscore "_" to indicate that they should be used with care. We provide: +# +# + `_indices()`: returns the *raw* indices within the sparse tensor (not just +# sharing storage). Any inplace operation will change the +# actual indices, including t_, set_, as_strided_, resize_, +# etc. 
+# + `_values()`: returns the *raw* values within the sparse tensor. Similar +# semantics as `_indices()` +# + `_nnz()`: returns the number of non-zero entries. This will always be +# determined by the shapes of indices and values. +# + `_coalesced_(bool)`: inplace sets whether the tensor is coalesced, and +# returns itself. +# +# These methods are very useful in writing new operations, e.g., a custom +# autograd Function. +# +# We also provide other public *safe* APIs: +# + `indices()`: returns a **view** of the indices tensor if the sparse tensor +# is **coalesced**. +# + `values()`: returns a **view** of the values tensor if the containing +# sparse tensor is **coalesced**. +# + `sparse_dim()`: number of sparse dimensions +# + `dense_dim()`: number of dense dimensions +# + `is_coalesced()`: whether the sparse tensor is coalesced +# +# `_indices` and `_values` should return the raw indices and values dense +# tensors within a sparse tensor. They can be quite unsafe with inplace +# operations like `t_`, and expose uncoalesced indices and values. The public +# recommended API is `indices` and `values`, both of which first check that the +# tensor is coalesced and return views on those tensors. +# +# Autograd support: +# +# Autograd is supported on `_values`, `values` and sparse tensor ctor with +# indices and values tensors. E.g., `torch.sparse_coo_tensor(i, v).values().sum()` +# is differentiable w.r.t. `v`. +# +# Before reading the following, see NOTE [ Autograd Variable Views ] in +# variable.h for details on views that are tracked by autograd, and views that +# are not. +# +# Moreover, these methods return tensors that share storage with inputs, so we +# mark these methods as view ops to support autograd history tracking. +# The sparse tensor ctor output should technically be a view of both input indices +# and values tensors, but currently we only support setting as view of a single +# Variable, so it is only a view of the values tensor.
+# TODO: clone indices in sparse tensor ctor. +# +# For other methods that return outputs that share storage with inputs, i.e., +# `indices()` and `_indices()`, we mark their outputs as non-differentiable, so +# the view relation is not tracked by autograd, but the version counter is still +# shared. In other words, their outputs are non-differentiable views of the +# sparse tensor. -- func: native_sparse_coo_tensor(IndexTensor indices, Tensor values, IntList size) -> Tensor - variants: [] - dispatch: - SparseCPU: new_with_tensor_and_size_sparse - SparseCUDA: new_with_tensor_and_size_sparse -- func: sparse_coo_tensor(IndexTensor indices, Tensor values) -> Tensor +# NOTE [ Sparse tensor ctors ] +# +# The API entry points to sparse tensor construction should be +# `sparse_coo_tensor` and `_sparse_coo_tensor_unsafe`. Depending on whether the +# indices and values tensors are given, they eventually dispatch to either +# `sparse_coo_tensor_with_dims` or `sparse_coo_tensor_with_dims_and_tensors`. +# +# The autograd support for ctor is implemented on `sparse_coo_tensor_with_dims_and_tensors`. +# +# The API methods `sparse_coo_tensor` and `_sparse_coo_tensor_unsafe` +# **must not** have specific type dispatches because otherwise codegen will +# consider them as abstract methods (see Note [Abstract ATen methods]), dispatch +# using **Tensor** type, and thus lose autograd tracking on the actual method +# they dispatch to, e.g., `sparse_coo_tensor_with_dims_and_tensors`. +# +# The actual ctors `sparse_coo_tensor_with_dims` and `sparse_coo_tensor_with_dims_and_tensors`, +# on the other hand, need to create `SparseTensorImpl` and know nothing about +# how `VariableType`s work. So they need to be dispatched using Tensor types. +# We thus put `requires_tensor=True` to ensure that `VariableType` will unwrap +# the given variables and call with the Tensor type.
-- func: sparse_coo_tensor(IndexTensor indices, Tensor values, IntList size) -> Tensor # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given # the default would never make sense. - func: sparse_coo_tensor(IntList size, *, TensorOptions options) -> Tensor -- func: sparse_coo_tensor(IndexTensor indices, Tensor values, *, TensorOptions options) -> Tensor +- func: sparse_coo_tensor(IndexTensor indices, Tensor values) -> Tensor + +- func: sparse_coo_tensor(IndexTensor indices, Tensor values, IntList size) -> Tensor + +- func: _sparse_coo_tensor_unsafe(IndexTensor indices, Tensor values, IntList size) -> Tensor -- func: sparse_coo_tensor(IndexTensor indices, Tensor values, IntList size, *, TensorOptions options) -> Tensor -- func: _native_sparse_coo_tensor_unsafe(IndexTensor indices, Tensor values, IntList size) -> Tensor +- func: sparse_coo_tensor_with_dims(Type sparse_dtype, int64_t sparse_dim, int64_t dense_dim, IntList size) -> Tensor variants: [] dispatch: - SparseCPU: new_with_tensor_and_size_unsafe_sparse - SparseCUDA: new_with_tensor_and_size_unsafe_sparse + SparseCPU: new_with_dims_sparse + SparseCUDA: new_with_dims_sparse + requires_tensor: True -- func: _sparse_coo_tensor_unsafe(IndexTensor indices, Tensor values, IntList size) -> Tensor +- func: sparse_coo_tensor_with_dims_and_tensors(Type sparse_dtype, int64_t sparse_dim, int64_t dense_dim, IntList size, Tensor indices, Tensor values) -> Tensor + variants: [] + dispatch: + SparseCPU: new_with_dims_and_tensor_sparse + SparseCUDA: new_with_dims_and_tensor_sparse + requires_tensor: True -- func: sparse_resize_(Tensor self, IntList size, int64_t sparseDims, int64_t denseDims) -> Tensor + +- func: sparse_resize_(Tensor self, IntList size, int64_t sparse_dim, int64_t dense_dim) -> Tensor variants: method dispatch: SparseCPU: sparse_resize_ SparseCUDA: sparse_resize_ + requires_tensor: True -- func: sparse_resize_and_clear_(Tensor self, IntList size, int64_t 
sparseDims, int64_t denseDims) -> Tensor +- func: sparse_resize_and_clear_(Tensor self, IntList size, int64_t sparse_dim, int64_t dense_dim) -> Tensor variants: method dispatch: SparseCPU: sparse_resize_and_clear_ SparseCUDA: sparse_resize_and_clear_ + requires_tensor: True - func: sparse_mask(Tensor self, SparseTensorRef mask) -> Tensor @@ -1996,6 +2061,7 @@ dispatch: CPU: sparse_mask_cpu CUDA: sparse_mask_cuda + requires_tensor: True - func: to_dense(Tensor self) -> Tensor @@ -2003,33 +2069,38 @@ dispatch: SparseCPU: sparse_to_dense SparseCUDA: sparse_to_dense + requires_tensor: True -- func: _sparseDims(Tensor self) -> int64_t +- func: sparse_dim(Tensor self) -> int64_t variants: method dispatch: - SparseCPU: _sparseDims_sparse - SparseCUDA: _sparseDims_sparse + SparseCPU: sparse_dim_sparse + SparseCUDA: sparse_dim_sparse + requires_tensor: True device_guard: False # legacy method - func: _dimI(Tensor self) -> int64_t - variants: function - dispatch: _sparseDims_sparse + variants: method + dispatch: sparse_dim_sparse + requires_tensor: True device_guard: False -- func: _denseDims(Tensor self) -> int64_t +- func: dense_dim(Tensor self) -> int64_t variants: method dispatch: - SparseCPU: _denseDims_sparse - SparseCUDA: _denseDims_sparse + SparseCPU: dense_dim_sparse + SparseCUDA: dense_dim_sparse + requires_tensor: True device_guard: False # legacy method - func: _dimV(Tensor self) -> int64_t - variants: function - dispatch: _denseDims_sparse + variants: method + dispatch: dense_dim_sparse + requires_tensor: True device_guard: False @@ -2038,6 +2109,7 @@ dispatch: SparseCPU: _nnz_sparse SparseCUDA: _nnz_sparse + requires_tensor: True device_guard: False @@ -2046,6 +2118,7 @@ dispatch: SparseCPU: coalesce_sparse_cpu SparseCUDA: coalesce_sparse_cuda + requires_tensor: True - func: is_coalesced(Tensor self) -> bool @@ -2053,6 +2126,7 @@ dispatch: SparseCPU: is_coalesced_sparse SparseCUDA: is_coalesced_sparse + requires_tensor: True device_guard: False @@ -2061,14 
+2135,42 @@ dispatch: SparseCPU: _indices_sparse SparseCUDA: _indices_sparse + requires_tensor: True device_guard: False - - func: _values(Tensor self) -> Tensor variants: method dispatch: SparseCPU: _values_sparse SparseCUDA: _values_sparse + requires_tensor: True + device_guard: False + +# This method doesn't do any check but only directly sets the flag. So it can be +# a bit unsafe. Similar to _indices and _values, this is useful for implementing +# custom sparse operations in Python/C++ extension. +- func: _coalesced_(Tensor self, bool coalesced) -> Tensor + variants: method + dispatch: + SparseCPU: _coalesced_sparse_ + SparseCUDA: _coalesced_sparse_ + requires_tensor: True + device_guard: False + +- func: indices(Tensor self) -> Tensor + variants: method + dispatch: + SparseCPU: indices_sparse + SparseCUDA: indices_sparse + requires_tensor: True + device_guard: False + +- func: values(Tensor self) -> Tensor + variants: method + dispatch: + SparseCPU: values_sparse + SparseCUDA: values_sparse + requires_tensor: True device_guard: False @@ -2076,17 +2178,20 @@ dispatch: SparseCPU: hspmm_out_sparse_cpu SparseCUDA: hspmm_out_sparse_cuda + requires_tensor: True - func: hspmm(Tensor mat1, Tensor mat2) -> Tensor dispatch: SparseCPU: hspmm_sparse_cpu SparseCUDA: hspmm_sparse_cuda + requires_tensor: True - func: copy_sparse_to_sparse_(Tensor self, Tensor src, bool non_blocking=false) -> Tensor variants: function dispatch: SparseCPU: copy_sparse_ SparseCUDA: copy_sparse_ + requires_tensor: True - func: numel(Tensor self) -> int64_t variants: function, method diff --git a/aten/src/ATen/native/sparse/SparseTensor.cpp b/aten/src/ATen/native/sparse/SparseTensor.cpp index 107233ecfccb4..85edc40588b62 100644 --- a/aten/src/ATen/native/sparse/SparseTensor.cpp +++ b/aten/src/ATen/native/sparse/SparseTensor.cpp @@ -1,63 +1,73 @@ // Basic functions on sparse tensors #include +#include #include #include #include -#include +#include #include namespace at { namespace native { +using 
namespace at::sparse; + + /****************************************************************************** * access methods ******************************************************************************/ -int64_t _sparseDims_sparse(const SparseTensor& self) { - return _get_sparse_impl(self)->sparseDims(); +int64_t sparse_dim_sparse(const SparseTensor& self) { + return get_sparse_impl(self)->sparse_dim(); } -int64_t _denseDims_sparse(const SparseTensor& self) { - return _get_sparse_impl(self)->denseDims(); +int64_t dense_dim_sparse(const SparseTensor& self) { + return get_sparse_impl(self)->dense_dim(); } bool is_coalesced_sparse(const SparseTensor& self) { - return _get_sparse_impl(self)->coalesced(); + return get_sparse_impl(self)->coalesced(); } int64_t _nnz_sparse(const SparseTensor& self) { - return _get_sparse_impl(self)->nnz(); + return get_sparse_impl(self)->nnz(); } -// TODO: This is wrong: if nnz == 0 but indices/values is not -// empty then we'll return all the values, even the ones that -// are "masked out" by nnz +// Why are there so many methods to get indices and value? +// See Note [ Sparse: different methods to get indices and values ] in native_functions.yaml Tensor _indices_sparse(const SparseTensor& self) { - auto nnz = self._nnz(); - if (nnz == 0) { - // Narrows don't work on 0-length tensors - // TODO: When we handle zero-size dims correctly, this will work and - // we can remove the special case. 
- return _get_sparse_impl(self)->indices(); - } - return _get_sparse_impl(self)->indices().narrow(1, 0, nnz); + return get_sparse_impl(self)->indices(); } Tensor _values_sparse(const SparseTensor& self) { - // See indices for some relevant notes - auto nnz = self._nnz(); - if (nnz == 0) { - return _get_sparse_impl(self)->values(); - } - return _get_sparse_impl(self)->values().narrow(0, 0, nnz); + return get_sparse_impl(self)->values(); +} + +Tensor &_coalesced_sparse_(SparseTensor& self, bool coalesced) { + get_sparse_impl(self)->set_coalesced(coalesced); + return self; +} + +Tensor indices_sparse(const Tensor& self) { + AT_CHECK(self.is_coalesced(), + "Cannot get indices on an uncoalesced tensor, please call .coalesce() first"); + return get_sparse_impl(self)->indices().alias(); +} + +Tensor values_sparse(const Tensor& self) { + AT_CHECK(self.is_coalesced(), + "Cannot get values on an uncoalesced tensor, please call .coalesce() first"); + return get_sparse_impl(self)->values().alias(); } /****************************************************************************** * creation methods ******************************************************************************/ +/*** Helper methods ***/ + /* Empty init */ SparseTensor new_sparse(const TensorOptions& options) { AT_ASSERT(!options.is_variable()); @@ -72,10 +82,44 @@ SparseTensor new_sparse(const TensorOptions& options) { type_id, scalarTypeToTypeMeta(options.dtype())); } -/*** Helper methods ***/ + +/** Actual dispatched methods ***/ + +SparseTensor new_with_dims_sparse(const SparseType& sparse_dtype, int64_t sparse_dim, int64_t dense_dim, ArrayRef size) { + SparseTensor self = new_sparse(sparse_dtype); + AT_CHECK(size.size() != 0, + "cannot construct sparse tensor with 0 dimensions and no values; you must specify at least 1 dimension if you want to create a sparse tensor with no elements, \ +or you must provide a single-element `values` tensor (e.g. 
x = torch.sparse_coo_tensor(torch.zeros(0, 1), 12.3, [])) if you want to create a scalar sparse tensor"); + get_sparse_impl(self)->resize_and_clear_(sparse_dim, dense_dim, size); + return self; +} + +// Does NOT make copies of indices and values +SparseTensor new_with_dims_and_tensor_sparse( + const SparseType& sparse_dtype, + int64_t sparse_dim, + int64_t dense_dim, + ArrayRef size, + const LongTensor& indices, + const Tensor& values) { + SparseTensor self = new_sparse(sparse_dtype); + get_sparse_impl(self)->resize_(sparse_dim, dense_dim, size); + alias_into_sparse(self, indices, values); + return self; +} + +/* Shape init */ +Tensor sparse_coo_tensor(ArrayRef size, const TensorOptions& options) { + TensorOptions toptions = options; + // Use type-dispatch to get rid of variable type. + return at::getType(toptions.layout(at::kSparse)).sparse_coo_tensor_with_dims(size.size(), 0, size); +} /* Pointer-copy init */ -SparseTensor new_with_tensor_sparse(const LongTensor& indices, const Tensor& values_) { +Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_) { + AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout()); + AT_CHECK(!values_.is_sparse(), "expected values to be a dense tensor, but got values of layout ", values_.layout()); + Tensor values; if (values_.dim() == 0) { // Mimic Numpy behavior here and treat it as a 1D tensor @@ -84,42 +128,49 @@ SparseTensor new_with_tensor_sparse(const LongTensor& indices, const Tensor& val values = values_; } - const SparseType& dtype = values.type().toSparse(); - // If sizes are not given, it is inferred as max index of each dim. 
- int64_t sparseDims = indices.size(0); - int64_t denseDims = values.dim() - 1; + int64_t sparse_dim = indices.size(0); + int64_t dense_dim = values.dim() - 1; - std::vector computed_sizes(sparseDims + denseDims); + std::vector computed_sizes(sparse_dim + dense_dim); if (indices.numel() > 0) { // If the indices has elements in it, we infer the minimum sparse dimension sizes // as the max value of each dim in indices. // NB: It used to keepdim. I think that was wrong. + LongTensor min_indices = std::get(indices.min(/* dim */ 1, /* keepdim */ false)); LongTensor computed_indices_sizes = std::get(indices.max(/* dim */ 1, /* keepdim */ false)); computed_indices_sizes.add_(1); // len = max_index + 1 - LongTensor cpu_computed_indices_sizes; + LongTensor cpu_min_indices, cpu_computed_indices_sizes; if (computed_indices_sizes.is_cuda()) { cpu_computed_indices_sizes = at::empty(computed_indices_sizes.sizes(), at::initialTensorOptions().dtype(kLong)); cpu_computed_indices_sizes.copy_(computed_indices_sizes); + cpu_min_indices = min_indices.to(at::DeviceType::CPU); } else { + cpu_min_indices = min_indices; cpu_computed_indices_sizes = computed_indices_sizes; } + auto cpu_min_indices_accessor = cpu_min_indices.accessor(); auto cpu_computed_indices_sizes_accessor = cpu_computed_indices_sizes.accessor(); - for (int64_t d = 0; d < sparseDims; d++) { + for (int64_t d = 0; d < sparse_dim; d++) { + int64_t min_index_in_dim = cpu_min_indices_accessor[d]; + AT_CHECK(min_index_in_dim >= 0, + "found negative index ", min_index_in_dim, " for dim ", d); computed_sizes[static_cast(d)] = cpu_computed_indices_sizes_accessor[d]; } } else { // If the indices doesn't have elements in it, there is not enough information // to know what the minimum sparse dimension sizes should be, and in this case // we set them to 0 - for (int64_t d = 0; d < sparseDims; d++) { + for (int64_t d = 0; d < sparse_dim; d++) { computed_sizes[static_cast(d)] = 0; } } - for (int64_t d = 0; d < denseDims; d++) { - 
computed_sizes[static_cast(sparseDims + d)] = values.size(d+1); + for (int64_t d = 0; d < dense_dim; d++) { + computed_sizes[static_cast(sparse_dim + d)] = values.size(d+1); } - return _new_with_dims_and_tensor_sparse(dtype, sparseDims, denseDims, computed_sizes, indices, values); + + const TensorOptions options = values.options().layout(at::kSparse); + return at::getType(options).sparse_coo_tensor_with_dims_and_tensors(sparse_dim, dense_dim, computed_sizes, indices, values); } SparseTensor new_with_dims_and_size_sparse(int64_t sparseDims, int64_t denseDims, ArrayRef size, const TensorOptions& options) { @@ -148,33 +199,11 @@ Tensor empty_sparse(IntList size, const TensorOptions& options) { return tensor; } -SparseTensor new_with_size_sparse(IntList size, const TensorOptions& options) { - return new_with_dims_and_size_sparse(size.size(), 0, size, options); -} - -// NOTE: new_with_tensor_and_size_unsafe_sparse() differs from new_with_tensor_and_size_sparse() -// in that we don't check whether any indices are out of boundaries of `sizes`, thus avoiding a -// copy from CUDA to CPU. However, this function should ONLY be used where we know that the indices -// are guaranteed to be within bounds. 
// NB: Got rid of the sizes == NULL case -SparseTensor new_with_tensor_and_size_unsafe_sparse(const LongTensor& indices, const Tensor& values_, ArrayRef sizes) { - Tensor values; - if (values_.dim() == 0) { - // Mimic Numpy behavior here and treat it as a 1D tensor - values = values_.expand({1}); - } else { - values = values_; - } - - const SparseType& dtype = values.type().toSparse(); +Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, ArrayRef size) { + AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout()); + AT_CHECK(!values_.is_sparse(), "expected values to be a dense tensor, but got values of layout ", values_.layout()); - int64_t sparseDims = indices.size(0); - int64_t denseDims = values.dim() - 1; - return _new_with_dims_and_tensor_sparse(dtype, sparseDims, denseDims, sizes, indices, values); -} - -// NB: Got rid of the sizes == NULL case -SparseTensor new_with_tensor_and_size_sparse(const LongTensor& indices, const Tensor& values_, ArrayRef sizes) { Tensor values; if (values_.dim() == 0) { // Mimic Numpy behavior here and treat it as a 1D tensor @@ -183,73 +212,97 @@ SparseTensor new_with_tensor_and_size_sparse(const LongTensor& indices, const Te values = values_; } - const SparseType& dtype = values.type().toSparse(); + int64_t sparse_dim = indices.size(0); + int64_t dense_dim = values.dim() - 1; + AT_CHECK(size.size() == sparse_dim + dense_dim, + "number of dimensions must be sparse_dim (", sparse_dim, ") + dense_dim (", dense_dim, "), but got ", size.size()); - int64_t sparseDims = indices.size(0); - int64_t denseDims = values.dim() - 1; - AT_CHECK(sizes.size() == sparseDims + denseDims, "number of dimensions must be sparseDims (", sparseDims, ") + denseDims (", denseDims, "), but got ", sizes.size()); - - // Check to make sure all indices are within the boundaries of `sizes` + // Check to make sure all indices are within the boundaries of `size` if (indices.numel() > 0) 
{ LongTensor min_indices = std::get(indices.min(/* dim */ 1, /* keepdim */ false)); LongTensor max_indices = std::get(indices.max(/* dim */ 1, /* keepdim */ false)); LongTensor cpu_min_indices, cpu_max_indices; if (indices.is_cuda()) { - cpu_min_indices = at::CPU(kLong).copy(min_indices); - cpu_max_indices = at::CPU(kLong).copy(max_indices); + cpu_min_indices = min_indices.to(at::DeviceType::CPU); + cpu_max_indices = max_indices.to(at::DeviceType::CPU); } else { cpu_min_indices = min_indices; cpu_max_indices = max_indices; } auto cpu_min_indices_accessor = cpu_min_indices.accessor(); auto cpu_max_indices_accessor = cpu_max_indices.accessor(); - for (int64_t d = 0; d < sparseDims; d++) { + for (int64_t d = 0; d < sparse_dim; d++) { // NB: This used to sync ndim times to access each entry; now we copy // everything to CPU first and then access it. int64_t min_index_in_dim = cpu_min_indices_accessor[d]; AT_CHECK(min_index_in_dim >= 0, "found negative index ", min_index_in_dim, " for dim ", d); int64_t max_index_in_dim = cpu_max_indices_accessor[d]; - int64_t dim_size = sizes[static_cast(d)]; + int64_t dim_size = size[static_cast(d)]; AT_CHECK(max_index_in_dim < dim_size, - "sizes is inconsistent with indices: for dim ", d, ", size is ", dim_size, " but found index ", max_index_in_dim); + "size is inconsistent with indices: for dim ", d, ", size is ", dim_size, " but found index ", max_index_in_dim); } } - return _new_with_dims_and_tensor_sparse(dtype, sparseDims, denseDims, sizes, indices, values); + + const TensorOptions options = values.options().layout(at::kSparse); + return at::getType(options).sparse_coo_tensor_with_dims_and_tensors(sparse_dim, dense_dim, size, indices, values); +} + +// NOTE: _sparse_coo_tensor_unsafe() differs from sparse_coo_tensor() +// in that we don't check whether any indices are out of boundaries of `size`, thus avoiding a +// copy from CUDA to CPU. 
However, this function should ONLY be used where we know that the indices +// are guaranteed to be within bounds. +// NB: Got rid of the size == NULL case +Tensor _sparse_coo_tensor_unsafe(const Tensor& indices, const Tensor& values_, ArrayRef size) { + AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout()); + AT_CHECK(!values_.is_sparse(), "expected values to be a dense tensor, but got values of layout ", values_.layout()); + + Tensor values; + if (values_.dim() == 0) { + // Mimic Numpy behavior here and treat it as a 1D tensor + values = values_.expand({1}); + } else { + values = values_; + } + + int64_t sparse_dim = indices.size(0); + int64_t dense_dim = values.dim() - 1; + + const TensorOptions options = values.options().layout(at::kSparse); + return at::getType(options).sparse_coo_tensor_with_dims_and_tensors(sparse_dim, dense_dim, size, indices, values); } // NB: Deleted newWithSizeNd variants SparseTensor clone_sparse(const SparseTensor& self) { - SparseTensor other = new_with_dims_and_size_sparse(self._sparseDims(), self._denseDims(), self.sizes(), self.options()); - _copy_into_sparse(other, _get_sparse_impl(self)->indices(), _get_sparse_impl(self)->values(), true); - _get_sparse_impl(other)->set_coalesced(self.is_coalesced()); - return other; + SparseTensor other = new_with_dims_sparse(self.sparse_dim(), self.dense_dim(), self.sizes(), self.options()); + copy_into_sparse(other, self._indices(), self._values(), true); + return other._coalesced_(self.is_coalesced()); } /****************************************************************************** * reshaping methods ******************************************************************************/ -SparseTensor& sparse_resize_(SparseTensor& self, ArrayRef size, int64_t sparseDims, int64_t denseDims) { - _get_sparse_impl(self)->resize_(sparseDims, denseDims, size); +SparseTensor& sparse_resize_(SparseTensor& self, ArrayRef size, int64_t sparse_dim, 
int64_t dense_dim) { + get_sparse_impl(self)->resize_(sparse_dim, dense_dim, size); return self; } -SparseTensor& sparse_resize_and_clear_(SparseTensor& self, ArrayRef size, int64_t sparseDims, int64_t denseDims) { - _get_sparse_impl(self)->resize_and_clear_(sparseDims, denseDims, size); +SparseTensor& sparse_resize_and_clear_(SparseTensor& self, ArrayRef size, int64_t sparse_dim, int64_t dense_dim) { + get_sparse_impl(self)->resize_and_clear_(sparse_dim, dense_dim, size); return self; } namespace { bool _is_same_size_as_sparse(const SparseTensor& self, const SparseTensor& src) { - return self._sparseDims() == src._sparseDims() && self._denseDims() == src._denseDims() && self.sizes().equals(src.sizes()); + return self.sparse_dim() == src.sparse_dim() && self.dense_dim() == src.dense_dim() && self.sizes().equals(src.sizes()); } } SparseTensor& resize_as_sparse_(SparseTensor& self, const SparseTensor& src) { if (!_is_same_size_as_sparse(self, src)) { - sparse_resize_(self, src.sizes(), src._sparseDims(), src._denseDims()); + sparse_resize_(self, src.sizes(), src.sparse_dim(), src.dense_dim()); } return self; } @@ -257,17 +310,15 @@ SparseTensor& resize_as_sparse_(SparseTensor& self, const SparseTensor& src) { // NB: Dropped the resizeNd variants Tensor sparse_to_dense(const SparseTensor& self) { - Tensor dst = at::zeros(self.sizes(), self.type().toDense()); + Tensor dst = at::zeros(self.sizes(), self.options().layout(kStrided)); return dst.add_(self); } SparseTensor& copy_sparse_(SparseTensor& self, const SparseTensor& src, bool non_blocking) { - if (isSameTensor(self, src)) return self; - _get_sparse_impl(self)->resize_(src._sparseDims(), src._denseDims(), src.sizes()); - // NB: This seems to copy the underlying full indices/values buffer - _copy_into_sparse(self, _get_sparse_impl(src)->indices(), _get_sparse_impl(src)->values(), non_blocking); - _get_sparse_impl(self)->set_coalesced(src.is_coalesced()); - return self; + if (is_same_tensor(self, src)) return self; + 
get_sparse_impl(self)->resize_(src.sparse_dim(), src.dense_dim(), src.sizes()); + copy_into_sparse(self, src._indices(), src._values(), non_blocking); + return self._coalesced_(src.is_coalesced()); } SparseTensor coalesce_sparse_cpu(const SparseTensor& self) { @@ -282,31 +333,24 @@ SparseTensor coalesce_sparse_cpu(const SparseTensor& self) { // we should keep the original tensor intact and do coalesce on a copy of the tensor if (self._nnz() < 2) { SparseTensor dst = self.clone(); - _get_sparse_impl(dst)->set_coalesced(true); + dst._coalesced_(true); return dst; } LongTensor indices = self._indices(); Tensor values = self._values().contiguous(); - int64_t sparseDims = self._sparseDims(); - int64_t denseDims = self._denseDims(); + int64_t sparse_dim = self.sparse_dim(); + int64_t dense_dim = self.dense_dim(); int64_t nnz = self._nnz(); - LongTensor indices_scalar = at::zeros({nnz}, kLong); - - int64_t factor = 1; - for (int64_t d = sparseDims - 1; d >= 0; d--) { - LongTensor indices_slice = indices.select(0, d); - indices_scalar.add_(indices_slice, factor); // cadd is swapped args - factor *= self.size(d); - } + LongTensor indices_scalar = flatten_indices(indices, self.sizes()); SparseTensor dst = new_sparse(self.options()); - _get_sparse_impl(dst)->resize_(sparseDims, denseDims, self.sizes()); + get_sparse_impl(dst)->resize_(sparse_dim, dense_dim, self.sizes()); // TODO: is there a more idiomatic way to do this? 
LongTensor newIndices = at::empty(indices.sizes(), indices.options()); Tensor newValues = at::empty(values.sizes(), values.options()); - _alias_into_sparse(dst, newIndices, newValues); + alias_into_sparse(dst, newIndices, newValues); LongTensor indicesBuffer; LongTensor indicesPermutation; @@ -333,7 +377,7 @@ SparseTensor coalesce_sparse_cpu(const SparseTensor& self) { } } else { ++i; - for (int64_t d = 0; d < sparseDims; d++) { + for (int64_t d = 0; d < sparse_dim; d++) { newIndicesAccessor[d][i] = indicesAccessor[d][pos]; } if (values.numel() > 0) { // if values is an empty tensor, there are no elements to copy @@ -344,8 +388,8 @@ SparseTensor coalesce_sparse_cpu(const SparseTensor& self) { } }); - _get_sparse_impl(dst)->set_coalesced(true); - _get_sparse_impl(dst)->set_nnz_and_narrow(i + 1); + dst._coalesced_(true); + get_sparse_impl(dst)->set_nnz_and_narrow(i + 1); return dst; } @@ -363,14 +407,15 @@ SparseTensor& sparse_mask_out_cpu(SparseTensor& r, const Tensor& t, const Sparse return r; } int64_t dim = t.dim(); - int64_t sparseDims = mask._sparseDims(); + int64_t sparse_dim = mask.sparse_dim(); LongTensor mask_indices = mask._indices(); Tensor mask_values = mask._values(); Tensor r_values = at::empty(mask_values.sizes(), r._values().options()); - _alias_into_sparse(r, mask_indices.clone(), r_values); - _get_sparse_impl(r)->set_coalesced(mask.is_coalesced()); + alias_into_sparse(r, mask_indices.clone(), r_values); + r._coalesced_(mask.is_coalesced()); int64_t r_nnz = mask._nnz(); - _get_sparse_impl(r)->set_nnz_and_narrow(r_nnz); + get_sparse_impl(r)->set_nnz_and_narrow(r_nnz); + if (t.numel() == 0) { // if t is an empty tensor, there is no need to mask its elements return r; } @@ -378,11 +423,11 @@ SparseTensor& sparse_mask_out_cpu(SparseTensor& r, const Tensor& t, const Sparse // NB: Relies on mask._nnz() == 0 test above auto mask_indices_accessor = mask_indices.accessor(); - if (dim > sparseDims) { + if (dim > sparse_dim) { // NB: This used to reuse 
buffers, but I deoptimized it for (int64_t i = 0; i < r_nnz; i++) { Tensor srcBuffer = t; - for (int64_t d = 0; d < sparseDims; d++) { + for (int64_t d = 0; d < sparse_dim; d++) { srcBuffer = srcBuffer.select(0, mask_indices_accessor[d][i]); } Tensor dstBuffer = r_values.select(0, i); @@ -398,7 +443,7 @@ SparseTensor& sparse_mask_out_cpu(SparseTensor& r, const Tensor& t, const Sparse scalar_t* t_ptr = t.data(); for (int64_t i = 0; i < r_nnz; i++) { int64_t idx = 0; - for (int64_t d = 0; d < sparseDims; d++) { + for (int64_t d = 0; d < sparse_dim; d++) { idx += mask_indices_accessor[d][i] * t.stride(d); } scalar_t val = t_ptr[idx]; diff --git a/aten/src/ATen/native/sparse/SparseTensorMath.cpp b/aten/src/ATen/native/sparse/SparseTensorMath.cpp index 053535a976c46..994a3647d74a1 100644 --- a/aten/src/ATen/native/sparse/SparseTensorMath.cpp +++ b/aten/src/ATen/native/sparse/SparseTensorMath.cpp @@ -3,12 +3,14 @@ #include #include #include -#include +#include #include namespace at { namespace native { +using namespace at::sparse; + // -------------------------------------------------------------------- // Utility functions // -------------------------------------------------------------------- @@ -43,9 +45,8 @@ namespace { // hummu hummu SparseTensor& zero_sparse_(SparseTensor& self) { AT_ASSERT(self.is_sparse()); - at::zeros_out(self, _get_sparse_impl(self)->sizes()); - _get_sparse_impl(self)->set_coalesced(true); // NB: This is new - return self; + at::zeros_out(self, get_sparse_impl(self)->sizes()); + return self._coalesced_(true); } // NB: Don't need zeros, zeros_like, already implemented in TensorFactories @@ -65,16 +66,17 @@ SparseTensor& mul_out_sparse_zerodim(SparseTensor& r, const SparseTensor& t, con AT_ASSERT(t.is_sparse()); AT_ASSERT(value.dim() == 0); - if (isSameTensor(r, t)) { + if (is_same_tensor(r, t)) { r._values().mul_(value); } else { r.resize_as_(t); - r._indices().resize_as_(t._indices()); - r._indices().copy_(t._indices()); + auto indices = 
r._indices(); + indices.resize_as_(t._indices()); + indices.copy_(t._indices()); Tensor r_values = r._values(); // Sigh... needed because mul_out takes Tensor& at::mul_out(r_values, t._values(), value); - _get_sparse_impl(r)->set_nnz_and_narrow(t._nnz()); - _get_sparse_impl(r)->set_coalesced(t.is_coalesced()); + get_sparse_impl(r)->set_nnz_and_narrow(t._nnz()); + r._coalesced_(t.is_coalesced()); } return r; } @@ -93,7 +95,7 @@ SparseTensor& log1p_out_sparse(SparseTensor& r, const SparseTensor& t) { AT_ASSERT(r.is_sparse()); AT_ASSERT(t.is_sparse()); - if (isSameTensor(r, t)) { + if (is_same_tensor(r, t)) { // don't have in-place log1p for uncoalesced input because coalesce() is not in-place AT_CHECK( r.is_coalesced(), "log1p: in-place on uncoalesced tensors is not supported yet!"); @@ -125,14 +127,13 @@ SparseTensor& pow_out_sparse_scalar(SparseTensor& r, const SparseTensor& t_, Sca SparseTensor t = t_.coalesce(); r.resize_as_(t); - r._indices().resize_as_(t._indices()); - r._indices().copy_(t._indices()); + auto indices = r._indices(); + indices.resize_as_(t._indices()); + indices.copy_(t._indices()); Tensor r_values = r._values(); // Sigh... 
needed because pow_out takes Tensor& at::pow_out(r_values, t._values(), value); - _get_sparse_impl(r)->set_nnz_and_narrow(t._nnz()); - _get_sparse_impl(r)->set_coalesced(t.is_coalesced()); - - return r; + get_sparse_impl(r)->set_nnz_and_narrow(t._nnz()); + return r._coalesced_(t.is_coalesced()); } SparseTensor pow_sparse_scalar(const SparseTensor& t, Scalar value) { @@ -150,16 +151,17 @@ SparseTensor& div_out_sparse_zerodim(SparseTensor& r, const SparseTensor& t, con AT_ASSERT(t.is_sparse()); AT_ASSERT(value.dim() == 0); - if (isSameTensor(r, t)) { + if (is_same_tensor(r, t)) { r._values().div_(value); } else { r.resize_as_(t); - r._indices().resize_as_(t._indices()); - r._indices().copy_(t._indices()); + auto indices = r._indices(); + indices.resize_as_(t._indices()); + indices.copy_(t._indices()); Tensor r_values = r._values(); // Sigh... needed because div_out takes Tensor& at::div_out(r_values, t._values(), value); - _get_sparse_impl(r)->set_nnz_and_narrow(t._nnz()); - _get_sparse_impl(r)->set_coalesced(t.is_coalesced()); + get_sparse_impl(r)->set_nnz_and_narrow(t._nnz()); + r._coalesced_(t.is_coalesced()); } return r; } @@ -199,20 +201,20 @@ SparseTensor& add_out_sparse_cpu(SparseTensor& r, const SparseTensor& t, const S return mul_out_sparse_scalar(r, src, value); } - AT_CHECK(_is_same_density(t, src), "add: expected 'self' and 'other' to have same density, but 'self' has ", t._sparseDims(), " sparse dimensions while 'other' has ", src._sparseDims(), " sparse dimensions"); + AT_CHECK(is_same_density(t, src), "add: expected 'self' and 'other' to have same density, but 'self' has ", t.sparse_dim(), " sparse dimensions while 'other' has ", src.sparse_dim(), " sparse dimensions"); // saving those because they can be overwritten when doing in-place operations int64_t t_nnz = t._nnz(), s_nnz = src._nnz(), max_nnz = t_nnz + s_nnz; bool t_coalesced = t.is_coalesced(), s_coalesced = src.is_coalesced(); - int64_t sparseDims = src._sparseDims(); + int64_t sparse_dim = 
src.sparse_dim(); LongTensor t_indices = t._indices(); Tensor t_values = t._values(); LongTensor src_indices = src._indices(); Tensor s_values = src._values(); - LongTensor r_indices = at::empty({sparseDims, max_nnz}, t_indices.options()); - Tensor r_values = _new_values_with_size_of(s_values, max_nnz).zero_(); + LongTensor r_indices = at::empty({sparse_dim, max_nnz}, t_indices.options()); + Tensor r_values = new_values_with_size_of(s_values, max_nnz).zero_(); r.resize_as_(src); - _get_sparse_impl(r)->set_indices_and_values_unsafe(r_indices, r_values); + get_sparse_impl(r)->set_indices_and_values_unsafe(r_indices, r_values); int64_t blockSize = r_values.stride(0); int64_t cmp, d; @@ -236,7 +238,7 @@ SparseTensor& add_out_sparse_cpu(SparseTensor& r, const SparseTensor& t, const S cmp = 1; } else { cmp = 0; - for (d = 0; d < sparseDims; d++) { + for (d = 0; d < sparse_dim; d++) { if (t_indices_accessor[d][t_i] < src_indices_accessor[d][s_i]) { cmp = 1; break; @@ -248,7 +250,7 @@ SparseTensor& add_out_sparse_cpu(SparseTensor& r, const SparseTensor& t, const S } } if (cmp >= 0) { - for (d = 0; d < sparseDims; d++) { + for (d = 0; d < sparse_dim; d++) { r_indices_accessor[d][r_i] = t_indices_accessor[d][t_i]; } if (t_values.numel() > 0) { // We add all elements from t_values to r_values only if t_values is not an empty tensor @@ -259,7 +261,7 @@ SparseTensor& add_out_sparse_cpu(SparseTensor& r, const SparseTensor& t, const S t_i++; } if (cmp <= 0) { - for (d = 0; d < sparseDims; d++) { + for (d = 0; d < sparse_dim; d++) { r_indices_accessor[d][r_i] = src_indices_accessor[d][s_i]; } if (s_values.numel() > 0) { // We add all elements from s_values to r_values only if s_values is not an empty tensor @@ -274,14 +276,12 @@ SparseTensor& add_out_sparse_cpu(SparseTensor& r, const SparseTensor& t, const S } ); - _get_sparse_impl(r)->set_nnz_and_narrow(r_i); + get_sparse_impl(r)->set_nnz_and_narrow(r_i); // TODO: I think it may be possible to track inside the loop and // detect 
when we are uncoalesced (e.g., by observing that an // index goes backwards) which may be more precise than using the // coalesced flag here. But this is easy. - _get_sparse_impl(r)->set_coalesced(t_coalesced && s_coalesced); - - return r; + return r._coalesced_(t_coalesced && s_coalesced); } // -------------------------------------------------------------------- @@ -302,7 +302,7 @@ void add_dense_sparse_worker_cpu(Tensor& r, Scalar value, const SparseTensor& sp #pragma omp parallel for private(k) for (k = 0; k < sparse._nnz(); k++) { int64_t index = r.storage_offset(); - for (int64_t d = 0; d < sparse._sparseDims(); d++) { + for (int64_t d = 0; d < sparse.sparse_dim(); d++) { index += r.stride(d) * indices_accessor[d][k]; } r_ptr[index] += cast_value * values_accessor[k]; @@ -329,9 +329,9 @@ Tensor& add_out_dense_sparse_cpu(Tensor& r, const Tensor& dense, SparseTensorRef LongTensor indices = sparse._indices(); Tensor values = sparse._values(); int64_t nDim = dense.dim(); - int64_t nDimI = sparse._sparseDims(); + int64_t nDimI = sparse.sparse_dim(); - if (!isSameTensor(r, dense)) r.copy_(dense); + if (!is_same_tensor(r, dense)) r.copy_(dense); if (sparse._nnz() == 0) return r; // accessors rely on nnz test @@ -339,7 +339,7 @@ Tensor& add_out_dense_sparse_cpu(Tensor& r, const Tensor& dense, SparseTensorRef auto indices_accessor = indices.accessor(); for (int64_t k = 0; k < sparse._nnz(); k++) { Tensor dstBuffer = r; - for (int64_t d = 0; d < sparse._sparseDims(); d++) { + for (int64_t d = 0; d < sparse.sparse_dim(); d++) { dstBuffer = dstBuffer.select(0, indices_accessor[d][k]); } Tensor srcBuffer = values.select(0, k); @@ -383,15 +383,15 @@ SparseTensor& mul_out_sparse_cpu(SparseTensor& r, const Tensor& t_, const Tensor // saving those because they can be overwritten when doing in-place operations int64_t t_nnz = t._nnz(), s_nnz = src._nnz(); int64_t max_nnz = std::min(t_nnz, s_nnz); // multiply by zero is zero, and can be dropped - int64_t sparseDims = 
src._sparseDims(); + int64_t sparse_dim = src.sparse_dim(); LongTensor t_indices = t._indices(); Tensor t_values = t._values(); LongTensor src_indices = src._indices(); Tensor s_values = src._values(); - LongTensor r_indices = at::empty({sparseDims, max_nnz}, t_indices.options()); - Tensor r_values = _new_values_with_size_of(t_values, max_nnz).zero_(); + LongTensor r_indices = at::empty({sparse_dim, max_nnz}, t_indices.options()); + Tensor r_values = new_values_with_size_of(t_values, max_nnz).zero_(); r.resize_as_(src); - _get_sparse_impl(r)->set_indices_and_values_unsafe(r_indices, r_values); + get_sparse_impl(r)->set_indices_and_values_unsafe(r_indices, r_values); int64_t match, d; int64_t r_i = 0, t_i = 0, s_i = 0; @@ -406,7 +406,7 @@ SparseTensor& mul_out_sparse_cpu(SparseTensor& r, const Tensor& t_, const Tensor // indices were found. auto index_preamble = [&]() { match = 1; - for (d = 0; d < sparseDims; d++) { + for (d = 0; d < sparse_dim; d++) { if (t_indices_accessor[d][t_i] < src_indices_accessor[d][s_i]) { t_i++; match = 0; @@ -419,7 +419,7 @@ SparseTensor& mul_out_sparse_cpu(SparseTensor& r, const Tensor& t_, const Tensor } } if (!match) return false; - for (d = 0; d < sparseDims; d++) { + for (d = 0; d < sparse_dim; d++) { r_indices_accessor[d][r_i] = t_indices_accessor[d][t_i]; } return true; @@ -451,10 +451,8 @@ SparseTensor& mul_out_sparse_cpu(SparseTensor& r, const Tensor& t_, const Tensor ); } - _get_sparse_impl(r)->set_nnz_and_narrow(r_i); - _get_sparse_impl(r)->set_coalesced(true); - - return r; + get_sparse_impl(r)->set_nnz_and_narrow(r_i); + return r._coalesced_(true); } // -------------------------------------------------------------------- @@ -472,7 +470,7 @@ void s_addmm_out_sparse_dense_worker(int64_t nnz, int64_t dim_i, int64_t dim_j, if (cast_beta == 0) { r.zero_(); } else if (cast_beta == 1) { - if (!isSameTensor(r, t)) { + if (!is_same_tensor(r, t)) { r.copy_(t); } } else { @@ -523,8 +521,8 @@ Tensor& s_addmm_out_sparse_dense_cpu( 
AT_CHECK(!sparse_.is_cuda(), "addmm: expected 'mat1' to be a CPU tensor, but got a CUDA tensor"); AT_CHECK(!dense.is_cuda(), "addmm: expected 'mat2' to be a CPU tensor, but got a CUDA tensor"); - AT_CHECK(sparse_._sparseDims() == 2, "addmm: matrices expected, got ", sparse_._sparseDims(), "D tensor"); - AT_CHECK(sparse_._denseDims() == 0, "addmm: scalar values expected, got ", sparse_._denseDims(), "D values"); + AT_CHECK(sparse_.sparse_dim() == 2, "addmm: matrices expected, got ", sparse_.sparse_dim(), "D tensor"); + AT_CHECK(sparse_.dense_dim() == 0, "addmm: scalar values expected, got ", sparse_.dense_dim(), "D values"); AT_CHECK(dense.dim() == 2, "addmm: matrices expected, got ", dense.dim(), "D tensor"); SparseTensor sparse = sparse_.coalesce(); @@ -599,10 +597,10 @@ SparseTensor& hspmm_out_sparse_cpu(SparseTensor& r, const SparseTensor& sparse_, AT_CHECK(!r.is_cuda(), "hspmm: expected 'out' to be CPU tensor, but got CUDA tensor"); AT_CHECK(!dense.is_cuda(), "hspmm: expected 'other' to be a CPU tensor, but got a CUDA tensor"); - AT_CHECK(sparse_._sparseDims() == 2, - "hspmm: Argument #2: matrices expected, got ", sparse_._sparseDims(), "D tensor"); - AT_CHECK(sparse_._denseDims() == 0, - "hspmm: Argument #2: scalar values expected, got ", sparse_._denseDims(), "D values"); + AT_CHECK(sparse_.sparse_dim() == 2, + "hspmm: Argument #2: matrices expected, got ", sparse_.sparse_dim(), "D tensor"); + AT_CHECK(sparse_.dense_dim() == 0, + "hspmm: Argument #2: scalar values expected, got ", sparse_.dense_dim(), "D values"); AT_CHECK(dense.dim() == 2, "hspmm: Argument #3: matrices expected, got ", dense.dim(), "D tensor"); @@ -613,7 +611,7 @@ SparseTensor& hspmm_out_sparse_cpu(SparseTensor& r, const SparseTensor& sparse_, AT_CHECK(dense.size(0) == k, "hspmm: Argument #3: Expected dim 0 size ", k, ", got ", dense.size(0)); - _get_sparse_impl(r)->raw_resize_(1, 1, {m, n}); + get_sparse_impl(r)->raw_resize_(1, 1, {m, n}); SparseTensor sparse = sparse_.coalesce(); @@ 
-649,13 +647,13 @@ SparseTensor& hspmm_out_sparse_cpu(SparseTensor& r, const SparseTensor& sparse_, indices.resize_({1, outNnz}); Tensor values = at::empty({outNnz, n}, dense.options()); - std::vector new_size = _get_sparse_impl(newSparse)->sizes().vec(); + std::vector new_size = get_sparse_impl(newSparse)->sizes().vec(); new_size[0] = outNnz; - _get_sparse_impl(newSparse)->raw_resize_(_get_sparse_impl(newSparse)->sparseDims(), _get_sparse_impl(newSparse)->denseDims(), new_size); + get_sparse_impl(newSparse)->raw_resize_(get_sparse_impl(newSparse)->sparse_dim(), get_sparse_impl(newSparse)->dense_dim(), new_size); // Compute output values tensor with sparse * dense multiplication s_addmm_out_sparse_dense_cpu(values, values, newSparse, dense, 0, alpha); - _get_sparse_impl(r)->set_indices_and_values_unsafe(indices, values); + get_sparse_impl(r)->set_indices_and_values_unsafe(indices, values); return r; } @@ -683,10 +681,10 @@ SparseTensor& _sspaddmm_out_cpu( AT_CHECK(!sparse_.is_cuda(), "sspaddmm: expected 'mat1' to be a CPU tensor, but got a CUDA tensor"); AT_CHECK(!dense.is_cuda(), "sspaddmm: expected 'mat2' to be a CPU tensor, but got a CUDA tensor"); - AT_CHECK(sparse_._sparseDims() == 2, - "sspaddmm: Argument #2: matrices expected, got ", sparse_._sparseDims(), "D tensor"); - AT_CHECK(sparse_._denseDims() == 0, - "sspaddmm: Argument #2: scalar values expected, got ", sparse_._denseDims(), "D values"); + AT_CHECK(sparse_.sparse_dim() == 2, + "sspaddmm: Argument #2: matrices expected, got ", sparse_.sparse_dim(), "D tensor"); + AT_CHECK(sparse_.dense_dim() == 0, + "sspaddmm: Argument #2: scalar values expected, got ", sparse_.dense_dim(), "D values"); AT_CHECK(dense.dim() == 2, "sspaddmm: Argument #2: matrices expected, got ", dense.dim(), "D tensor"); @@ -699,7 +697,7 @@ SparseTensor& _sspaddmm_out_cpu( // NB: This has to occur before the checks, because r may alias t. 
// See test_saddmm - _get_sparse_impl(r)->raw_resize_(2, 0, {dim_i, dim_k}); + get_sparse_impl(r)->raw_resize_(2, 0, {dim_i, dim_k}); AT_CHECK(dense.size(0) == dim_j, "sspaddmm: Argument #3: Expected dim 0 size ", dim_j, ", got ", dense.size(0)); @@ -774,8 +772,8 @@ SparseTensor& _sspaddmm_out_cpu( ); // to avoid a clone - _get_sparse_impl(r)->set_indices_and_values_unsafe(newi, newv); - _get_sparse_impl(r)->set_nnz_and_narrow(p); + get_sparse_impl(r)->set_indices_and_values_unsafe(newi, newv); + get_sparse_impl(r)->set_nnz_and_narrow(p); return r; } diff --git a/aten/src/ATen/native/sparse/SparseUtils.h b/aten/src/ATen/native/sparse/SparseUtils.h deleted file mode 100644 index 2e48fe41238c2..0000000000000 --- a/aten/src/ATen/native/sparse/SparseUtils.h +++ /dev/null @@ -1,120 +0,0 @@ -#include -#include - -#include - -namespace at { namespace native { - -// Just for documentary purposes -using SparseTensor = Tensor; -using LongTensor = Tensor; -using IntTensor = Tensor; -using SparseType = Type; - -namespace { - -// This is an internal utility function for getting at the SparseTensorImpl, -// so that we can write sparse tensor specific accessors for special fields -// in SparseTensor. You should only use this for writing low level -// setters/getters for SparseTensorImpl fields; otherwise, you should use -// the low level setters/getters that were implemented using this. -// -// This may be called repeatedly, so make sure it's pretty cheap. -SparseTensorImpl* _get_sparse_impl(const SparseTensor& self) { - if (!self.is_sparse()) AT_ERROR("_internal_get_SparseTensorImpl: not a sparse tensor"); - return static_cast(self.unsafeGetTensorImpl()); -} - -// Port of the old THCSTensor_(checkGPU), but it doesn't really belong here -// because it is more general -// NB: I dropped kernelP2PEnabled support -// NB: This only works if the tensors are KNOWN to be CUDA. 
-// TODO: Generalize it so it works on CPU as well -inline bool _check_device(ArrayRef ts) { - if (ts.empty()) { - return true; - } - const Tensor& ref_t = ts.front(); - int64_t curDevice = current_device(); - for (const Tensor& t : ts) { - if (t.get_device() != curDevice) return false; - } - return true; -} - -// Takes indices and values and directly puts them into the sparse tensor, no -// copy. This used to be called THSTensor_(_move) -inline void _alias_into_sparse(const SparseTensor& self, const LongTensor& indices, const Tensor& values) { - _get_sparse_impl(self)->set_indices_and_values_unsafe(indices, values); -} - -// Take indices and values and makes a (data) copy of them to put into the sparse -// indices/values. This used to be called THSTensor_(_set) -inline void _copy_into_sparse(const SparseTensor& self, const LongTensor& indices, const Tensor& values, bool non_blocking) { - _alias_into_sparse(self, self._indices().type().copy(indices, non_blocking), self._values().type().copy(values, non_blocking)); -} - -// Does NOT make copies of indices/values -inline SparseTensor _new_with_dims_and_tensor_sparse( - const SparseType& dtype, - int64_t sparseDims, - int64_t denseDims, - ArrayRef sizes, - const LongTensor& indices, - const Tensor& values) { - SparseTensor self = at::empty({0}, dtype.options()); - _get_sparse_impl(self)->resize_(sparseDims, denseDims, sizes); - _alias_into_sparse(self, indices, values); - return self; -} - -// TODO: put this into the public API -inline bool isSameTensor(const Tensor& lhs, const Tensor& rhs) { - return lhs.unsafeGetTensorImpl() == rhs.unsafeGetTensorImpl(); -} - -inline bool _is_same_density(const SparseTensor& self, const SparseTensor& src) { - return self._sparseDims() == src._sparseDims() && self._denseDims() == src._denseDims(); -} - -// if forceClone is true, the result will forced to be a clone of self. 
-inline LongTensor _newFlattenedIndices(const SparseTensor& self, bool forceClone) { - LongTensor indices = self._indices(); - int64_t sparseDims = self._sparseDims(); - if (sparseDims == 1) { - if (forceClone) { - return indices.clone(); - } else { - return indices; - } - } else { - // FIXME TH_INDEX_BASE - int64_t factor = 1; - LongTensor indices1D = at::empty({1, self._nnz()}, indices.options()); - indices1D.fill_(TH_INDEX_BASE); - for (int64_t d = sparseDims - 1; d >= 0; d--) { - indices1D.add_(indices.select(0, d), factor); - if (TH_INDEX_BASE != 0) { - indices1D.add_(-TH_INDEX_BASE); - } - factor *= self.size(d); - } - return indices1D; - } -} - -// Give us a new values tensor, with the same dimensionality -// as 'values' but with a new number of non-zero elements. -// TODO: Expose this for real in ATen, some day? -// NB: Doesn't preserve data. -inline Tensor _new_values_with_size_of(const Tensor& values, int64_t nnz) { - std::vector size = values.sizes().vec(); - size[0] = nnz; - return at::empty(size, values.options()); -} - - - -} // anonymous namespace - -}} // namespace at::native diff --git a/aten/src/ATen/native/sparse/cuda/SparseCUDAApplyUtils.cuh b/aten/src/ATen/native/sparse/cuda/SparseCUDAApplyUtils.cuh index 44bd3ab8f7060..3de5ced0edfaf 100644 --- a/aten/src/ATen/native/sparse/cuda/SparseCUDAApplyUtils.cuh +++ b/aten/src/ATen/native/sparse/cuda/SparseCUDAApplyUtils.cuh @@ -38,6 +38,8 @@ __device__ void applyOp3( } } +// Assume both dense and values are contiguous. +// Currently only used in add_out_dense_sparse_cuda: add(dense, sparse, scalar). 
template __global__ void sparseElementwiseKernel( Op op, @@ -45,23 +47,26 @@ __global__ void sparseElementwiseKernel( TensorInfo indices, TensorInfo values, const IndexType nnz) { - IndexType indskip = indices.strides[0]; - IndexType valueSize = values.strides[0]; + IndexType ind_skip = indices.strides[0]; + IndexType ind_nnz_skip = indices.strides[1]; + IndexType value_size = values.strides[0]; // numel of each slice in values for (IndexType linearId = blockIdx.x; linearId < nnz; linearId += gridDim.x) { IndexType index = 0; for (IndexType d = 0; d < indices.sizes[0]; d++) { - index = dense.sizes[d] * index + indices.data[d * indskip + linearId]; + index = dense.sizes[d] * index + indices.data[d * ind_skip + linearId * ind_nnz_skip]; } - Real *dst = dense.data + index * valueSize; - Real *src = values.data + linearId * valueSize; - for (IndexType linearId2 = threadIdx.x; linearId2 < valueSize; linearId2 += blockDim.x) { + Real *dst = dense.data + index * value_size; + Real *src = values.data + linearId * value_size; + for (IndexType linearId2 = threadIdx.x; linearId2 < value_size; linearId2 += blockDim.x) { op(dst + linearId2, src + linearId2); } } } +// Assume dense is contiguous. +// Currently only used in add_out_dense_sparse_cuda: add(dense, sparse, scalar). 
template __global__ void sparseElementwiseKernelScalar( Op op, @@ -69,15 +74,17 @@ __global__ void sparseElementwiseKernelScalar( TensorInfo indices, TensorInfo values, const IndexType nnz) { - IndexType indskip = indices.strides[0]; + IndexType ind_skip = indices.strides[0]; + IndexType ind_nnz_skip = indices.strides[1]; + IndexType value_skip = values.strides[0]; for (IndexType linearId = blockIdx.x * blockDim.x + threadIdx.x; linearId < nnz; linearId += gridDim.x * blockDim.x) { IndexType index = 0; for (IndexType d = 0; d < indices.sizes[0]; d++) { - index = dense.sizes[d] * index + indices.data[d * indskip + linearId]; + index = dense.sizes[d] * index + indices.data[d * ind_skip + linearId * ind_nnz_skip]; } - op(dense.data + index, values.data + linearId); + op(dense.data + index, values.data + linearId * value_skip); } } diff --git a/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cpp b/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cpp index ab9fb15c62873..1da0345e3c36f 100644 --- a/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cpp +++ b/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cpp @@ -1,18 +1,20 @@ #include #include -#include +#include namespace at { namespace native { +using namespace at::sparse; + SparseTensor& sparse_mask_out_cuda(SparseTensor& r, const Tensor& t, const SparseTensor& mask) { AT_CHECK(mask.is_coalesced(), "sparse_mask: mask is uncoalesced"); AT_CHECK(mask.sizes().equals(t.sizes()), "sparse_mask: operands have incompatible sizes; self has size ", t.sizes(), " but mask has size ", mask.sizes()); - AT_ASSERT(t.is_cuda()); // dispatch argument + AT_ASSERT(t.is_cuda()); // dispatch argument AT_CHECK(mask.is_cuda(), "sparse_mask: expected 'mask' to be CUDA, but got CPU"); AT_CHECK(r.is_cuda(), "sparse_mask: expected 'out' to be CUDA, but got CPU"); - AT_CHECK(_check_device({r, t, mask}), + AT_CHECK(check_device({r, t, mask}), "sparse_mask: arguments are located on different devices; self is on device ", t.get_device(), ", 
mask is on device ", mask.get_device(), ", out is on device ", r.get_device()); resize_as_sparse_(r, mask); @@ -22,25 +24,24 @@ SparseTensor& sparse_mask_out_cuda(SparseTensor& r, const Tensor& t, const Spars LongTensor mask_indices = mask._indices(); Tensor mask_values = mask._values(); Tensor r_values = at::empty(mask_values.sizes(), r._values().options()); - _alias_into_sparse(r, mask_indices.clone(), r_values); - _get_sparse_impl(r)->set_coalesced(mask.is_coalesced()); - _get_sparse_impl(r)->set_nnz_and_narrow(mask._nnz()); + alias_into_sparse(r, mask_indices.clone(), r_values); + r._coalesced_(mask.is_coalesced()); if (t.numel() == 0) { // if t is an empty tensor, there is no need to mask its elements return r; } LongTensor indices = at::zeros({mask._nnz()}, mask_indices.options()); - for (int64_t d = 0; d < mask._sparseDims(); d++) { + for (int64_t d = 0; d < mask.sparse_dim(); d++) { indices.mul_(mask.size(d)); // This used to use a buffer but I deoptimized it indices.add_(mask_indices.select(0, d)); } - std::vector view_size(1 + mask._denseDims()); + std::vector view_size(1 + mask.dense_dim()); view_size[0] = -1; - for (int64_t d = 0; d < mask._denseDims(); d++) { - view_size[d + 1] = mask.size(mask._sparseDims() + d); + for (int64_t d = 0; d < mask.dense_dim(); d++) { + view_size[d + 1] = mask.size(mask.sparse_dim() + d); } Tensor t_view = t.view(view_size); diff --git a/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cu b/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cu index 23cc9a944a35c..36511bbf58d61 100644 --- a/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cu +++ b/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cu @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -24,6 +24,8 @@ namespace at { namespace native { +using namespace at::sparse; + SparseTensor coalesce_sparse_cuda(const SparseTensor& self) { int64_t nnz = self._nnz(); if (self.is_coalesced()) { @@ -33,7 +35,7 @@ SparseTensor 
coalesce_sparse_cuda(const SparseTensor& self) { // we should keep the original tensor intact and do coalesce on a copy of the tensor if (nnz < 2) { SparseTensor dst = self.clone(); - _get_sparse_impl(dst)->set_coalesced(true); + dst._coalesced_(true); return dst; } @@ -48,12 +50,12 @@ SparseTensor coalesce_sparse_cuda(const SparseTensor& self) { // TODO: I'm not sure if this could ever be non-contiguous LongTensor values = self._values().contiguous(); - int64_t sparseDims = self._sparseDims(); + int64_t sparse_dim = self.sparse_dim(); int64_t stride = values.stride(0); // indices will be modified by Thrust, so we have to clone or use new storage // here. - LongTensor indices1D = _newFlattenedIndices(self, true); + LongTensor indices1D = flatten_indices(self._indices(), self.sizes(), true); LongTensor origIndices = at::empty({nnz}, self._indices().options()); LongTensor uniqueOffsets = at::empty({nnz}, self._indices().options()); @@ -122,14 +124,14 @@ SparseTensor coalesce_sparse_cuda(const SparseTensor& self) { //////////////////////////////////////////////////////////// // unflatten indices if necessary LongTensor newIndices; - if (sparseDims == 1) { + if (sparse_dim == 1) { newIndices = indices1D; } else { - newIndices = at::empty({sparseDims, newNnz}, origIndices.options()); + newIndices = at::empty({sparse_dim, newNnz}, origIndices.options()); if (TH_INDEX_BASE != 0) { indices1D.add_(-1); } - for (int64_t d = sparseDims - 1; d >= 0; d--) { + for (int64_t d = sparse_dim - 1; d >= 0; d--) { // NB: Not a select, so I can preserve the outer dimension LongTensor indicesSlice = newIndices.narrow(0, d, 1); // Note for the porting guide: THCTensor_(copy) does NOT do normal @@ -145,8 +147,7 @@ SparseTensor coalesce_sparse_cuda(const SparseTensor& self) { } //////////////////////////////////////////////////////////// - SparseTensor dst = ::at::native::sparse_coo_tensor(newIndices, newValues, self.sizes()); - _get_sparse_impl(dst)->set_coalesced(true); + SparseTensor dst 
= ::at::native::sparse_coo_tensor(newIndices, newValues, self.sizes())._coalesced_(true); THCudaCheck(cudaGetLastError()); return dst; diff --git a/aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu b/aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu index d655d3e93289e..eef84007ec3c0 100644 --- a/aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu +++ b/aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -18,6 +18,8 @@ namespace at { namespace native { +using namespace at::sparse; + // -------------------------------------------------------------------- // Utility functions // -------------------------------------------------------------------- @@ -45,12 +47,11 @@ Tensor& s_addmm_out_sparse_dense_cuda(Tensor& r_, const Tensor& t, const SparseT AT_CHECK(sparse_.is_cuda(), "addmm: expected 'mat1' to be CUDA, but got CPU"); AT_CHECK(dense.is_cuda(), "addmm: expected 'mat2' to be CUDA, but got CPU"); - AT_CHECK(_check_device({sparse_, r_, t, dense})); + AT_CHECK(check_device({sparse_, r_, t, dense})); - // TODO: This error message seems awfully opaque - AT_CHECK(sparse_._sparseDims() == 2, "addmm: 2D tensor expected, got ", sparse_._sparseDims(), "D tensor"); - AT_CHECK(sparse_._denseDims() == 0, "addmm: scalar values expected, got ", sparse_._denseDims(), "D values"); AT_CHECK(dense.dim() == 2, "addmm: 2D tensor expected, got ", dense.dim(), "D tensor"); + AT_CHECK(sparse_.sparse_dim() == 2, "addmm: expected first two dims to be sparse (indices has size 2 at first dim), but got ", sparse_.sparse_dim(), " spase dims"); + // no need to check dense_dim because dense_dim + sparse_dim = dim // mxk * kxn = mxn int64_t m = sparse_.size(0); @@ -87,7 +88,7 @@ Tensor& s_addmm_out_sparse_dense_cuda(Tensor& r_, const Tensor& t, const SparseT if (cast_beta == 0) { r_.zero_(); } else if (cast_beta == 1) { - if (!isSameTensor(t, r_)) { + if (!is_same_tensor(t, r_)) { 
r_.copy_(t); } } else { @@ -174,12 +175,12 @@ SparseTensor& hspmm_out_sparse_cuda(SparseTensor& r_, const SparseTensor& sparse AT_CHECK(r_.is_cuda(), "hspmm: expected 'out' to be CUDA, but got CPU"); AT_CHECK(dense.is_cuda(), "hspmm: expected 'mat2' to be CUDA, but got CPU"); - AT_CHECK(_check_device({r_, sparse_, dense})); + AT_CHECK(check_device({r_, sparse_, dense})); - AT_CHECK(sparse_._sparseDims() == 2, - "hspmm: Argument #2: 2D tensor expected, got ", sparse_._sparseDims(), "D tensor"); - AT_CHECK(sparse_._denseDims() == 0, - "hspmm: Argument #2: scalar values expected, got ", sparse_._denseDims(), "D values"); + AT_CHECK(sparse_.sparse_dim() == 2, + "hspmm: Argument #2: 2D tensor expected, got ", sparse_.sparse_dim(), "D tensor"); + AT_CHECK(sparse_.dense_dim() == 0, + "hspmm: Argument #2: scalar values expected, got ", sparse_.dense_dim(), "D values"); AT_CHECK(dense.dim() == 2, "hspmm: Argument #3: 2D tensor expected, got ", dense.dim(), "D tensor"); @@ -190,7 +191,7 @@ SparseTensor& hspmm_out_sparse_cuda(SparseTensor& r_, const SparseTensor& sparse AT_CHECK(dense.size(0) == k, "hspmm: Argument #3: Expected dim 0 size ", k, ", got ", dense.size(0)); - _get_sparse_impl(r_)->resize_and_clear_(1, 1, {m, n}); + get_sparse_impl(r_)->resize_and_clear_(1, 1, {m, n}); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); auto allocator = THCThrustAllocator(globalContext().lazyInitCUDA()); @@ -217,12 +218,12 @@ SparseTensor& hspmm_out_sparse_cuda(SparseTensor& r_, const SparseTensor& sparse thrust::device_ptr indicesIter(dstIndices.data()); thrust::sequence(policy, indicesIter, indicesIter + nnz); - std::vector new_size = _get_sparse_impl(newSparse)->sizes().vec(); + std::vector new_size = get_sparse_impl(newSparse)->sizes().vec(); new_size[0] = nnz; - _get_sparse_impl(newSparse)->raw_resize_(_get_sparse_impl(newSparse)->sparseDims(), _get_sparse_impl(newSparse)->denseDims(), new_size); + 
get_sparse_impl(newSparse)->raw_resize_(get_sparse_impl(newSparse)->sparse_dim(), get_sparse_impl(newSparse)->dense_dim(), new_size); s_addmm_out_sparse_dense_cuda(values, values, newSparse, dense, 0, /*alpha*/ 1); - _get_sparse_impl(r_)->set_indices_and_values_unsafe(indices, values); + get_sparse_impl(r_)->set_indices_and_values_unsafe(indices, values); return r_; } @@ -245,7 +246,7 @@ Tensor& add_out_dense_sparse_cuda(Tensor& r_, const Tensor& dense, SparseTensorR AT_CHECK(sparse.is_cuda(), "add: expected 'other' to be CUDA, but got CPU"); AT_CHECK(r_.is_cuda(), "add: expected 'out' to be CUDA, but got CPU"); - AT_CHECK(_check_device({sparse, r_, dense})); + AT_CHECK(check_device({sparse, r_, dense})); AT_CHECK(dense.sizes().equals(sparse.sizes()), "add: expected 'self' and 'other' to have same size, but self has size ", dense.sizes(), " while other has size ", sparse.sizes(), " (FYI: dense-sparse addition does not currently support broadcasting)"); @@ -258,7 +259,7 @@ Tensor& add_out_dense_sparse_cuda(Tensor& r_, const Tensor& dense, SparseTensorR } Tensor r = r_; - if (!isSameTensor(r, dense)) { + if (!is_same_tensor(r, dense)) { r_.resize_as_(dense); r_.copy_(dense); } else { @@ -269,7 +270,7 @@ Tensor& add_out_dense_sparse_cuda(Tensor& r_, const Tensor& dense, SparseTensorR LongTensor indices = sparse._indices(); Tensor values = sparse._values(); int64_t nDim = dense.dim(); - int64_t nDimI = sparse._sparseDims(); + int64_t nDimI = sparse.sparse_dim(); if (sparse._values().numel() == 0) { return r_; @@ -282,7 +283,7 @@ Tensor& add_out_dense_sparse_cuda(Tensor& r_, const Tensor& dense, SparseTensorR int curDevice = -1; cudaGetDevice(&curDevice); cudaStream_t stream = at::cuda::getCurrentCUDAStream(curDevice); - if (sparse._denseDims() == 0) { + if (sparse.dense_dim() == 0) { AT_CHECK(cuda::getApplyGrid(nnz, grid, curDevice), "add: Argument #0: tensor too large or too many dimensions"); AT_DISPATCH_ALL_TYPES_AND_HALF( @@ -296,6 +297,9 @@ Tensor& 
add_out_dense_sparse_cuda(Tensor& r_, const Tensor& dense, SparseTensorR } else { AT_CHECK(cuda::getApplyGrid(nnz * block.x, grid, curDevice), "add: Argument #0: tensor too large or too many dimensions"); + // sparseElementwiseKernel needs values to be contiguous too + values = values.contiguous(); + AT_DISPATCH_ALL_TYPES_AND_HALF( values.type(), "add_out_dense_sparse_cuda", [&] { apply::sparseElementwiseKernel, uint64_t, scalar_t> @@ -306,7 +310,7 @@ Tensor& add_out_dense_sparse_cuda(Tensor& r_, const Tensor& dense, SparseTensorR }); } } else { - LongTensor indices1D = _newFlattenedIndices(sparse, 0).squeeze_(0).narrow(0, 0, nnz); + LongTensor indices1D = flatten_indices(indices, sparse.sizes(), 0); // FIXME: at some point we can wrap the scale into indexAdd // NB: Purposely not inplace! @@ -327,7 +331,7 @@ Tensor& add_out_dense_sparse_cuda(Tensor& r_, const Tensor& dense, SparseTensorR } Tensor r_view = r.view({view_rows, view_columns}); - values = values.narrow(0, 0, nnz).reshape({nnz, view_columns}); + values = values.reshape({nnz, view_columns}); r_view.index_add_(0, indices1D, values); } THCudaCheck(cudaGetLastError()); @@ -344,7 +348,7 @@ SparseTensor& add_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t, const AT_CHECK(src.is_cuda(), "add: expected 'other' to be CUDA, but got CPU"); AT_CHECK(r_.is_cuda(), "add: expected 'out' to be CUDA, but got CPU"); - AT_CHECK(_check_device({r_, t, src})); + AT_CHECK(check_device({r_, t, src})); AT_CHECK(t.sizes().equals(src.sizes()), "add: expected 'self' and 'other' to have same size, but ", t.sizes(), " != ", src.sizes()); if (src._nnz() == 0) { @@ -354,7 +358,7 @@ SparseTensor& add_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t, const return mul_out_sparse_scalar(r_, src, value); } - AT_CHECK(_is_same_density(t, src), "add: expected 'self' and 'other' to have same density, but 'self' has ", t._sparseDims(), " sparse dimensions while 'other' has ", src._sparseDims(), " sparse dimensions"); + 
AT_CHECK(is_same_density(t, src), "add: expected 'self' and 'other' to have same density, but 'self' has ", t.sparse_dim(), " sparse dimensions while 'other' has ", src.sparse_dim(), " sparse dimensions"); // We deliberately choose to simply concat the indices and values tensors // rather than merging them. This removes the need to synchronously fetch nnz @@ -375,7 +379,7 @@ SparseTensor& add_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t, const LongTensor r_indices_ = at::cat({t_indices_, s_indices_}, 1); Tensor r_values_ = at::cat({t_values_, s_values_}, 0); r_.resize_as_(src); - _alias_into_sparse(r_, r_indices_, r_values_); + alias_into_sparse(r_, r_indices_, r_values_); // FIXME: add some heuristic about when to call coalesce() here, so that // tensors don't totally blow up in size by concatenation; e.g. @@ -402,7 +406,7 @@ SparseTensor& mul_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t_, cons AT_ASSERT(t_.is_cuda()); // dispatch argument AT_CHECK(src_.is_cuda(), "mul: expected 'other' to be CUDA, but got CPU"); AT_CHECK(r_.is_cuda(), "mul: expected 'out' to be CUDA, but got CPU"); - AT_CHECK(_check_device({r_, t_, src_})); + AT_CHECK(check_device({r_, t_, src_})); AT_CHECK(t_.sizes().equals(src_.sizes()), "mul: expected 'self' and 'other' to have same size, but ", t_.sizes(), " != ", src_.sizes()); SparseTensor t = t_.coalesce(); @@ -416,15 +420,15 @@ SparseTensor& mul_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t_, cons // saving those because they can be overwritten when doing in-place operations int64_t t_nnz = t._nnz(), s_nnz = src._nnz(); int64_t max_nnz = std::min(t_nnz, s_nnz); // multiply by zero is zero, and can be dropped - int64_t sparseDims = src._sparseDims(); + int64_t sparse_dim = src.sparse_dim(); LongTensor t_indices_ = t._indices(); Tensor t_values_ = t._values(); LongTensor s_indices_ = src._indices(); Tensor s_values_ = src._values(); - LongTensor r_indices_ = at::empty({sparseDims, max_nnz}, 
t_indices_.options()); - Tensor r_values_ = _new_values_with_size_of(t_values_, max_nnz).zero_(); + LongTensor r_indices_ = at::empty({sparse_dim, max_nnz}, t_indices_.options()); + Tensor r_values_ = new_values_with_size_of(t_values_, max_nnz).zero_(); r_.resize_as_(src); - _get_sparse_impl(r_)->set_indices_and_values_unsafe(r_indices_, r_values_); + get_sparse_impl(r_)->set_indices_and_values_unsafe(r_indices_, r_values_); int64_t valueSize = t_values_.stride(0); const dim3 block = dim3(std::min(static_cast(cuda::getApplyBlock().x), valueSize)); @@ -457,10 +461,9 @@ SparseTensor& mul_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t_, cons // sync! (surely there is a more idiomatic way to do this...) LongTensor cpu_resultNnz = at::empty({1}, CPU(kLong)); cpu_resultNnz.copy_(resultNnz); - _get_sparse_impl(r_)->set_nnz_and_narrow(cpu_resultNnz.accessor()[0]); - _get_sparse_impl(r_)->set_coalesced(true); + get_sparse_impl(r_)->set_nnz_and_narrow(cpu_resultNnz.accessor()[0]); - return r_; + return r_._coalesced_(true); } }} // namespace at::native diff --git a/aten/src/ATen/native_parse.py b/aten/src/ATen/native_parse.py index 7f34b48e0228f..3368a9f10178f 100644 --- a/aten/src/ATen/native_parse.py +++ b/aten/src/ATen/native_parse.py @@ -129,6 +129,7 @@ def run(paths): output_arguments = [x for x in arguments if x.get('output')] declaration['return'] = return_type if len(output_arguments) == 0 else output_arguments declaration['variants'] = func.get('variants', ['function']) + declaration['requires_tensor'] = func.get('requires_tensor', False) declaration['cpu_half'] = func.get('cpu_half', False) declaration['deprecated'] = func.get('deprecated', False) declaration['device_guard'] = func.get('device_guard', True) diff --git a/aten/src/ATen/templates/TypeDefault.cpp b/aten/src/ATen/templates/TypeDefault.cpp index 76ad9465d6e32..c0fb9ca80bdac 100644 --- a/aten/src/ATen/templates/TypeDefault.cpp +++ b/aten/src/ATen/templates/TypeDefault.cpp @@ -13,6 +13,7 @@ 
#include "ATen/Tensor.h" #include "ATen/core/TensorOptions.h" #include "ATen/DeviceGuard.h" +#include "ATen/SparseTensorUtils.h" namespace at { diff --git a/test/common_utils.py b/test/common_utils.py index 414c6dd8aae17..e6d497d47c236 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -290,6 +290,31 @@ def assertTensorsSlowEqual(self, x, y, prec=None, message=''): max_err = max(max_err, abs(x[index] - y[index])) self.assertLessEqual(max_err, prec, message) + def genSparseTensor(self, size, sparse_dim, nnz, is_uncoalesced, device='cpu'): + assert all(size[d] > 0 for d in range(sparse_dim)) or nnz == 0, 'invalid arguments' + + v_size = [nnz] + list(size[sparse_dim:]) + v = torch.randn(*v_size, device=device) + i = torch.rand(sparse_dim, nnz, device=device) + i.mul_(torch.tensor(size[:sparse_dim]).unsqueeze(1).to(i)) + i = i.to(torch.long) + if is_uncoalesced: + v = torch.cat([v, torch.randn_like(v)], 0) + i = torch.cat([i, i], 1) + + x = torch.sparse_coo_tensor(i, v, torch.Size(size)) + + if not is_uncoalesced: + x = x.coalesce() + else: + # FIXME: `x` is a sparse view of `v`. Currently rebase_history for + # sparse views is not implemented, so this workaround is + # needed for inplace operations done on `x`, e.g., copy_(). + # Remove after implementing something equivalent to CopySlice + # for sparse views. 
+ x = x.detach() + return x, x._indices().clone(), x._values().clone() + def safeToDense(self, t): r = self.safeCoalesce(t) return r.to_dense() diff --git a/test/expect/TestCudaSparse.test_print.expect b/test/expect/TestCudaSparse.test_print.expect index cae534bd16e8d..2745fb59a2ba3 100644 --- a/test/expect/TestCudaSparse.test_print.expect +++ b/test/expect/TestCudaSparse.test_print.expect @@ -1,6 +1,6 @@ # shape: torch.Size([]) # nnz: 2 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 2]) # values shape: torch.Size([2]) ########## torch.int32 ########## @@ -30,15 +30,14 @@ tensor(indices=tensor([], size=(0, 4)), device='cuda:0', size=(), nnz=4, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 2), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 2), dtype=torch.int64) # _values tensor([0., 1.], device='cuda:0', dtype=torch.float32, - grad_fn=) + grad_fn=) # shape: torch.Size([0]) # nnz: 10 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 10]) # values shape: torch.Size([10, 0]) ########## torch.int32 ########## @@ -68,15 +67,14 @@ tensor(indices=tensor([], size=(0, 20)), device='cuda:0', size=(0,), nnz=20, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 10), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 10), dtype=torch.int64) # _values tensor([], device='cuda:0', size=(10, 0), dtype=torch.float32, - grad_fn=) + grad_fn=) # shape: torch.Size([2]) # nnz: 3 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 3]) # values shape: torch.Size([3, 2]) ########## torch.int32 ########## @@ -119,17 +117,16 @@ tensor(indices=tensor([], size=(0, 6)), device='cuda:0', size=(2,), nnz=6, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 3), 
dtype=torch.int64) # _values tensor([[0.0000, 0.3333], [0.6667, 1.0000], [1.3333, 1.6667]], device='cuda:0', dtype=torch.float32, - grad_fn=) + grad_fn=) # shape: torch.Size([100, 3]) # nnz: 3 -# sparseDim: 1 +# sparse_dim: 1 # indices shape: torch.Size([1, 3]) # values shape: torch.Size([3, 3]) ########## torch.int32 ########## @@ -172,16 +169,16 @@ tensor(indices=tensor([[0, 1, 2, 0, 1, 2]]), device='cuda:0', size=(100, 3), nnz=6, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([[0, 1, 2]], device='cuda:0', grad_fn=) +tensor([[0, 1, 2]], device='cuda:0') # _values tensor([[0.0000, 0.2222, 0.4444], [0.6667, 0.8889, 1.1111], [1.3333, 1.5556, 1.7778]], device='cuda:0', dtype=torch.float32, - grad_fn=) + grad_fn=) # shape: torch.Size([100, 20, 3]) # nnz: 0 -# sparseDim: 2 +# sparse_dim: 2 # indices shape: torch.Size([2, 0]) # values shape: torch.Size([0, 3]) ########## torch.int32 ########## @@ -211,15 +208,14 @@ tensor(indices=tensor([], size=(2, 0)), device='cuda:0', size=(100, 20, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(2, 0), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(2, 0), dtype=torch.int64) # _values tensor([], device='cuda:0', size=(0, 3), dtype=torch.float32, - grad_fn=) + grad_fn=) # shape: torch.Size([10, 0, 3]) # nnz: 3 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 3]) # values shape: torch.Size([3, 10, 0, 3]) ########## torch.int32 ########## @@ -249,15 +245,14 @@ tensor(indices=tensor([], size=(0, 6)), device='cuda:0', size=(10, 0, 3), nnz=6, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64) # _values tensor([], device='cuda:0', size=(3, 10, 0, 3), dtype=torch.float32, - grad_fn=) + grad_fn=) # shape: torch.Size([10, 0, 3]) # nnz: 0 -# sparseDim: 0 +# sparse_dim: 0 
# indices shape: torch.Size([0, 0]) # values shape: torch.Size([0, 10, 0, 3]) ########## torch.int32 ########## @@ -287,8 +282,7 @@ tensor(indices=tensor([], size=(0, 0)), device='cuda:0', size=(10, 0, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 0), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 0), dtype=torch.int64) # _values tensor([], device='cuda:0', size=(0, 10, 0, 3), dtype=torch.float32, - grad_fn=) + grad_fn=) diff --git a/test/expect/TestCudaUncoalescedSparse.test_print.expect b/test/expect/TestCudaUncoalescedSparse.test_print.expect index e6207c6da3d27..30b8403a2eb64 100644 --- a/test/expect/TestCudaUncoalescedSparse.test_print.expect +++ b/test/expect/TestCudaUncoalescedSparse.test_print.expect @@ -1,6 +1,6 @@ # shape: torch.Size([]) # nnz: 2 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 2]) # values shape: torch.Size([2]) ########## torch.int32 ########## @@ -30,15 +30,14 @@ tensor(indices=tensor([], size=(0, 4)), device='cuda:0', size=(), nnz=4, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 2), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 2), dtype=torch.int64) # _values tensor([0., 1.], device='cuda:0', dtype=torch.float32, - grad_fn=) + grad_fn=) # shape: torch.Size([0]) # nnz: 10 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 10]) # values shape: torch.Size([10, 0]) ########## torch.int32 ########## @@ -68,15 +67,14 @@ tensor(indices=tensor([], size=(0, 20)), device='cuda:0', size=(0,), nnz=20, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 10), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 10), dtype=torch.int64) # _values tensor([], device='cuda:0', size=(10, 0), dtype=torch.float32, - grad_fn=) + grad_fn=) # shape: torch.Size([2]) # nnz: 3 -# sparseDim: 0 
+# sparse_dim: 0 # indices shape: torch.Size([0, 3]) # values shape: torch.Size([3, 2]) ########## torch.int32 ########## @@ -119,17 +117,16 @@ tensor(indices=tensor([], size=(0, 6)), device='cuda:0', size=(2,), nnz=6, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64) # _values tensor([[0.0000, 0.3333], [0.6667, 1.0000], [1.3333, 1.6667]], device='cuda:0', dtype=torch.float32, - grad_fn=) + grad_fn=) # shape: torch.Size([100, 3]) # nnz: 3 -# sparseDim: 1 +# sparse_dim: 1 # indices shape: torch.Size([1, 3]) # values shape: torch.Size([3, 3]) ########## torch.int32 ########## @@ -172,16 +169,16 @@ tensor(indices=tensor([[0, 1, 0, 0, 1, 0]]), device='cuda:0', size=(100, 3), nnz=6, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([[0, 1, 0]], device='cuda:0', grad_fn=) +tensor([[0, 1, 0]], device='cuda:0') # _values tensor([[0.0000, 0.2222, 0.4444], [0.6667, 0.8889, 1.1111], [1.3333, 1.5556, 1.7778]], device='cuda:0', dtype=torch.float32, - grad_fn=) + grad_fn=) # shape: torch.Size([100, 20, 3]) # nnz: 0 -# sparseDim: 2 +# sparse_dim: 2 # indices shape: torch.Size([2, 0]) # values shape: torch.Size([0, 3]) ########## torch.int32 ########## @@ -211,15 +208,14 @@ tensor(indices=tensor([], size=(2, 0)), device='cuda:0', size=(100, 20, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(2, 0), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(2, 0), dtype=torch.int64) # _values tensor([], device='cuda:0', size=(0, 3), dtype=torch.float32, - grad_fn=) + grad_fn=) # shape: torch.Size([10, 0, 3]) # nnz: 3 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 3]) # values shape: torch.Size([3, 10, 0, 3]) ########## torch.int32 ########## @@ -249,15 +245,14 @@ tensor(indices=tensor([], size=(0, 6)), device='cuda:0', 
size=(10, 0, 3), nnz=6, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64) # _values tensor([], device='cuda:0', size=(3, 10, 0, 3), dtype=torch.float32, - grad_fn=) + grad_fn=) # shape: torch.Size([10, 0, 3]) # nnz: 0 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 0]) # values shape: torch.Size([0, 10, 0, 3]) ########## torch.int32 ########## @@ -287,8 +282,7 @@ tensor(indices=tensor([], size=(0, 0)), device='cuda:0', size=(10, 0, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], device='cuda:0', size=(0, 0), dtype=torch.int64, - grad_fn=) +tensor([], device='cuda:0', size=(0, 0), dtype=torch.int64) # _values tensor([], device='cuda:0', size=(0, 10, 0, 3), dtype=torch.float32, - grad_fn=) + grad_fn=) diff --git a/test/expect/TestSparse.test_print.expect b/test/expect/TestSparse.test_print.expect index c0223c1483741..4f282194f5cb8 100644 --- a/test/expect/TestSparse.test_print.expect +++ b/test/expect/TestSparse.test_print.expect @@ -1,6 +1,6 @@ # shape: torch.Size([]) # nnz: 2 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 2]) # values shape: torch.Size([2]) ########## torch.int32 ########## @@ -28,13 +28,13 @@ tensor(indices=tensor([], size=(0, 2)), size=(), nnz=2, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 2), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 2), dtype=torch.int64) # _values -tensor([0., 1.], dtype=torch.float32, grad_fn=) +tensor([0., 1.], dtype=torch.float32, grad_fn=) # shape: torch.Size([0]) # nnz: 10 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 10]) # values shape: torch.Size([10, 0]) ########## torch.int32 ########## @@ -62,13 +62,13 @@ tensor(indices=tensor([], size=(0, 10)), size=(0,), nnz=10, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # 
_indices -tensor([], size=(0, 10), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 10), dtype=torch.int64) # _values -tensor([], size=(10, 0), dtype=torch.float32, grad_fn=) +tensor([], size=(10, 0), dtype=torch.float32, grad_fn=) # shape: torch.Size([2]) # nnz: 3 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 3]) # values shape: torch.Size([3, 2]) ########## torch.int32 ########## @@ -106,15 +106,15 @@ tensor(indices=tensor([], size=(0, 3)), size=(2,), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 3), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 3), dtype=torch.int64) # _values tensor([[0.0000, 0.3333], [0.6667, 1.0000], - [1.3333, 1.6667]], dtype=torch.float32, grad_fn=) + [1.3333, 1.6667]], dtype=torch.float32, grad_fn=) # shape: torch.Size([100, 3]) # nnz: 3 -# sparseDim: 1 +# sparse_dim: 1 # indices shape: torch.Size([1, 3]) # values shape: torch.Size([3, 3]) ########## torch.int32 ########## @@ -152,16 +152,16 @@ tensor(indices=tensor([[0, 1, 2]]), size=(100, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([[0, 1, 2]], grad_fn=) +tensor([[0, 1, 2]]) # _values tensor([[0.0000, 0.2222, 0.4444], [0.6667, 0.8889, 1.1111], [1.3333, 1.5556, 1.7778]], dtype=torch.float32, - grad_fn=) + grad_fn=) # shape: torch.Size([100, 20, 3]) # nnz: 0 -# sparseDim: 2 +# sparse_dim: 2 # indices shape: torch.Size([2, 0]) # values shape: torch.Size([0, 3]) ########## torch.int32 ########## @@ -189,13 +189,13 @@ tensor(indices=tensor([], size=(2, 0)), size=(100, 20, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(2, 0), dtype=torch.int64, grad_fn=) +tensor([], size=(2, 0), dtype=torch.int64) # _values -tensor([], size=(0, 3), dtype=torch.float32, grad_fn=) +tensor([], size=(0, 3), dtype=torch.float32, grad_fn=) # shape: torch.Size([10, 0, 3]) # nnz: 3 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 3]) # 
values shape: torch.Size([3, 10, 0, 3]) ########## torch.int32 ########## @@ -223,13 +223,13 @@ tensor(indices=tensor([], size=(0, 3)), size=(10, 0, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 3), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 3), dtype=torch.int64) # _values -tensor([], size=(3, 10, 0, 3), dtype=torch.float32, grad_fn=) +tensor([], size=(3, 10, 0, 3), dtype=torch.float32, grad_fn=) # shape: torch.Size([10, 0, 3]) # nnz: 0 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 0]) # values shape: torch.Size([0, 10, 0, 3]) ########## torch.int32 ########## @@ -257,6 +257,6 @@ tensor(indices=tensor([], size=(0, 0)), size=(10, 0, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 0), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 0), dtype=torch.int64) # _values -tensor([], size=(0, 10, 0, 3), dtype=torch.float32, grad_fn=) +tensor([], size=(0, 10, 0, 3), dtype=torch.float32, grad_fn=) diff --git a/test/expect/TestUncoalescedSparse.test_print.expect b/test/expect/TestUncoalescedSparse.test_print.expect index 244442de0cc73..a642b4bb8c0ab 100644 --- a/test/expect/TestUncoalescedSparse.test_print.expect +++ b/test/expect/TestUncoalescedSparse.test_print.expect @@ -1,6 +1,6 @@ # shape: torch.Size([]) # nnz: 2 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 2]) # values shape: torch.Size([2]) ########## torch.int32 ########## @@ -28,13 +28,13 @@ tensor(indices=tensor([], size=(0, 2)), size=(), nnz=2, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 2), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 2), dtype=torch.int64) # _values -tensor([0., 1.], dtype=torch.float32, grad_fn=) +tensor([0., 1.], dtype=torch.float32, grad_fn=) # shape: torch.Size([0]) # nnz: 10 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 10]) # values shape: torch.Size([10, 0]) ########## 
torch.int32 ########## @@ -62,13 +62,13 @@ tensor(indices=tensor([], size=(0, 10)), size=(0,), nnz=10, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 10), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 10), dtype=torch.int64) # _values -tensor([], size=(10, 0), dtype=torch.float32, grad_fn=) +tensor([], size=(10, 0), dtype=torch.float32, grad_fn=) # shape: torch.Size([2]) # nnz: 3 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 3]) # values shape: torch.Size([3, 2]) ########## torch.int32 ########## @@ -106,15 +106,15 @@ tensor(indices=tensor([], size=(0, 3)), size=(2,), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 3), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 3), dtype=torch.int64) # _values tensor([[0.0000, 0.3333], [0.6667, 1.0000], - [1.3333, 1.6667]], dtype=torch.float32, grad_fn=) + [1.3333, 1.6667]], dtype=torch.float32, grad_fn=) # shape: torch.Size([100, 3]) # nnz: 3 -# sparseDim: 1 +# sparse_dim: 1 # indices shape: torch.Size([1, 3]) # values shape: torch.Size([3, 3]) ########## torch.int32 ########## @@ -152,16 +152,16 @@ tensor(indices=tensor([[0, 1, 0]]), size=(100, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([[0, 1, 0]], grad_fn=) +tensor([[0, 1, 0]]) # _values tensor([[0.0000, 0.2222, 0.4444], [0.6667, 0.8889, 1.1111], [1.3333, 1.5556, 1.7778]], dtype=torch.float32, - grad_fn=) + grad_fn=) # shape: torch.Size([100, 20, 3]) # nnz: 0 -# sparseDim: 2 +# sparse_dim: 2 # indices shape: torch.Size([2, 0]) # values shape: torch.Size([0, 3]) ########## torch.int32 ########## @@ -189,13 +189,13 @@ tensor(indices=tensor([], size=(2, 0)), size=(100, 20, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(2, 0), dtype=torch.int64, grad_fn=) +tensor([], size=(2, 0), dtype=torch.int64) # _values -tensor([], size=(0, 3), dtype=torch.float32, grad_fn=) +tensor([], 
size=(0, 3), dtype=torch.float32, grad_fn=) # shape: torch.Size([10, 0, 3]) # nnz: 3 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 3]) # values shape: torch.Size([3, 10, 0, 3]) ########## torch.int32 ########## @@ -223,13 +223,13 @@ tensor(indices=tensor([], size=(0, 3)), size=(10, 0, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 3), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 3), dtype=torch.int64) # _values -tensor([], size=(3, 10, 0, 3), dtype=torch.float32, grad_fn=) +tensor([], size=(3, 10, 0, 3), dtype=torch.float32, grad_fn=) # shape: torch.Size([10, 0, 3]) # nnz: 0 -# sparseDim: 0 +# sparse_dim: 0 # indices shape: torch.Size([0, 0]) # values shape: torch.Size([0, 10, 0, 3]) ########## torch.int32 ########## @@ -257,6 +257,6 @@ tensor(indices=tensor([], size=(0, 0)), size=(10, 0, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, grad_fn=) # _indices -tensor([], size=(0, 0), dtype=torch.int64, grad_fn=) +tensor([], size=(0, 0), dtype=torch.int64) # _values -tensor([], size=(0, 10, 0, 3), dtype=torch.float32, grad_fn=) +tensor([], size=(0, 10, 0, 3), dtype=torch.float32, grad_fn=) diff --git a/test/test_autograd.py b/test/test_autograd.py index 49cd858228859..a35656dbc4195 100644 --- a/test/test_autograd.py +++ b/test/test_autograd.py @@ -553,14 +553,14 @@ def backward(self, grad_x): [0, 2, 2], ]) v1 = torch.DoubleTensor([[1, 2], [4, 5], [7, 8]]) - sparse_grad1 = Variable(torch.sparse.DoubleTensor(i1, v1, size)) + sparse_grad1 = torch.sparse.DoubleTensor(i1, v1, size) i2 = torch.LongTensor([ [0, 1, 3, 4], [0, 1, 2, 2], ]) v2 = torch.DoubleTensor([[1, 2], [4, 3], [4, 5], [7, 8]]) - sparse_grad2 = Variable(torch.sparse.DoubleTensor(i2, v2, size)) - dense_grad = Variable(torch.rand(size).double()) + sparse_grad2 = torch.sparse.DoubleTensor(i2, v2, size) + dense_grad = torch.rand(size).double() sparse_fn1 = FixedGradientFunction(sparse_grad1) sparse_fn2 = 
FixedGradientFunction(sparse_grad2) dense_fn = FixedGradientFunction(dense_grad) @@ -578,6 +578,48 @@ def backward(self, grad_x): (sparse_fn1(x) + sparse_fn2(x)).sum().backward() self.assertEqual(x.grad, sparse_grad1 + sparse_grad2) + def test_sparse_ctor_getter_backward(self): + def test(size, sparse_dim, nnz): + v_size = [nnz] + list(size[sparse_dim:]) + i = torch.rand(sparse_dim, nnz) + i.mul_(torch.tensor(size[:sparse_dim]).unsqueeze(1).to(i)) + i = i.to(torch.long) + + inp = torch.randn(v_size, requires_grad=True) + other = self.genSparseTensor(size, sparse_dim, nnz, is_uncoalesced=True)[0] + + # Use .indices() and .values() + def fn1(v): + x = torch.sparse_coo_tensor(i, v, size) + y = (x + other).coalesce() + yv = y.values() + new_v = yv.tanh() + z = torch.sparse_coo_tensor(y.indices(), new_v, y.size()) + return z.coalesce().values() + + # Use ._indices() and ._values() + def fn2(v): + x = torch.sparse_coo_tensor(i, v, size) + y = (x + other) + yv = y._values() + new_v = yv.tanh() + z = torch.sparse_coo_tensor(y._indices(), new_v, y.size()) + return z.coalesce()._values() + + for fn in (fn1, fn2): + gradcheck(fn, (inp,)) + # FIXME: make gradgradcheck work. + # gradgradcheck(fn, (inp,)) + + for size in ([0, 10], [2, 0, 3]): + for sparse_dim in (0, 1, 2): + for nnz in (0, 10): + if nnz > 0 and any(size[d] == 0 for d in range(sparse_dim)): + # impossible combination because the sparse dims have + # empty numel, but nnz > 0. 
+ continue + test(size=size, sparse_dim=sparse_dim, nnz=nnz) + def test_multi_backward(self): x = torch.randn(5, 5, requires_grad=True) y = torch.randn(5, 5, requires_grad=True) diff --git a/test/test_sparse.py b/test/test_sparse.py index d32aabb145a65..a26bd2bfc1665 100644 --- a/test/test_sparse.py +++ b/test/test_sparse.py @@ -44,47 +44,16 @@ def setUp(self): self.SparseTensor = torch.sparse.DoubleTensor super(TestSparse, self).setUp() - def _gen_sparse(self, sparse_dims, nnz, with_size): - # TODO: Consider implementing this in the CUDA case by directly - # performing the operations on the GPU. You won't be able to - # use torch.rand/torch.randn in this case because they are - # CPU-only. If you do this, you can remove the is_cuda branch - # at the end. - # - # If you do this, be sure to update assert_uncoalesced too - + def _gen_sparse(self, sparse_dim, nnz, with_size): if isinstance(with_size, Number): - with_size = [with_size] * sparse_dims + with_size = [with_size] * sparse_dim + + x, i, v = self.genSparseTensor(with_size, sparse_dim, nnz, self.is_uncoalesced, self.device) if self.is_uncoalesced: - # We want to generate a tensor with a lot of uncoalesced - # entries to stress test whether or not we handle this - # (subtle) case correctly - v_size = [nnz * 2] + list(with_size[sparse_dims:]) - v = torch.randn(*v_size) - r = torch.rand(sparse_dims, nnz) - # Repeat the indexes, so every position shows up twice - i = torch.cat([r, r], dim=1) - if nnz > 0: - i *= torch.Tensor(with_size[:sparse_dims]).repeat(nnz * 2, 1).transpose(0, 1) - i = i.type(torch.LongTensor) - x = torch.sparse.DoubleTensor(i, v, torch.Size(with_size)) self.assert_uncoalesced(x) - else: - # Generate a sparse tensor with sparse_dims sparse dimensions; the - # rest the dimensions with_size[sparse_dims:] are dense. 
- v_size = [nnz] + list(with_size[sparse_dims:]) - v = torch.randn(*v_size) - i = torch.rand(sparse_dims, nnz) - if nnz > 0: - i *= torch.Tensor(with_size[:sparse_dims]).repeat(nnz, 1).transpose(0, 1) - i = i.type(torch.LongTensor) - x = torch.sparse.DoubleTensor(i, v, torch.Size(with_size)) - if self.is_cuda: - return x.cuda(), i.cuda(), v.cuda() - else: - return x, i.clone(), v.clone() + return x, i, v def assert_uncoalesced(self, x): """ @@ -109,7 +78,7 @@ def randn(self, *args, **kwargs): @skipIfRocm # ROCm stack doesn't like the x + x call def test_print(self): - shape_sparseDim_nnz = [ + shape_sparse_dim_nnz = [ ((), 0, 2), ((0,), 0, 10), ((2,), 0, 3), @@ -120,18 +89,18 @@ def test_print(self): ] printed = [] - for shape, sparseDim, nnz in shape_sparseDim_nnz: - indices_shape = torch.Size((sparseDim, nnz)) - values_shape = torch.Size((nnz,) + shape[sparseDim:]) + for shape, sparse_dim, nnz in shape_sparse_dim_nnz: + indices_shape = torch.Size((sparse_dim, nnz)) + values_shape = torch.Size((nnz,) + shape[sparse_dim:]) printed.append("# shape: {}".format(torch.Size(shape))) printed.append("# nnz: {}".format(nnz)) - printed.append("# sparseDim: {}".format(sparseDim)) + printed.append("# sparse_dim: {}".format(sparse_dim)) printed.append("# indices shape: {}".format(indices_shape)) printed.append("# values shape: {}".format(values_shape)) indices = torch.arange(indices_shape.numel(), dtype=self.IndexTensor.dtype, device=self.device).view(indices_shape) - for d in range(sparseDim): + for d in range(sparse_dim): indices[d].clamp_(max=(shape[d] - 1)) # make it valid index if self.is_uncoalesced and indices.numel() > 0: indices[:, -1] = indices[:, 0] # make it uncoalesced @@ -174,6 +143,16 @@ def test_shape(sparse_dims, nnz, with_size): self.assertEqual(self.safeCoalesce(x)._nnz(), nnz) self.assertEqual(list(x.size()), with_size) + # Test .indices() and .values() + if self.is_uncoalesced: + with self.assertRaisesRegex(RuntimeError, "Cannot get indices on an 
uncoalesced tensor"): + x.indices() + with self.assertRaisesRegex(RuntimeError, "Cannot get values on an uncoalesced tensor"): + x.values() + else: + self.assertEqual(x.indices(), x._indices()) + self.assertEqual(x.values(), x._values()) + test_shape(3, 10, 100) test_shape(3, 10, [100, 100, 100]) test_shape(3, 10, [100, 100, 100, 5, 5, 5, 0]) @@ -569,11 +548,11 @@ def test_cross_device(x1, x2): def test_cuda_empty(self): def test_tensor(x): y = x.cuda(0) - self.assertEqual(x._sparseDims(), y._sparseDims()) - self.assertEqual(x._denseDims(), y._denseDims()) + self.assertEqual(x.sparse_dim(), y.sparse_dim()) + self.assertEqual(x.dense_dim(), y.dense_dim()) x = y.cpu() - self.assertEqual(y._sparseDims(), x._sparseDims()) - self.assertEqual(y._denseDims(), x._denseDims()) + self.assertEqual(y.sparse_dim(), x.sparse_dim()) + self.assertEqual(y.dense_dim(), x.dense_dim()) x = torch.sparse.FloatTensor(2, 3, 4) test_tensor(x) @@ -609,8 +588,9 @@ def test_shape(di, dj, dk, nnz): x_coalesced = x.coalesce() self.assertTrue(x_coalesced.is_coalesced()) - x_coalesced_t = x.t() - self.assertFalse(x_coalesced_t.is_coalesced()) + x_coalesced_t = x_coalesced.t() + # Transpose is `coalesced`-preserving if the indices tensor is empty.
+ self.assertEqual(x_coalesced_t.is_coalesced(), di * nnz == 0) res = torch.mm(x_coalesced_t, y) expected = torch.mm(self.safeToDense(x_coalesced_t), y) @@ -629,8 +609,8 @@ def test_in_place(x): self.assertEqual(torch.Size([shape_original[1], shape_original[0]]), x.size()) self.assertEqual(0, x._indices().numel()) self.assertEqual(0, x._values().numel()) - self.assertEqual(x._sparseDims(), 2) - self.assertEqual(x._denseDims(), 0) + self.assertEqual(x.sparse_dim(), 2) + self.assertEqual(x.dense_dim(), 0) def test_not_in_place(x): shape_original = x.shape @@ -638,8 +618,8 @@ def test_not_in_place(x): self.assertEqual(torch.Size([shape_original[1], shape_original[0]]), y.size()) self.assertEqual(0, y._indices().numel()) self.assertEqual(0, y._values().numel()) - self.assertEqual(x._sparseDims(), 2) - self.assertEqual(x._denseDims(), 0) + self.assertEqual(x.sparse_dim(), 2) + self.assertEqual(x.dense_dim(), 0) x = self.SparseTensor(2, 3) test_in_place(x) @@ -886,15 +866,21 @@ def _test_basic_ops_shape(self, nnz_x1, nnz_x2, shape_i, shape_v=None): expected = torch.zeros(x1.size()) self.assertEqual(self.safeToDense(y), expected) - self.assertFalse(x1.is_coalesced()) + self.assertEqual(x1.is_coalesced(), not self.is_uncoalesced) y = x1.coalesce() z = x1.coalesce() - self.assertFalse(x1.is_coalesced()) + self.assertEqual(x1.is_coalesced(), not self.is_uncoalesced) self.assertTrue(y.is_coalesced()) self.assertEqual(x1, y) - # check that coalesce is out of place y._values().add_(1) - self.assertEqual(z._values() + 1, y._values()) + if not x1.is_coalesced(): + # check that coalesce is out of place if the original tensor is not + # coalesced. + self.assertEqual(z._values() + 1, y._values()) + else: + # check that coalesce is in-place if the original tensor is + # coalesced. 
+ self.assertEqual(z._values(), y._values()) @skipIfRocm def test_basic_ops(self): @@ -1051,8 +1037,8 @@ def _test_zeros(self, nnzs, shape, out_shape_i, out_shape_v=None): self.assertEqual(tuple(out.size()), tuple(shape)) self.assertTrue(out._indices().numel() == out._values().numel() == 0) self.assertEqual(out._nnz(), 0) - self.assertEqual(out._sparseDims(), len(shape)) - self.assertEqual(out._denseDims(), 0) + self.assertEqual(out.sparse_dim(), len(shape)) + self.assertEqual(out.dense_dim(), 0) def test_zeros(self): def test_shape(i_shapes, v_shapes, shape, nnzs): @@ -1075,8 +1061,8 @@ def _test_zeros_like(self, nnzs, template_shape_i, template_shape_v=None): self.assertEqual(tuple(res.size()), tuple(template_shape)) self.assertTrue(res._indices().numel() == res._values().numel() == 0) self.assertEqual(res._nnz(), 0) - self.assertEqual(res._sparseDims(), len(template_shape_i)) - self.assertEqual(res._denseDims(), len(template_shape_v)) + self.assertEqual(res.sparse_dim(), len(template_shape_i)) + self.assertEqual(res.dense_dim(), len(template_shape_v)) def test_zeros_like(self): def test_shape(i_shapes, v_shapes, nnzs): @@ -1119,7 +1105,7 @@ def test_narrow(self): self.assertRaises(RuntimeError, lambda: with_dense.narrow_copy(10, 0, 3)) # dim > sparseDim + denseDim def _test_log1p_tensor(self, input, dense_tensor): - expected_output = torch.tensor(dense_tensor).log1p_() + expected_output = dense_tensor.log1p() self.assertEqual(expected_output, input.log1p().to_dense()) self.assertEqual(expected_output, input.coalesce().log1p_().to_dense()) @@ -1143,7 +1129,7 @@ def test_log1p(self): torch.FloatTensor([3, 4, 5]), torch.Size([3]), device=self.device) - self._test_log1p_tensor(input, [3., 4., 5.]) + self._test_log1p_tensor(input, torch.as_tensor([3., 4., 5.])) # test uncoalesced input input_uncoalesced = torch.sparse_coo_tensor( @@ -1151,7 +1137,7 @@ def test_log1p(self): torch.FloatTensor([2, 3, 4, 1, 1, 1]), torch.Size([3]), device=self.device) - 
self._test_log1p_tensor(input_uncoalesced, [3., 4., 5.]) + self._test_log1p_tensor(input_uncoalesced, torch.as_tensor([3., 4., 5.])) input = torch.sparse_coo_tensor( torch.zeros([2, 0]), @@ -1309,7 +1295,7 @@ def test_factory_size_check(self): [0, 2]]) values = self.ValueTensor([.5, .5]) sizes = torch.Size([2, 3]) - with self.assertRaisesRegex(RuntimeError, "sizes is inconsistent with indices"): + with self.assertRaisesRegex(RuntimeError, "size is inconsistent with indices"): torch.sparse_coo_tensor(indices, values, sizes) indices.fill_(-1) @@ -1320,14 +1306,14 @@ def test_factory_size_check(self): [0, 2]]) values = self.ValueTensor(2, 1, 0) sizes = torch.Size([2, 3, 1, 0]) - with self.assertRaisesRegex(RuntimeError, "sizes is inconsistent with indices"): + with self.assertRaisesRegex(RuntimeError, "size is inconsistent with indices"): torch.sparse_coo_tensor(indices, values, sizes) indices = self.IndexTensor([[1, 2], [0, 2]]) values = self.ValueTensor(2, 2, 2) sizes = torch.Size([0, 0, 2, 2]) - with self.assertRaisesRegex(RuntimeError, "sizes is inconsistent with indices"): + with self.assertRaisesRegex(RuntimeError, "size is inconsistent with indices"): torch.sparse_coo_tensor(indices, values, sizes) indices = self.IndexTensor([[1, 2], @@ -1370,13 +1356,13 @@ def test_factory_empty_indices(self): self.assertEqual(tensor._indices(), expected_indices) def test_factory_nnz(self): - indices = self.IndexTensor([[0]]) # (sparseDims, nnz): (1, 1) + indices = self.IndexTensor([[0]]) # (sparse_dim, nnz): (1, 1) values = self.ValueTensor([[1, 1], [1, 1]]) # (nnz, ...): (2, 2) sizes = torch.Size([2, 2]) with self.assertRaisesRegex(RuntimeError, "indices and values must have same nnz"): torch.sparse_coo_tensor(indices, values, sizes) - indices = self.IndexTensor([[0]]) # (sparseDims, nnz): (1, 1) + indices = self.IndexTensor([[0]]) # (sparse_dim, nnz): (1, 1) values = self.ValueTensor(2, 0) # (nnz, ...): (2, 0) sizes = torch.Size([2, 0]) with 
self.assertRaisesRegex(RuntimeError, "indices and values must have same nnz"): @@ -1402,7 +1388,8 @@ def test_shape(i_shape, v_shape, size, expected_size): test_shape([3, 0], [0, 2, 4, 0], [0, 0, 0, 2, 4, 0], [0, 0, 0, 2, 4, 0]) test_shape([3, 0], [0, 2, 4, 0], [1, 2, 3, 2, 4, 0], [1, 2, 3, 2, 4, 0]) - def test_factory_dense_dims(self): + @skipIfRocm + def test_factory_dense_dim(self): indices = self.IndexTensor([[0]]) values = self.ValueTensor([[[1, 1, 1], [1, 1, 1]]]) sizes = torch.Size([1, 3, 4]) @@ -1563,7 +1550,7 @@ def do_test(t): y = t.new().resize_as_(t).zero_() self.assertEqual(y.shape, t.shape) # Check that y can be added to t. Currently, this requires that - # _sparseDims and _denseDims match. + # sparse_dim and dense_dim match. self.assertEqual(t, t + y) do_test(self.SparseTensor()) @@ -1585,8 +1572,8 @@ def _test_resize_shape(self, x_i, x_v, x_size, y_i, y_v, y_size): x.resize_as_(y) x_dense.resize_as_(y_dense) self.assertEqual(x.shape, y.shape) - self.assertEqual(x._sparseDims(), y._sparseDims()) - self.assertEqual(x._denseDims(), y._denseDims()) + self.assertEqual(x.sparse_dim(), y.sparse_dim()) + self.assertEqual(x.dense_dim(), y.dense_dim()) self.assertEqual(x.shape, x_dense.shape) self.assertEqual(y.shape, y_dense.shape) # Here we make sure that the original data are preserved after resizing diff --git a/tools/autograd/derivatives.yaml b/tools/autograd/derivatives.yaml index f0e87e85f3c7a..e3569a3c3fd2c 100644 --- a/tools/autograd/derivatives.yaml +++ b/tools/autograd/derivatives.yaml @@ -8,6 +8,12 @@ # Note that a single gradient entry can specify the gradient # formula for multiple input names, by specifying a key # "input1, input2" (see atan2 for an example). +# - Optional entry with key 'output_differentiability' and value a list of the +# same length as the number of outputs from the forward function. The list +# should contain only booleans, specifying whether each of the output Tensor +# is differentiable. 
+# If none of the outputs is differentiable, you can also add the function +# name to `gen_variable_type.py`'s `DONT_REQUIRE_DERIVATIVE` list. # # If a function has out-of-place and in-place variants, then the derivative # definition for the in-place variant is optional. It will default to the @@ -23,13 +29,16 @@ # # When a function returns multiple *differentiable* outputs, # you can refer to the gradients of each outputs using 'grads', -# e.g., 'grads[0]', 'grads[1]' +# e.g., 'grads[0]', 'grads[1]'. # # When a function returns *one* differentiable output (the # first output) and some more nondifferentiable outputs, # you MUST refer to the gradient of the differentiable output with # 'grad' (this case is special-cased in our code generation). # +# Note that the number of differentiable outputs can be modified by the +# 'output_differentiability' entry (see above). +# # - Any of the input arguments, tensor or non-tensor, including # argument names that only appear in Declarations.cwrap, e.g. 'output'.
# @@ -189,6 +198,9 @@ - name: clone(Tensor self) self: grad +- name: coalesce(Tensor self) + self: grad + - name: cos(Tensor self) self: grad * -self.sin() @@ -321,6 +333,12 @@ self: gesv_backward_self(grad, self, A) A: gesv_backward_A(grad, self, A, result0) +- name: indices(Tensor self) + output_differentiability: [False] + +- name: _indices(Tensor self) + output_differentiability: [False] + - name: grid_sampler_2d(Tensor input, Tensor grid, int64_t interpolation_mode, int64_t padding_mode) input, grid: grid_sampler_2d_backward(grad, input, grid, interpolation_mode, padding_mode) @@ -785,12 +803,21 @@ self: not_implemented("sparse_mask") mask: not_implemented("sparse_mask") +- name: sparse_coo_tensor_with_dims_and_tensors(Type sparse_dtype, int64_t sparse_dim, int64_t dense_dim, IntList size, Tensor indices, Tensor values) + values: sparse_constructor_values_backward(grad, indices, values.sizes()) + - name: _standard_gamma(Tensor self, Generator generator) self: grad * _standard_gamma_grad(self, result) - name: _standard_gamma_grad(Tensor self, Tensor output) self: not_implemented("_standard_gamma_grad") +- name: values(Tensor self) + self: sparse_values_backward(grad, self.indices(), self.sizes(), self.is_coalesced()) + +- name: _values(Tensor self) + self: sparse_values_backward(grad, self._indices(), self.sizes(), self.is_coalesced()) + # NN - name: _trilinear(Tensor i1, Tensor i2, Tensor i3, IntList expand1, IntList expand2, IntList expand3, IntList sumdim, int64_t unroll_dim) i1, i2, i3: _trilinear_backward(grad, i1, i2, i3, expand1, expand2, expand3, sumdim, unroll_dim, grad_input_mask) @@ -1234,7 +1261,11 @@ save_var: not_implemented("cudnn_batch_norm_backward save_var") input, weight, grad_output: batchnorm_double_backward(input, weight, grads[0], grads[1], grads[2], grad_output, running_mean, running_var, true, epsilon, save_mean, save_var, grad_input_mask) + +# Only the first three of _cudnn_rnn outputs can have gradients.
+# _cudnn_rnn outputs: (output, hy, cy, reserve, weight_buf) - name: _cudnn_rnn(Tensor input, TensorList weight, int64_t weight_stride0, Tensor weight_buf, Tensor hx, Tensor cx, int64_t mode, int64_t hidden_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, IntList batch_sizes, Tensor dropout_state) + output_differentiability: [True, True, True, False, False] input, hx, cx, weight: "_cudnn_rnn_backward(input, weight, weight_stride0, result4, hx, cx, result0, grads[0], grads[1], grads[2], mode, hidden_size, num_layers, batch_first, dropout, train, bidirectional, batch_sizes, dropout_state, retain_variables ? result3.clone() : result3, grad_input_mask)" # miopen @@ -1274,7 +1305,11 @@ tensors: unbind(grad, dim) # fused RNN kernels + +# Only the first two of _thnn_fused_lstm_cell outputs can have gradients. +# _thnn_fused_lstm_cell outputs: (hy, cy, workspace) - name: _thnn_fused_lstm_cell(Tensor input_gates, Tensor hidden_gates, Tensor cx, Tensor input_bias, Tensor hidden_bias) + output_differentiability: [True, True, False] input_gates, hidden_gates, cx, input_bias, hidden_bias: _thnn_fused_lstm_cell_backward(grads[0], grads[1], cx, result1, result2, input_bias.defined()) - name: _thnn_fused_gru_cell(Tensor input_gates, Tensor hidden_gates, Tensor hx, Tensor input_bias, Tensor hidden_bias) diff --git a/tools/autograd/gen_autograd.py b/tools/autograd/gen_autograd.py index 4cf21ed7ad2ed..e09bc810c3f8a 100644 --- a/tools/autograd/gen_autograd.py +++ b/tools/autograd/gen_autograd.py @@ -15,29 +15,34 @@ from .utils import YamlLoader, split_name_params # See NOTE [ Autograd View Variables ] in variable.h for details. +# A map: function name => two options: +# 1. name of the argument that all outputs are view of +# 2.
map: output idx => name of the argument that this result is view of VIEW_FUNCTIONS = { - 'alias', 'as_strided', 'diagonal', 'expand', 'narrow', 'permute', 'select', 'slice', - 'squeeze', 't', 'transpose', 'unfold', 'unsqueeze', 'view', 'unbind', -} - -# In principle this should live in derivatives.yaml, but I could not -# think of a good syntax for it -HARDCODED_DIFFERENTIABLE_OUTPUTS = { - # Suppose that 'foo' is a function for which outputs 0 and 1 are - # differentiable, and 2 is not. Then you would write: - # 'foo': (0, 1), - '_cudnn_rnn': (0, 1, 2), - # _cudnn_rnn outputs: - # 0 => output - # 1 => hy - # 2 => cy - # 3 => reserve - # 4 => weight_buf - '_thnn_fused_lstm_cell': (0, 1), - # _thnn_fused_lstm_cell outputs: - # 0 => hy - # 1 => cy - # 2 => workspace + 'alias': 'self', + 'as_strided': 'self', + 'diagonal': 'self', + 'expand': 'self', + 'narrow': 'self', + 'permute': 'self', + 'select': 'self', + 'slice': 'self', + 'squeeze': 'self', + 't': 'self', + 'transpose': 'self', + 'unfold': 'self', + 'unsqueeze': 'self', + 'view': 'self', + 'unbind': 'self', + '_indices': 'self', + '_values': 'self', + 'indices': 'self', + 'values': 'self', + # sparse_coo ctor output should really be views of both indices and values, + # but we only support making it a view of a single variable, and indices is + # discrete anyway. + # FIXME: clone indices on construction.
+ 'sparse_coo_tensor_with_dims_and_tensors': 'values', } diff --git a/tools/autograd/gen_python_functions.py b/tools/autograd/gen_python_functions.py index b1746e47bd511..71bff84d0c3c2 100644 --- a/tools/autograd/gen_python_functions.py +++ b/tools/autograd/gen_python_functions.py @@ -20,7 +20,7 @@ 'alias', 'contiguous', 'is_cuda', 'is_sparse', 'size', 'stride', '.*_backward', '.*_backward_(out|input|weight|bias)', '.*_forward', '.*_forward_out', '_unsafe_view', 'tensor', - 'sparse_coo_tensor', 'th_sparse_coo_tensor', 'native_sparse_coo_tensor', + 'sparse_coo_tensor', 'sparse_coo_tensor_with_dims.*', '_arange.*', '_range.*', '_linspace.*', '_logspace.*', '_sparse_add.*', '_sparse_div.*', '_sparse_mul.*', '_sparse_sub.*', 'index', diff --git a/tools/autograd/gen_variable_type.py b/tools/autograd/gen_variable_type.py index 3f61f5341aed3..8e46aa0372734 100644 --- a/tools/autograd/gen_variable_type.py +++ b/tools/autograd/gen_variable_type.py @@ -26,7 +26,7 @@ import os import sys from .utils import CodeTemplate, nested_dict, write, uninplace_api_name -from .gen_autograd import VIEW_FUNCTIONS, HARDCODED_DIFFERENTIABLE_OUTPUTS +from .gen_autograd import VIEW_FUNCTIONS from .gen_autograd_functions import uses_single_grad @@ -43,7 +43,9 @@ DONT_RECORD_TRACE = { 'convolution', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d', 'lstm_cell', 'gru_cell', - 'rnn_tanh_cell', 'rnn_relu_cell', 'linear' + 'rnn_tanh_cell', 'rnn_relu_cell', 'linear', + # FIXME: figure out a better way when we support sparse tensors in jit + '_coalesced_', } # These functions have their names recorded under trace renamed, @@ -71,13 +73,13 @@ # tensors that have requires_grad=False. In-place functions listed here will # not examine or modify requires_grad or grad_fn. 
DONT_REQUIRE_DERIVATIVE = { - # These only depend on the input Tensor's shape and device, not the data + # These only depend on the input Tensor's shape and device, not the data 'ones_like', 'zeros_like', 'rand_like', 'randn_like', - # Tensor constructors - 'sparse_coo_tensor', 'th_sparse_coo_tensor', 'native_sparse_coo_tensor', # These are only implemented on integral types '__and__', '__iand__', '__ilshift__', '__ior__', '__irshift__', '__ixor__', '__lshift__', '__or__', '__rshift__', '__xor__', + # This is an unsafe method that is meant to be out of reach of autograd. + '_coalesced_', } METHOD_DECLARATION = CodeTemplate("""\ @@ -180,7 +182,7 @@ def should_trace(declaration): return False name = declaration['name'] base_name = name[:-1] if declaration['inplace'] else name[:-4] if name.endswith('_out') else name - if base_name in DONT_RECORD_TRACE: + if base_name in DONT_RECORD_TRACE or name in DONT_RECORD_TRACE: return False # We need to disable these because their inner implementations implement # broadcasting, and if we trace them top level we will lose the expand nodes. 
@@ -313,7 +315,7 @@ def emit_body(declaration): returns_void = len(returns) == 1 and returns[0]['type'] == 'void' base_name = name[:-1] if inplace else name[:-4] if is_out_fn else name - is_view = base_name in VIEW_FUNCTIONS + view_info = VIEW_FUNCTIONS.get(base_name, None) # These exclude things like BoolTensor, int64_t, and Scalar def is_differentiable(arg): @@ -329,18 +331,19 @@ def is_differentiable(arg): differentiable_inputs = list(filter(is_differentiable, inputs)) candidate_differentiable_outputs = list(filter(is_differentiable, returns)) - hardcoded_diff = HARDCODED_DIFFERENTIABLE_OUTPUTS.get(name) - if hardcoded_diff: + if func is not None and func.get('output_differentiability') is not None: differentiable_outputs = [] - for i in hardcoded_diff: - differentiable_outputs.append(candidate_differentiable_outputs[i]) + output_differentiability = func.get('output_differentiability') + for differentiable, output in zip(output_differentiability, returns): + if differentiable: + differentiable_outputs.append(output) elif uses_single_grad(func): differentiable_outputs = candidate_differentiable_outputs[:1] else: differentiable_outputs = candidate_differentiable_outputs requires_derivative = ( - base_name not in DONT_REQUIRE_DERIVATIVE and + base_name not in DONT_REQUIRE_DERIVATIVE and name not in DONT_REQUIRE_DERIVATIVE and len(differentiable_inputs) > 0 and len(differentiable_outputs) > 0 and strategy == 'use_derived') @@ -455,27 +458,77 @@ def declare_returned_variables(): return '\n'.join(names) def wrap_output(call): + # Returns a 2-tuple `(wrapped_call, extra_wrapping_stmts)`, where + # `wrapped_call` is to drop-in replace `call`, and + # `extra_wrapping_stmts` is a list of extra statements to run after + # `call`. if 'Tensor' not in declaration['return_type']: - return call - elif is_view: - # If `GradMode::is_enabled()` is False, this is a non-differentiable - # view. Gradients should not flow through. 
+ return call, [] + elif view_info is not None: # See NOTE [ Autograd View Variables ] in variable.h for details. - return 'as_view(self, {}, GradMode::is_enabled())'.format(call) + differentiable_output_vars = {r['name'] for r in differentiable_outputs} + tensor_output_vars = {r['name'] for r in returns if 'Tensor' in r['type']} + if not isinstance(view_info, dict): + if len(differentiable_output_vars) == len(tensor_output_vars): + # all outputs are differentiable + return 'as_view({}, {}, GradMode::is_enabled())'.format(view_info, call), [] + elif len(differentiable_output_vars) == 0: + # no output is differentiable + return 'as_view({}, {}, false)'.format(view_info, call), [] + else: + # some of the outputs are differentiable + # need to expand to dict mode + base_name = view_info + view_info_dict = {} + for i, return_info in enumerate(returns): + if 'Tensor' in return_info['type']: + view_info_dict[i] = base_name + else: + view_info_dict = view_info + + def wrap_view_single(output_var, base_var): + fmt = '{output_var} = as_view({base_var}, {output_var}, {potentially_tracks_history});' + if output_var in differentiable_output_vars: + # If `GradMode::is_enabled()` is False, this is a + # non-differentiable view. Gradients should not flow through. + potentially_tracks_history = 'GradMode::is_enabled()' + else: + # This output is non-differentiable, so it is a + # non-differentiable view. Gradients should not flow through. 
+ potentially_tracks_history = 'false' + return fmt.format(output_var=output_var, base_var=base_var, + potentially_tracks_history=potentially_tracks_history) + + extra_wrapping_stmts = [] + for output_idx, return_info in enumerate(returns): + if 'Tensor' not in return_info['type']: + assert output_idx not in view_info_dict, 'Can not wrap non-Tensor output as a view' + continue + output_var = return_info['name'] + if output_idx in view_info_dict: + stmt = wrap_view_single(output_var, view_info_dict[output_idx]) + elif 'Tensor' in return_info['type']: + stmt = '{output_var} = as_variable({output_var});'.format(output_var=output_var) + extra_wrapping_stmts.append(stmt) + return call, extra_wrapping_stmts else: - return 'as_variable({})'.format(call) + return 'as_variable({})'.format(call), [] def emit_call(env): combined = nested_dict(env, declaration) + extra_wrapping_stmts = [] if strategy == 'use_derived': call = CALL_VIA_DERIVED.substitute(combined) if not modifies_arguments: - call = wrap_output(call) + call, extra_wrapping_stmts = wrap_output(call) else: call = CALL_VIA_TYPE.substitute(declaration) if not modifies_arguments and not returns_void: call = '{} = {}'.format(tie_return_values(), call) - return call + ';' + call = call + ';' + for stmt in extra_wrapping_stmts: + call += '\n' + stmt + return call def tie_return_values(): if len(declaration['returns']) == 1: @@ -500,7 +553,7 @@ def get_return_value(): return 'std::make_tuple({})'.format(', '.join(moved)) def emit_history(): - fn = 'rebase' if modifies_arguments and not is_view else 'set' + fn = 'rebase' if modifies_arguments and view_info is None else 'set' output_names = [r['name'] for r in differentiable_outputs] # TODO: flatten allocates a std::vector, which could be expensive outs = CodeTemplate("flatten_tensor_args( ${outs} )").substitute(outs=output_names) @@ -615,7 +668,8 @@ def dispatch_strategy(declaration): get dispatched back to VariableType (which will ensure that they are differentiable.) 
""" - if (declaration['abstract'] or declaration['derivative'] is not None): + if (declaration['abstract'] or declaration['requires_tensor'] or + declaration['derivative'] is not None): # If the function is abstract (not implemented on at::Type), we must # call the implementation on the derived type with unpacked tensors. @@ -634,6 +688,6 @@ def dispatch_strategy(declaration): else: # If the function is concrete (we don't have to override it) and we # didn't declare it in derivatives.yaml, we'll assume that it is - # actually implemented out of differentiable functions. (This + # actually implemented out of differentiable functions. (This # assumption might not hold, but then you'll see gradcheck fail.) return 'use_type' diff --git a/tools/autograd/load_derivatives.py b/tools/autograd/load_derivatives.py index ef4ef8d9acbb5..c2ede2d9d7b6d 100644 --- a/tools/autograd/load_derivatives.py +++ b/tools/autograd/load_derivatives.py @@ -9,7 +9,6 @@ import yaml from .utils import YamlLoader from .utils import IDENT_REGEX, split_name_params -from .gen_autograd import HARDCODED_DIFFERENTIABLE_OUTPUTS def load_derivatives(path, declarations): @@ -30,7 +29,8 @@ def load_derivatives(path, declarations): # How do you feel about pasting declaration inside autograd function... 
-def create_autograd_function(name, derivatives, args_with_gradients, signature, declaration): +def create_autograd_function(name, derivatives, args_with_gradients, signature, + declaration, output_differentiability): op = to_camel_case(name) + 'Backward' op = op.replace('ForwardBackward', 'Backward') return { @@ -42,6 +42,7 @@ def create_autograd_function(name, derivatives, args_with_gradients, signature, 'derivatives': derivatives, 'saved_inputs': all_saved_variables(derivatives, 'saved_inputs'), 'saved_outputs': all_saved_variables(derivatives, 'saved_outputs'), + 'output_differentiability': output_differentiability, } @@ -128,36 +129,6 @@ def check_grad_usage(defn_name, declaration, derivatives): "otherwise, there is a likely error in your derivatives " "declaration.".format(defn_name)) - hardcoded_diff = HARDCODED_DIFFERENTIABLE_OUTPUTS.get(defn_name) - if hardcoded_diff: - if used_grad: - raise RuntimeError("Derivative definition {} has hard-coded differentiable " - "outputs in gen_autograd.py, but used grad (which implies " - "only the first output is differentiable) in its " - "derivative declaration. You likely meant to write " - "grads[i] for some i instead.".format(defn_name)) - if only_used_grads_indices and set(used_grads_indices) != set(hardcoded_diff): - raise RuntimeError("Derivative definition {} has hard-coded differentiable " - "outputs {}, but the used grads in the derivative " - "definitions are only {}. Either your derivatives " - "declaration is wrong, or the value of " - "HARDCODED_DIFFERENTIABLE_OUTPUTS in gen_autograd.py " - "is wrong.".format(defn_name, hardcoded_diff, - used_grads_indices)) - else: - if fully_implemented and not used_grad and \ - used_grads and only_used_grads_indices and \ - set(used_grads_indices) != set(range(len(declaration['returns']))): - raise RuntimeError("Derivative definition of {} in derivatives.yaml does " - "not refer to the gradients of all of its outputs. 
Either " - "the derivatives declaration is wrong, OR you have some " - "non-differentiable outputs. If you have a single " - "differentiable output, make it the first output in ATen " - "and reference its gradient with 'grad'; otherwise, hard " - "code the list of differentiable outputs in " - "HARDCODED_DIFFERENTIABLE_OUTPUTS in gen_autograd.py." - .format(defn_name)) - def set_up_derivatives(defn_name, defn, declaration): # Determine the set of inputs which have gradients args_with_gradients_set = set() @@ -188,6 +159,9 @@ def unzip(xs): # NB: Removes 'name' from defn dictionary defn_name, params = split_name_params(defn.pop('name')) + # NB: Removes 'output_differentiability' from defn dictionary + # `None` means all differentiable. + output_differentiability = defn.pop('output_differentiability', None) param_types, param_names = unzip([p.split(' ') for p in params if p != '*']) if 'grad_input_mask' in param_names: raise RuntimeError("Signature for {} has an argument named grad_input_mask, " @@ -221,7 +195,8 @@ def unzip(xs): .format(i, defn_name, x, y)) derivatives, args_with_gradients = set_up_derivatives(defn_name, defn, canonical) - return create_autograd_function(defn_name, derivatives, args_with_gradients, signature, canonical) + return create_autograd_function(defn_name, derivatives, args_with_gradients, + signature, canonical, output_differentiability) def ensure_unique_names(autograd_functions): diff --git a/tools/autograd/templates/Functions.cpp b/tools/autograd/templates/Functions.cpp index 68197d7d82515..1abc69ed20e92 100644 --- a/tools/autograd/templates/Functions.cpp +++ b/tools/autograd/templates/Functions.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -1964,8 +1965,8 @@ std::tuple batchnorm_double_backward( } std::tuple _trilinear_backward(const Tensor& grad_out, const Tensor& i1, const Tensor& i2, const Tensor& i3, - IntList expand1, IntList expand2, IntList expand3, - IntList sumdim, int64_t unroll_dim, 
std::array grad_mask) { + IntList expand1, IntList expand2, IntList expand3, + IntList sumdim, int64_t unroll_dim, std::array grad_mask) { Tensor grad_i1, grad_i2, grad_i3; if (grad_mask[0]) grad_i1 = at::_trilinear(grad_out, i2, i3, sumdim, expand2, expand3, expand1); @@ -1987,6 +1988,27 @@ Tensor log1p_backward(const Tensor& grad, const Tensor& self) { return grad / (self + 1); } +Tensor sparse_values_backward(const Tensor& grad_values, const Tensor& indices, + IntList sizes, bool input_is_coalesced) { + auto grad_input = at::_sparse_coo_tensor_unsafe(indices, grad_values, sizes); + return grad_input._coalesced_(input_is_coalesced); +} + +Tensor sparse_constructor_values_backward(const Tensor& sparse_grad_out, const Tensor& indices, IntList values_shape) { + // TODO: improve this backward by writing a kernel (maybe) + auto sparse_dim = indices.size(0); + if (sparse_dim == 0) { + return at::zeros(values_shape, sparse_grad_out._values().options()); + } + auto dense_grad = sparse_grad_out.to_dense(); + auto full_size = sparse_grad_out.sizes(); + auto flattened_grad_shape = values_shape.vec(); + flattened_grad_shape[0] = at::prod_intlist(full_size.slice(0, sparse_dim)); + auto flattened_dense_grad = dense_grad.view(flattened_grad_shape); + auto flattened_indices = at::sparse::flatten_indices(indices, full_size); + return flattened_dense_grad.index_select(0, flattened_indices); +} + } // anonymous namespace ${autograd_function_definitions} diff --git a/torch/_tensor_docs.py b/torch/_tensor_docs.py index d5a876e4ca83e..3e3114841a8e1 100644 --- a/torch/_tensor_docs.py +++ b/torch/_tensor_docs.py @@ -662,6 +662,17 @@ def add_docstr_all(method, docstr): Returns the address of the first element of :attr:`self` tensor. """) +add_docstr_all('dense_dim', + r""" +dense_dim() -> int + +If :attr:`self` is a sparse COO tensor (i.e., with ``torch.sparse_coo`` layout), +this returns the number of dense dimensions. Otherwise, this throws an +error.
+ +See also :meth:`Tensor.sparse_dim`. +""") + add_docstr_all('diag', r""" diag(diagonal=0) -> Tensor @@ -980,9 +991,24 @@ def add_docstr_all(method, docstr): See :func:`torch.gesv` """) +add_docstr_all('indices', + r""" +indices() -> Tensor + +If :attr:`self` is a sparse COO tensor (i.e., with ``torch.sparse_coo`` layout), +this returns a view of the contained indices tensor. Otherwise, this throws an +error. + +See also :meth:`Tensor.values`. + +.. note:: + This method can only be called on a coalesced sparse tensor. See + :meth:`Tensor.coalesce` for details. +""") + add_docstr_all('get_device', r""" -get_device(A) -> Device ordinal (Integer) +get_device() -> Device ordinal (Integer) For CUDA tensors, this function returns the device ordinal of the GPU on which the tensor resides. For CPU tensors, an error is thrown. @@ -995,6 +1021,21 @@ def add_docstr_all(method, docstr): >>> x.cpu().get_device() # RuntimeError: get_device is not implemented for type torch.FloatTensor """) +add_docstr_all('values', + r""" +values() -> Tensor + +If :attr:`self` is a sparse COO tensor (i.e., with ``torch.sparse_coo`` layout), +this returns a view of the contained values tensor. Otherwise, this throws an +error. + +See also :meth:`Tensor.indices`. + +.. note:: + This method can only be called on a coalesced sparse tensor. See + :meth:`Tensor.coalesce` for details. +""") + add_docstr_all('gt', r""" gt(other) -> Tensor @@ -1462,7 +1503,7 @@ def callable(a, b) -> number Same as :meth:`Tensor.narrow` except returning a copy rather than shared storage. This is primarily for sparse tensors, which do not have a shared-storage narrow method. Calling ```narrow_copy`` -with ```dimemsion > self._sparseDims()``` will return a copy with the +with ```dimension > self.sparse_dim()``` will return a copy with the relevant dense dimension narrowed, and ```self.shape``` updated accordingly.
""") @@ -2094,6 +2135,17 @@ def callable(a, b) -> number See :func:`torch.sort` """) +add_docstr_all('sparse_dim', + r""" +sparse_dim() -> int + +If :attr:`self` is a sparse COO tensor (i.e., with ``torch.sparse_coo`` layout), +this returns the number of sparse dimensions. Otherwise, this throws an +error. + +See also :meth:`Tensor.dense_dim`. +""") + add_docstr_all('sqrt', r""" sqrt() -> Tensor diff --git a/torch/csrc/cuda/comm.cpp b/torch/csrc/cuda/comm.cpp index d4259a70baeb7..5ba6c55cfe1af 100644 --- a/torch/csrc/cuda/comm.cpp +++ b/torch/csrc/cuda/comm.cpp @@ -82,6 +82,7 @@ tensor_list2d broadcast_coalesced(TensorList tensors, IntList devices, size_t bu o.reserve(tensors.size()); unique_type_checker type_checker; + at::DeviceGuard device_guard(devices[0]); for (auto & chunk : utils::take_tensors(tensors, buffer_size)) { auto & type = chunk.type(); type_checker.show(type); @@ -92,7 +93,7 @@ tensor_list2d broadcast_coalesced(TensorList tensors, IntList devices, size_t bu std::vector broadcast_values = broadcast(flat_tuple.second, devices); results.reserve(devices.size()); for (size_t i = 1, num_devices = devices.size(); i < num_devices; ++i) { - at::DeviceGuard device_guard(devices[i]); + device_guard.set_index(devices[i]); auto & device_outputs = outputs[i]; auto & inds = broadcast_indices[i]; auto & vals = broadcast_values[i]; @@ -100,7 +101,6 @@ tensor_list2d broadcast_coalesced(TensorList tensors, IntList devices, size_t bu device_outputs.push_back(std::move(t)); } } else { - at::DeviceGuard device_guard(devices[0]); std::vector results = broadcast(utils::flatten_dense_tensors(chunk.tensors), devices); for (size_t i = 1, num_devices = devices.size(); i < num_devices; ++i) { diff --git a/torch/csrc/utils/tensor_flatten.cpp b/torch/csrc/utils/tensor_flatten.cpp index 29414368c7bd5..be01a66b5996b 100644 --- a/torch/csrc/utils/tensor_flatten.cpp +++ b/torch/csrc/utils/tensor_flatten.cpp @@ -85,8 +85,11 @@ std::vector unflatten_sparse_tensors( std::vector
outputs; outputs.reserve(tensors.size()); - for (size_t i = 0, num_tensors = tensors.size(); i < num_tensors; ++i) - outputs.emplace_back(at::_sparse_coo_tensor_unsafe(indices[i], values[i], tensors[i].sizes())); + for (size_t i = 0, num_tensors = tensors.size(); i < num_tensors; ++i) { + auto &ref_t = tensors[i]; + auto t = at::_sparse_coo_tensor_unsafe(indices[i], values[i], ref_t.sizes()); + outputs.emplace_back(t._coalesced_(ref_t.is_coalesced())); + } return outputs; } diff --git a/torch/csrc/utils/tensor_new.cpp b/torch/csrc/utils/tensor_new.cpp index 628409b18df98..faffd278841c3 100644 --- a/torch/csrc/utils/tensor_new.cpp +++ b/torch/csrc/utils/tensor_new.cpp @@ -291,12 +291,12 @@ Tensor legacy_sparse_tensor_ctor(const Type& type, PyObject* args, PyObject* kwa auto deviceOptional = r.deviceOptional(2); check_legacy_ctor_device(type, deviceOptional); at::DeviceGuard device_guard(deviceOptional); - return at::sparse_coo_tensor(r.tensor(0), r.tensor(1), type.options()); + return at::sparse_coo_tensor(r.tensor(0), r.tensor(1)); } else if (r.idx == 3) { auto deviceOptional = r.deviceOptional(3); check_legacy_ctor_device(type, deviceOptional); at::DeviceGuard device_guard(deviceOptional); - return at::sparse_coo_tensor(r.tensor(0), r.tensor(1), r.intlist(2), type.options()); + return at::sparse_coo_tensor(r.tensor(0), r.tensor(1), r.intlist(2)); } else if (r.idx == 4) { PyObject* arg = r.pyobject(0); auto deviceOptional = r.deviceOptional(1); @@ -335,14 +335,14 @@ Tensor legacy_sparse_tensor_new(const Type& type, PyObject* args, PyObject* kwar auto deviceOptional = r.deviceOptional(2); check_legacy_ctor_device(type, deviceOptional); at::DeviceGuard device_guard(deviceOptional); - return at::sparse_coo_tensor(r.tensor(0), r.tensor(1), type.options()); + return at::sparse_coo_tensor(r.tensor(0), r.tensor(1)); } else if (r.idx == 3) { // Note: this signature doesn't have a dtype, even though it has a device; it probably shouldn't // have a device (we should infer 
it). auto deviceOptional = r.deviceOptional(3); check_legacy_ctor_device(type, deviceOptional); at::DeviceGuard device_guard(deviceOptional); - return at::sparse_coo_tensor(r.tensor(0), r.tensor(1), r.intlist(2), type.options()); + return at::sparse_coo_tensor(r.tensor(0), r.tensor(1), r.intlist(2)); } else if (r.idx == 4) { PyObject* arg = r.pyobject(0); auto deviceOptional = r.deviceOptional(1); @@ -485,8 +485,7 @@ Tensor sparse_coo_tensor_ctor(const Type& type, PyObject* args, PyObject* kwargs // if no dtype provided, infer type based on value type. const auto& index_type = values.type().toScalarType(kLong); Tensor indices = internal_new_from_data(index_type, r.deviceOptional(3), r.pyobject(0), false, true, false); - const auto& sparse_type_to_use = values.type().toBackend(values.type().is_cuda() ? Backend::SparseCUDA : Backend::SparseCPU); - return at::sparse_coo_tensor(indices, values, sparse_type_to_use.options()).set_requires_grad(r.toBool(4)); + return at::sparse_coo_tensor(indices, values).set_requires_grad(r.toBool(4)); } else if (r.idx == 1) { bool type_inference = r.isNone(3); const auto& sparse_type = typeWithDefault(r, 3, 4, default_sparse_type); @@ -495,8 +494,7 @@ Tensor sparse_coo_tensor_ctor(const Type& type, PyObject* args, PyObject* kwargs Tensor values = internal_new_from_data(dense_type, r.deviceOptional(4), r.pyobject(1), false, true, type_inference); const auto& index_type = values.type().toScalarType(kLong); Tensor indices = internal_new_from_data(index_type, r.deviceOptional(4), r.pyobject(0), false, true, false); - const auto& sparse_type_to_use = values.type().toBackend(values.type().is_cuda() ? 
Backend::SparseCUDA : Backend::SparseCPU); - return at::sparse_coo_tensor(indices, values, r.intlist(2), sparse_type_to_use.options()).set_requires_grad(r.toBool(5)); + return at::sparse_coo_tensor(indices, values, r.intlist(2)).set_requires_grad(r.toBool(5)); } else if (r.idx == 2) { const auto& sparse_type_to_use = typeWithDefault(r, 1, 2, default_sparse_type); at::DeviceGuard device_guard(r.device(2)); From 7d8befc2ca0a83bba0d682ccd27648eb8a6e2ab7 Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Tue, 16 Oct 2018 17:33:12 -0400 Subject: [PATCH 12/23] Use function variant with option dispatch; favor options dispatch over tensor dispatch --- aten/src/ATen/core/aten_interned_strings.h | 4 +- aten/src/ATen/function_wrapper.py | 27 ++++--- aten/src/ATen/native/TensorShape.cpp | 1 - aten/src/ATen/native/native_functions.yaml | 12 ++- aten/src/ATen/native/sparse/SparseTensor.cpp | 81 +++++++++----------- tools/autograd/derivatives.yaml | 2 +- tools/autograd/gen_python_functions.py | 3 +- 7 files changed, 59 insertions(+), 71 deletions(-) diff --git a/aten/src/ATen/core/aten_interned_strings.h b/aten/src/ATen/core/aten_interned_strings.h index 8045fe3458f0d..0ee9d337e9158 100644 --- a/aten/src/ATen/core/aten_interned_strings.h +++ b/aten/src/ATen/core/aten_interned_strings.h @@ -117,6 +117,8 @@ _(aten, _sin) \ _(aten, _sinh) \ _(aten, _sparseDims) \ _(aten, _sparse_add) \ +_(aten, _sparse_coo_tensor_with_dims) \ +_(aten, _sparse_coo_tensor_with_dims_and_tensors) \ _(aten, _sparse_coo_tensor_unsafe) \ _(aten, _sparse_dense_add) \ _(aten, _sparse_div_scalar) \ @@ -593,8 +595,6 @@ _(aten, softshrink_backward) \ _(aten, softshrink_forward) \ _(aten, sort) \ _(aten, sparse_coo_tensor) \ -_(aten, sparse_coo_tensor_with_dims) \ -_(aten, sparse_coo_tensor_with_dims_and_tensors) \ _(aten, sparse_mask) \ _(aten, sparse_resize) \ _(aten, sparse_resize_and_clear) \ diff --git a/aten/src/ATen/function_wrapper.py b/aten/src/ATen/function_wrapper.py index 1d68fd365e175..ad600b467758c 
100644 --- a/aten/src/ATen/function_wrapper.py +++ b/aten/src/ATen/function_wrapper.py @@ -110,7 +110,7 @@ def TypedDict(name, attrs, total=True): # type: ignore TYPE_DERIVED_DEFINITION_NATIVE = CodeTemplate("""\ ${return_type} ${Type}::${api_name}(${type_method_formals}) const { ${device_guard_declaration} - ${return_call} at::native::${native_type_method_dispatch}(/* actuals */ ${type_derived_call_actuals}); + ${return_call} at::native::${native_type_method_dispatch}(/* actuals */ ${actuals}); } """) TYPE_DERIVED_DEFINITION_NATIVE_MISSING = CodeTemplate("""\ @@ -1044,14 +1044,20 @@ def find_formal(formal_name, formals): return formal return None + assert find_formal('Type', formals) is None, \ + "Found Type argument in {}({}). Use TensorOptions instead.".format( + option['name'], ", ".join(option['method_formals_with_defaults'])) + type_method_dispatch = option['type_method_definition_dispatch'] - dispatch_tensor = find_dispatch_tensor(formals) - # we only dispatch via options if there is backend-specific dispatch (otherwise it's a factory function that - # can dispatch directly to the native function). backend_dispatch = isinstance(type_method_dispatch, dict) + + # We only dispatch via options if there is backend-specific dispatch + # (otherwise it's a factory function that can dispatch directly to the + # native function). 
dispatch_options = (find_formal('TensorOptions', formals) - if not dispatch_tensor and backend_dispatch - else None) + if backend_dispatch else None) + # Only dispatch via tensor if there is no Options argument + dispatch_tensor = None if dispatch_options else find_dispatch_tensor(formals) option['type_method_formals'] = [format_formal(f) for f in formals] option['type_method_actuals'] = [f['name'] for f in formals] @@ -1577,15 +1583,8 @@ def process_native(option): TYPE_DERIVED_DEFINITION_NATIVE_MISSING.substitute(env)) else: option['native_type_method_dispatch'] = native_dispatch - type_derived_call_actuals = [] - for actual, arg in zip(option['actuals'], option['arguments']): - if arg.get('is_type_dispatched', False): - type_derived_call_actuals.append('*this') - else: - type_derived_call_actuals.append(actual) type_object_definitions.append( - TYPE_DERIVED_DEFINITION_NATIVE.substitute( - env, type_derived_call_actuals=type_derived_call_actuals)) + TYPE_DERIVED_DEFINITION_NATIVE.substitute(env)) for declaration in declarations: for option in declaration['options']: diff --git a/aten/src/ATen/native/TensorShape.cpp b/aten/src/ATen/native/TensorShape.cpp index 5f2b8af406b84..48d9d9a9083c5 100644 --- a/aten/src/ATen/native/TensorShape.cpp +++ b/aten/src/ATen/native/TensorShape.cpp @@ -1,4 +1,3 @@ -#include #include #include #include diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml index 9584c51746c62..45244a80c508b 100644 --- a/aten/src/ATen/native/native_functions.yaml +++ b/aten/src/ATen/native/native_functions.yaml @@ -2019,22 +2019,20 @@ # the default would never make sense. 
- func: sparse_coo_tensor(IntList size, *, TensorOptions options) -> Tensor -- func: sparse_coo_tensor(IndexTensor indices, Tensor values) -> Tensor +- func: sparse_coo_tensor(IndexTensor indices, Tensor values, *, TensorOptions options={}) -> Tensor -- func: sparse_coo_tensor(IndexTensor indices, Tensor values, IntList size) -> Tensor +- func: sparse_coo_tensor(IndexTensor indices, Tensor values, IntList size, *, TensorOptions options={}) -> Tensor -- func: _sparse_coo_tensor_unsafe(IndexTensor indices, Tensor values, IntList size) -> Tensor +- func: _sparse_coo_tensor_unsafe(IndexTensor indices, Tensor values, IntList size, *, TensorOptions options={}) -> Tensor -- func: sparse_coo_tensor_with_dims(Type sparse_dtype, int64_t sparse_dim, int64_t dense_dim, IntList size) -> Tensor - variants: [] +- func: _sparse_coo_tensor_with_dims(int64_t sparse_dim, int64_t dense_dim, IntList size, *, TensorOptions options) -> Tensor dispatch: SparseCPU: new_with_dims_sparse SparseCUDA: new_with_dims_sparse requires_tensor: True -- func: sparse_coo_tensor_with_dims_and_tensors(Type sparse_dtype, int64_t sparse_dim, int64_t dense_dim, IntList size, Tensor indices, Tensor values) -> Tensor - variants: [] +- func: _sparse_coo_tensor_with_dims_and_tensors(int64_t sparse_dim, int64_t dense_dim, IntList size, Tensor indices, Tensor values, *, TensorOptions options) -> Tensor dispatch: SparseCPU: new_with_dims_and_tensor_sparse SparseCUDA: new_with_dims_and_tensor_sparse diff --git a/aten/src/ATen/native/sparse/SparseTensor.cpp b/aten/src/ATen/native/sparse/SparseTensor.cpp index 85edc40588b62..9050aab504956 100644 --- a/aten/src/ATen/native/sparse/SparseTensor.cpp +++ b/aten/src/ATen/native/sparse/SparseTensor.cpp @@ -68,7 +68,6 @@ Tensor values_sparse(const Tensor& self) { /*** Helper methods ***/ -/* Empty init */ SparseTensor new_sparse(const TensorOptions& options) { AT_ASSERT(!options.is_variable()); AT_ASSERT(options.layout() == kSparse); @@ -82,11 +81,30 @@ SparseTensor 
new_sparse(const TensorOptions& options) { type_id, scalarTypeToTypeMeta(options.dtype())); } +/** Empty init **/ -/** Actual dispatched methods ***/ +Tensor empty_sparse(IntList size, const TensorOptions& options) { + AT_CHECK(size.size() != 0, + "cannot construct sparse tensor with 0 dimensions and no values; you must specify at least 1 dimension if you want to create a sparse tensor with no elements, \ + or you must provide a single-element `values` tensor (e.g. x = torch.sparse_coo_tensor(torch.zeros(0, 1), 12.3, [])) if you want to create a scalar sparse tensor"); + AT_ASSERT(!options.is_variable()); + AT_ASSERT(options.layout() == kSparse); + TensorTypeId type_id; + if (options.device().type() == kCUDA) { + type_id = SparseCUDATensorId(); + } else { + type_id = SparseCPUTensorId(); + } + auto tensor = Tensor(c10::make_intrusive(type_id, scalarTypeToTypeMeta(options.dtype()))); + get_sparse_impl(tensor)->resize_and_clear_(size.size(), 0, size); + return tensor; +} + + +/** Actual dispatched creation methods ***/ -SparseTensor new_with_dims_sparse(const SparseType& sparse_dtype, int64_t sparse_dim, int64_t dense_dim, ArrayRef size) { - SparseTensor self = new_sparse(sparse_dtype); +SparseTensor new_with_dims_sparse(int64_t sparse_dim, int64_t dense_dim, ArrayRef size, const TensorOptions& options) { + SparseTensor self = new_sparse(options); AT_CHECK(size.size() != 0, "cannot construct sparse tensor with 0 dimensions and no values; you must specify at least 1 dimension if you want to create a sparse tensor with no elements, \ or you must provide a single-element `values` tensor (e.g. x = torch.sparse_coo_tensor(torch.zeros(0, 1), 12.3, [])) if you want to create a scalar sparse tensor"); @@ -96,27 +114,28 @@ or you must provide a single-element `values` tensor (e.g. 
x = torch.sparse_coo_ // Does NOT make copies of indices and values SparseTensor new_with_dims_and_tensor_sparse( - const SparseType& sparse_dtype, int64_t sparse_dim, int64_t dense_dim, ArrayRef size, const LongTensor& indices, - const Tensor& values) { - SparseTensor self = new_sparse(sparse_dtype); + const Tensor& values, + const TensorOptions& options) { + SparseTensor self = new_sparse(options); get_sparse_impl(self)->resize_(sparse_dim, dense_dim, size); alias_into_sparse(self, indices, values); return self; } +/** Public creation API that dispatch to methods above **/ + /* Shape init */ Tensor sparse_coo_tensor(ArrayRef size, const TensorOptions& options) { TensorOptions toptions = options; - // Use type-dispatch to get rid of variable type. - return at::getType(toptions.layout(at::kSparse)).sparse_coo_tensor_with_dims(size.size(), 0, size); + return at::_sparse_coo_tensor_with_dims(size.size(), 0, size, options.layout(at::kSparse)); } /* Pointer-copy init */ -Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_) { +Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, const TensorOptions& options) { AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout()); AT_CHECK(!values_.is_sparse(), "expected values to be a dense tensor, but got values of layout ", values_.layout()); @@ -169,38 +188,12 @@ Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_) { computed_sizes[static_cast(sparse_dim + d)] = values.size(d+1); } - const TensorOptions options = values.options().layout(at::kSparse); - return at::getType(options).sparse_coo_tensor_with_dims_and_tensors(sparse_dim, dense_dim, computed_sizes, indices, values); -} - -SparseTensor new_with_dims_and_size_sparse(int64_t sparseDims, int64_t denseDims, ArrayRef size, const TensorOptions& options) { - SparseTensor self = new_sparse(options); - AT_CHECK(size.size() != 0, - "cannot construct sparse tensor with 
0 dimensions and no values; you must specify at least 1 dimension if you want to create a sparse tensor with no elements, \ -or you must provide a single-element `values` tensor (e.g. x = torch.sparse_coo_tensor(torch.zeros(0, 1), 12.3, [])) if you want to create a scalar sparse tensor"); - _get_sparse_impl(self)->resize_and_clear_(sparseDims, denseDims, size); - return self; -} - -Tensor empty_sparse(IntList size, const TensorOptions& options) { - AT_CHECK(size.size() != 0, - "cannot construct sparse tensor with 0 dimensions and no values; you must specify at least 1 dimension if you want to create a sparse tensor with no elements, \ - or you must provide a single-element `values` tensor (e.g. x = torch.sparse_coo_tensor(torch.zeros(0, 1), 12.3, [])) if you want to create a scalar sparse tensor"); - AT_ASSERT(!options.is_variable()); - AT_ASSERT(options.layout() == kSparse); - TensorTypeId type_id; - if (options.device().type() == kCUDA) { - type_id = SparseCUDATensorId(); - } else { - type_id = SparseCPUTensorId(); - } - auto tensor = Tensor(c10::make_intrusive(type_id, scalarTypeToTypeMeta(options.dtype()))); - _get_sparse_impl(tensor)->resize_and_clear_(size.size(), 0, size); - return tensor; + const TensorOptions sparse_options = values.options().layout(at::kSparse); + return at::_sparse_coo_tensor_with_dims_and_tensors(sparse_dim, dense_dim, computed_sizes, indices, values, sparse_options); } // NB: Got rid of the sizes == NULL case -Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, ArrayRef size) { +Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, ArrayRef size, const TensorOptions& options) { AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout()); AT_CHECK(!values_.is_sparse(), "expected values to be a dense tensor, but got values of layout ", values_.layout()); @@ -244,8 +237,8 @@ Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, 
ArrayRef< } } - const TensorOptions options = values.options().layout(at::kSparse); - return at::getType(options).sparse_coo_tensor_with_dims_and_tensors(sparse_dim, dense_dim, size, indices, values); + const TensorOptions sparse_options = values.options().layout(at::kSparse); + return at::_sparse_coo_tensor_with_dims_and_tensors(sparse_dim, dense_dim, size, indices, values, sparse_options); } // NOTE: _sparse_coo_tensor_unsafe() differs from sparse_coo_tensor() @@ -253,7 +246,7 @@ Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, ArrayRef< // copy from CUDA to CPU. However, this function should ONLY be used where we know that the indices // are guaranteed to be within bounds. // NB: Got rid of the size == NULL case -Tensor _sparse_coo_tensor_unsafe(const Tensor& indices, const Tensor& values_, ArrayRef size) { +Tensor _sparse_coo_tensor_unsafe(const Tensor& indices, const Tensor& values_, ArrayRef size, const TensorOptions& options) { AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout()); AT_CHECK(!values_.is_sparse(), "expected values to be a dense tensor, but got values of layout ", values_.layout()); @@ -268,8 +261,8 @@ Tensor _sparse_coo_tensor_unsafe(const Tensor& indices, const Tensor& values_, A int64_t sparse_dim = indices.size(0); int64_t dense_dim = values.dim() - 1; - const TensorOptions options = values.options().layout(at::kSparse); - return at::getType(options).sparse_coo_tensor_with_dims_and_tensors(sparse_dim, dense_dim, size, indices, values); + const TensorOptions sparse_options = values.options().layout(at::kSparse); + return at::_sparse_coo_tensor_with_dims_and_tensors(sparse_dim, dense_dim, size, indices, values, sparse_options); } // NB: Deleted newWithSizeNd variants diff --git a/tools/autograd/derivatives.yaml b/tools/autograd/derivatives.yaml index e3569a3c3fd2c..81065d6df1702 100644 --- a/tools/autograd/derivatives.yaml +++ 
b/tools/autograd/derivatives.yaml @@ -803,7 +803,7 @@ self: not_implemented("sparse_mask") mask: not_implemented("sparse_mask") -- name: sparse_coo_tensor_with_dims_and_tensors(Type sparse_dtype, int64_t sparse_dim, int64_t dense_dim, IntList size, Tensor indices, Tensor values) +- name: _sparse_coo_tensor_with_dims_and_tensors(int64_t sparse_dim, int64_t dense_dim, IntList size, Tensor indices, Tensor values, TensorOptions options) values: sparse_constructor_values_backward(grad, indices, values.sizes()) - name: _standard_gamma(Tensor self, Generator generator) diff --git a/tools/autograd/gen_python_functions.py b/tools/autograd/gen_python_functions.py index 71bff84d0c3c2..5376d1b638490 100644 --- a/tools/autograd/gen_python_functions.py +++ b/tools/autograd/gen_python_functions.py @@ -19,8 +19,7 @@ SKIP_PYTHON_BINDINGS = [ 'alias', 'contiguous', 'is_cuda', 'is_sparse', 'size', 'stride', '.*_backward', '.*_backward_(out|input|weight|bias)', '.*_forward', - '.*_forward_out', '_unsafe_view', 'tensor', - 'sparse_coo_tensor', 'sparse_coo_tensor_with_dims.*', + '.*_forward_out', '_unsafe_view', 'tensor', '_?sparse_coo_tensor.*', '_arange.*', '_range.*', '_linspace.*', '_logspace.*', '_sparse_add.*', '_sparse_div.*', '_sparse_mul.*', '_sparse_sub.*', 'index', From af12f598934e89a7144b4959640f51a79f189176 Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Tue, 16 Oct 2018 18:23:58 -0400 Subject: [PATCH 13/23] arg checking and de-duplicate code --- aten/src/ATen/native/sparse/SparseTensor.cpp | 84 ++++++++------------ 1 file changed, 31 insertions(+), 53 deletions(-) diff --git a/aten/src/ATen/native/sparse/SparseTensor.cpp b/aten/src/ATen/native/sparse/SparseTensor.cpp index 9050aab504956..efe2ad90e0fcc 100644 --- a/aten/src/ATen/native/sparse/SparseTensor.cpp +++ b/aten/src/ATen/native/sparse/SparseTensor.cpp @@ -81,26 +81,6 @@ SparseTensor new_sparse(const TensorOptions& options) { type_id, scalarTypeToTypeMeta(options.dtype())); } -/** Empty init **/ - -Tensor 
empty_sparse(IntList size, const TensorOptions& options) { - AT_CHECK(size.size() != 0, - "cannot construct sparse tensor with 0 dimensions and no values; you must specify at least 1 dimension if you want to create a sparse tensor with no elements, \ - or you must provide a single-element `values` tensor (e.g. x = torch.sparse_coo_tensor(torch.zeros(0, 1), 12.3, [])) if you want to create a scalar sparse tensor"); - AT_ASSERT(!options.is_variable()); - AT_ASSERT(options.layout() == kSparse); - TensorTypeId type_id; - if (options.device().type() == kCUDA) { - type_id = SparseCUDATensorId(); - } else { - type_id = SparseCPUTensorId(); - } - auto tensor = Tensor(c10::make_intrusive(type_id, scalarTypeToTypeMeta(options.dtype()))); - get_sparse_impl(tensor)->resize_and_clear_(size.size(), 0, size); - return tensor; -} - - /** Actual dispatched creation methods ***/ SparseTensor new_with_dims_sparse(int64_t sparse_dim, int64_t dense_dim, ArrayRef size, const TensorOptions& options) { @@ -128,24 +108,41 @@ SparseTensor new_with_dims_and_tensor_sparse( /** Public creation API that dispatch to methods above **/ + +/** Empty init **/ +Tensor empty_sparse(IntList size, const TensorOptions& options) { + return new_with_dims_sparse(size.size(), 0, size, options); +} + /* Shape init */ Tensor sparse_coo_tensor(ArrayRef size, const TensorOptions& options) { - TensorOptions toptions = options; return at::_sparse_coo_tensor_with_dims(size.size(), 0, size, options.layout(at::kSparse)); } /* Pointer-copy init */ -Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, const TensorOptions& options) { + +// argument checking helper +static inline +void tensor_init_arg_check(const Tensor& indices, const Tensor& values, const TensorOptions& options) { AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout()); - AT_CHECK(!values_.is_sparse(), "expected values to be a dense tensor, but got values of layout ", 
values_.layout()); + AT_CHECK(!values.is_sparse(), "expected values to be a dense tensor, but got values of layout ", values.layout()); + AT_CHECK(options.layout() == kSparse, "expected sparse layout, but got layout ", options.layout()); +} - Tensor values; - if (values_.dim() == 0) { +// values tensor preprocessing helper +static inline +void transform_values_if_needed(const Tensor& values, const TensorOptions& options) { + if (values.dim() == 0) { // Mimic Numpy behavior here and treat it as a 1D tensor - values = values_.expand({1}); + return values.to(options.layout(kStrided)).expand_({1}); } else { - values = values_; + return values.to(options.layout(kStrided)); } +} + +Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, const TensorOptions& options) { + tensor_init_arg_check(indices, values_, options); + Tensor values = transform_values_if_needed(values_, options); // If sizes are not given, it is inferred as max index of each dim. int64_t sparse_dim = indices.size(0); @@ -188,22 +185,13 @@ Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, const Ten computed_sizes[static_cast(sparse_dim + d)] = values.size(d+1); } - const TensorOptions sparse_options = values.options().layout(at::kSparse); - return at::_sparse_coo_tensor_with_dims_and_tensors(sparse_dim, dense_dim, computed_sizes, indices, values, sparse_options); + return at::_sparse_coo_tensor_with_dims_and_tensors(sparse_dim, dense_dim, computed_sizes, indices, values, options); } // NB: Got rid of the sizes == NULL case Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, ArrayRef size, const TensorOptions& options) { - AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout()); - AT_CHECK(!values_.is_sparse(), "expected values to be a dense tensor, but got values of layout ", values_.layout()); - - Tensor values; - if (values_.dim() == 0) { - // Mimic Numpy behavior here and treat it as a 
1D tensor - values = values_.expand({1}); - } else { - values = values_; - } + tensor_init_arg_check(indices, values_, options); + Tensor values = transform_values_if_needed(values_, options); int64_t sparse_dim = indices.size(0); int64_t dense_dim = values.dim() - 1; @@ -237,8 +225,7 @@ Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, ArrayRef< } } - const TensorOptions sparse_options = values.options().layout(at::kSparse); - return at::_sparse_coo_tensor_with_dims_and_tensors(sparse_dim, dense_dim, size, indices, values, sparse_options); + return at::_sparse_coo_tensor_with_dims_and_tensors(sparse_dim, dense_dim, size, indices, values, options); } // NOTE: _sparse_coo_tensor_unsafe() differs from sparse_coo_tensor() @@ -247,22 +234,13 @@ Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, ArrayRef< // are guaranteed to be within bounds. // NB: Got rid of the size == NULL case Tensor _sparse_coo_tensor_unsafe(const Tensor& indices, const Tensor& values_, ArrayRef size, const TensorOptions& options) { - AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout()); - AT_CHECK(!values_.is_sparse(), "expected values to be a dense tensor, but got values of layout ", values_.layout()); - - Tensor values; - if (values_.dim() == 0) { - // Mimic Numpy behavior here and treat it as a 1D tensor - values = values_.expand({1}); - } else { - values = values_; - } + tensor_init_arg_check(indices, values_, options); + Tensor values = transform_values_if_needed(values_, options); int64_t sparse_dim = indices.size(0); int64_t dense_dim = values.dim() - 1; - const TensorOptions sparse_options = values.options().layout(at::kSparse); - return at::_sparse_coo_tensor_with_dims_and_tensors(sparse_dim, dense_dim, size, indices, values, sparse_options); + return at::_sparse_coo_tensor_with_dims_and_tensors(sparse_dim, dense_dim, size, indices, values, options); } // NB: Deleted newWithSizeNd 
variants From 39592ee99cec11910e6f83fe78ef492e4c92bb58 Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Wed, 17 Oct 2018 19:44:22 -0400 Subject: [PATCH 14/23] revert grad mode stuff --- tools/autograd/gen_variable_type.py | 12 ++++++------ torch/csrc/autograd/variable.h | 10 +++++++--- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/tools/autograd/gen_variable_type.py b/tools/autograd/gen_variable_type.py index 8e46aa0372734..16af5c65d04a9 100644 --- a/tools/autograd/gen_variable_type.py +++ b/tools/autograd/gen_variable_type.py @@ -471,13 +471,13 @@ def wrap_output(call): if not isinstance(view_info, dict): if len(differentiable_output_vars) == len(tensor_output_vars): # all outputs are differentiable - return 'as_view({}, {}, GradMode::is_enabled())'.format(view_info, call), [] + return 'as_view({}, {}, true)'.format(view_info, call), [] elif len(differentiable_output_vars) == 0: # no output is differentiable return 'as_view({}, {}, false)'.format(view_info, call), [] else: # some of the outputs are differentiable - # need to expand to dict mode + # need to expand to dict mode, i.e., one entry per output base_name = view_info view_info_dict = {} for i, return_info in enumerate(returns): @@ -487,17 +487,17 @@ def wrap_output(call): view_info_dict = view_info def wrap_view_single(output_var, base_var): - fmt = '{output_var} = as_view({base_var}, {output_var}, {potentially_tracks_history});' + fmt = '{output_var} = as_view({base_var}, {output_var}, {is_differentiable});' if output_var in differentiable_output_vars: # If `GradMode::is_enabled()` is False, this is a # non-differentiable view. Gradients should not flow through. - potentially_tracks_history = 'GradMode::is_enabled()' + is_differentiable = 'true' else: # This output is non-differentiable, so it is a # non-differentiable view. Gradients should not flow through. 
- potentially_tracks_history = 'false' + is_differentiable = 'false' return fmt.format(output_var=output_var, base_var=base_var, - potentially_tracks_history=potentially_tracks_history) + is_differentiable=is_differentiable) extra_wrapping_stmts = [] for output_idx, return_info in enumerate(returns): diff --git a/torch/csrc/autograd/variable.h b/torch/csrc/autograd/variable.h index 3b97435914477..48cc918ca288c 100644 --- a/torch/csrc/autograd/variable.h +++ b/torch/csrc/autograd/variable.h @@ -438,9 +438,13 @@ struct TORCH_API Variable::Impl : public at::TensorImpl { /// view relation via DifferentiableViewImpl in autograd, the views will be /// using usual Variable::Impl and just share the version counters with the base /// Variables. -/// Some examples are: -/// 1. Views created from .detach(), -/// 2. Views created when GradMode::is_enabled() = false. +/// Such views include: +/// 1. Views created from .detach() +/// 2. Views that are non-differentiable by their nature. +/// E.g., `sparse_tensor.indices()` is an integral view on a (possibly) +/// floating point tensor. +/// See top of `derivatives.yaml` on how to specify that outputs of a +/// function are non-differentiable. /// These are called non-differentiable views as the gradients do not flow /// through the view relation. 
/// Relevant logic for non-differentiable views is implemented in From 93bb32decba8ef4fe609d50871b5a3c6816ce647 Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Sun, 21 Oct 2018 14:32:12 -0400 Subject: [PATCH 15/23] Make _values non-differentiable; Add note on why Add has_* for TensorOptions Fix Python sparse_coo_tensor entry Fix a CUDA coalesce error; add tests --- aten/src/ATen/core/TensorOptions.h | 25 +++++ aten/src/ATen/native/native_functions.yaml | 91 +++++++++++++------ aten/src/ATen/native/sparse/SparseTensor.cpp | 65 ++++++------- .../native/sparse/cuda/SparseCUDATensor.cu | 39 ++++---- test/common_utils.py | 2 + test/test_autograd.py | 50 +++++----- test/test_sparse.py | 10 ++ tools/autograd/derivatives.yaml | 10 +- tools/autograd/templates/Functions.cpp | 71 +++++++-------- torch/csrc/utils/tensor_new.cpp | 37 ++++---- 10 files changed, 230 insertions(+), 170 deletions(-) diff --git a/aten/src/ATen/core/TensorOptions.h b/aten/src/ATen/core/TensorOptions.h index 9435374d29ab2..362bd712e6aff 100644 --- a/aten/src/ATen/core/TensorOptions.h +++ b/aten/src/ATen/core/TensorOptions.h @@ -181,6 +181,11 @@ struct CAFFE2_API TensorOptions { return has_device_ ? device_ : getDefaultTensorOptions().device(); } + /// Returns whether the device is specified. + bool has_device() const noexcept { + return has_device_; + } + /// Returns the device of the `TensorOptions`, or `c10::nullopt` if /// device is not specified. optional device_opt() const noexcept { @@ -197,6 +202,11 @@ struct CAFFE2_API TensorOptions { return has_dtype_ ? dtype_ : getDefaultTensorOptions().dtype(); } + /// Returns whether the dtype is specified. + bool has_dtype() const noexcept { + return has_dtype_; + } + /// Returns the dtype of the `TensorOptions`, or `c10::nullopt` if /// device is not specified. optional dtype_opt() const noexcept { @@ -208,6 +218,11 @@ struct CAFFE2_API TensorOptions { return has_layout_ ? 
layout_ : getDefaultTensorOptions().layout(); } + /// Returns whether the layout is specified. + bool has_layout() const noexcept { + return has_layout_; + } + /// Returns the layout of the `TensorOptions`, or `c10::nullopt` if /// layout is not specified. optional layout_opt() const noexcept { @@ -219,6 +234,11 @@ struct CAFFE2_API TensorOptions { return has_requires_grad_ ? requires_grad_ : getDefaultTensorOptions().requires_grad(); } + /// Returns whether the `requires_grad` is specified. + bool has_requires_grad() const noexcept { + return has_requires_grad_; + } + /// Returns the `requires_grad` property of the `TensorOptions`, or /// `c10::nullopt` if `requires_grad` is not specified. optional requires_grad_opt() const noexcept { @@ -231,6 +251,11 @@ struct CAFFE2_API TensorOptions { return has_is_variable_ ? is_variable_ : getDefaultTensorOptions().is_variable(); } + /// Returns whether the `is_variable` is specified. + bool has_is_variable() const noexcept { + return has_is_variable_; + } + /// Returns the `is_variable` property of the `TensorOptions`, or /// `c10::nullopt` if `is_variable` is not specified. optional is_variable_opt() const noexcept { diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml index 45244a80c508b..a95cfc3d817df 100644 --- a/aten/src/ATen/native/native_functions.yaml +++ b/aten/src/ATen/native/native_functions.yaml @@ -1932,6 +1932,33 @@ # NOTE [ Sparse: autograd and API ] # +# +# Sparse Tensor Constructors +# ~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# The API entry points to sparse tensor construction should be +# `sparse_coo_tensor` and `_sparse_coo_tensor_unsafe`. Depending on whether the +# indices and values tensors are given, they eventually dispatch to either +# `sparse_coo_tensor_with_dims` or `sparse_coo_tensor_with_dims_and_tensors`. +# +# The autograd support for ctor is implemented on `sparse_coo_tensor_with_dims_and_tensors`. 
+# +# The API methods `sparse_coo_tensor` and `_sparse_coo_tensor_unsafe` +# **must not** have specific type dispatches because otherwise codegen will +# consider them as abstract methods (see Note [Abstract ATen methods]), dispatch +# using **Tensor** type, and thus lose autograd tracking on the actual method +# they dispatch to, e.g., `sparse_coo_tensor_with_dims_and_tensors`. +# +# The actual ctors `sparse_coo_tensor_with_dims` and `sparse_coo_tensor_with_dims_and_tensors`, +# on the other hand, need to create `SparseTensorImpl` and know nothing about +# how `VariableType`s work. So they need to be dispatched using Tensor types. +# We thus put `requires_tensor=True` to ensure that `VariableType` will unwrap +# the given variables and call with the Tensor type. +# +# +# Sparse Methods API Design +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# # Goals: 1. Flexible API for users to write custom sparse ops # 2. ctor and member accessor with autograd support # @@ -1963,17 +1990,43 @@ # + `dense_dim()`: number of dense dimensions # + `is_coalesced()`: whether the sparse tensor is coalesced # -# `_indices` and `_values` should returns the raw indices and values dense +# `_indices()` and `_values()` should return the raw indices and values dense # tensors within a sparse tensor. They can be quite unsafe with inplace -# operations like `t_`, and exposes uncoalesced indices and values. The public -# recommended API is `indices` and `values`, both of which first check that the -# tensor is coalesced and return views on those tensors. +# operations like `t_()`, and expose uncoalesced indices and values. The public +# recommended API is `indices()` and `values()`, both of which first check that +# the tensor is coalesced and return views on those tensors. # -# Autograd support: # -# Autograd is supported on `_values`, `values` and sparse tensor ctor with -# indices and values tensors. E.g., `torch.sparse_coo_tensor(i, v).values().sum()` -# is differentiable w.r.t. `v`. 
+# Autograd Support +# ~~~~~~~~~~~~~~~~ +# +# Autograd is supported on `values()` and sparse tensor ctor with indices and +# values tensors. E.g., `torch.sparse_coo_tensor(i, v).values().sum()` is +# differentiable w.r.t. `v`. +# +# NB: The `values()` and `_values()` operators are special in that they are +# layout-aware, i.e., the output depends not just on the data it represents, but +# also on the input layout details (in this case, the `indices` tensor). See +# NOTE [ as_strided Backward and layout-aware/agnostic autograd ] in Functions.cpp +# for discussion on layout-aware vs layout-agnostic autograd. Since PyTorch ops +# operate in the layout-agnostic mode, similar to `as_strided`, backward of +# these two operators need to consider them in a layout-agnostic way: +# + `values()`: +# Input is coalesced. +# We just pretend having `input.indices()` as an additional argument +# `input_indices`, then forward is similar to +# `input.to(kStrided).index_select(input_indices)` regardless of the layout. +# Note that `values()` normally is layout-aware even if we constrain +# ourselves on sparse inputs since it may include all zeros values entries +# as "present" entries. +# + `_values()`: +# Input may be uncoalesced. +# It is not straightforward to construct a layout-agnostic version because +# duplicate indices entries may exist and additional parameterization is +# needed to distribute the value into different values entries. Furthermore, +# this op is intended to provide ways to write custom sparse ops, rather +# than being used in autograd graph, so it is marked as *non-differentiable* +# in derivatives.yaml. # # Before reading the following, see NOTE [ Autograd Variable Views ] in # variable.h for details on views that are tracked by autograd, and views that @@ -1993,28 +2046,6 @@ # sparse tensor. -# NOTE [ Sparse tensor ctors ] -# -# The API entry points to sparse tensor construction should be -# `sparse_coo tensor` and `_sparse_coo_tensor_unsafe`. 
Depending on whether the -# indices and values tensors are given, they eventually dispatch to either -# `sparse_coo_tensor_with_dims` or `sparse_coo_tensor_with_dims_and_tensors`. -# -# The autograd support for ctor is implement on `sparse_coo_tensor_with_dims_and_tensors`. -# -# The API methods `sparse_coo tensor` and `_sparse_coo_tensor_unsafe` -# **must not** have specific type dispatches because otherwise codegen will -# consider them as abstract methods (see Note [Abstract ATen methods]), dispatch -# using **Tensor** type, and thus lose autograd tracking on the actual method -# they dispatch to, e.g., `sparse_coo_tensor_with_dims_and_tensors`. -# -# The actual ctors `sparse_coo_tensor_with_dims` and `sparse_coo_tensor_with_dims_and_tensors`, -# on the other hand, need to create `SparseTensorImpl` and know nothing about -# how `VariableType`s work. So they need to be dispatched using Tensor types. -# We thus put `requires_tensor=True` to ensure that `VariableType` will unwrap -# the given variables and call with the Tensor type. - - # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given # the default would never make sense. 
- func: sparse_coo_tensor(IntList size, *, TensorOptions options) -> Tensor diff --git a/aten/src/ATen/native/sparse/SparseTensor.cpp b/aten/src/ATen/native/sparse/SparseTensor.cpp index efe2ad90e0fcc..73a8b6b8343e8 100644 --- a/aten/src/ATen/native/sparse/SparseTensor.cpp +++ b/aten/src/ATen/native/sparse/SparseTensor.cpp @@ -64,6 +64,7 @@ Tensor values_sparse(const Tensor& self) { /****************************************************************************** * creation methods + * See NOTE [ Sparse: autograd and API ] for details ******************************************************************************/ /*** Helper methods ***/ @@ -108,7 +109,6 @@ SparseTensor new_with_dims_and_tensor_sparse( /** Public creation API that dispatch to methods above **/ - /** Empty init **/ Tensor empty_sparse(IntList size, const TensorOptions& options) { return new_with_dims_sparse(size.size(), 0, size, options); @@ -121,28 +121,29 @@ Tensor sparse_coo_tensor(ArrayRef size, const TensorOptions& options) { /* Pointer-copy init */ -// argument checking helper -static inline -void tensor_init_arg_check(const Tensor& indices, const Tensor& values, const TensorOptions& options) { - AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout()); - AT_CHECK(!values.is_sparse(), "expected values to be a dense tensor, but got values of layout ", values.layout()); - AT_CHECK(options.layout() == kSparse, "expected sparse layout, but got layout ", options.layout()); -} - -// values tensor preprocessing helper -static inline -void transform_values_if_needed(const Tensor& values, const TensorOptions& options) { - if (values.dim() == 0) { - // Mimic Numpy behavior here and treat it as a 1D tensor - return values.to(options.layout(kStrided)).expand_({1}); - } else { - return values.to(options.layout(kStrided)); +// helpers +namespace { + // Helper called in **every** sparse_coo_tensor ctor with indices and + // values tensors. 
+ static inline Tensor argcheck_and_expand_values_if_needed( + const LongTensor& indices_, const Tensor& values_, const TensorOptions& options) { + // arg checking + AT_CHECK(!indices_.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices_.layout()); + AT_CHECK(!values_.is_sparse(), "expected values to be a dense tensor, but got values of layout ", values_.layout()); + AT_CHECK(!options.has_layout() || options.layout() == kSparse, "expected sparse layout, but got layout ", options.layout()); + + // expand + if (values_.dim() == 0) { + // Mimic Numpy behavior here and treat it as a 1D tensor + return values_.expand({1}); + } else { + return values_; + } } } Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, const TensorOptions& options) { - tensor_init_arg_check(indices, values_, options); - Tensor values = transform_values_if_needed(values_, options); + Tensor values = argcheck_and_expand_values_if_needed(indices, values_, options); // If sizes are not given, it is inferred as max index of each dim. 
int64_t sparse_dim = indices.size(0); @@ -156,15 +157,8 @@ Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, const Ten LongTensor min_indices = std::get(indices.min(/* dim */ 1, /* keepdim */ false)); LongTensor computed_indices_sizes = std::get(indices.max(/* dim */ 1, /* keepdim */ false)); computed_indices_sizes.add_(1); // len = max_index + 1 - LongTensor cpu_min_indices, cpu_computed_indices_sizes; - if (computed_indices_sizes.is_cuda()) { - cpu_computed_indices_sizes = at::empty(computed_indices_sizes.sizes(), at::initialTensorOptions().dtype(kLong)); - cpu_computed_indices_sizes.copy_(computed_indices_sizes); - cpu_min_indices = min_indices.to(at::DeviceType::CPU); - } else { - cpu_min_indices = min_indices; - cpu_computed_indices_sizes = computed_indices_sizes; - } + LongTensor cpu_min_indices = min_indices.to(at::DeviceType::CPU); + LongTensor cpu_computed_indices_sizes = computed_indices_sizes.to(at::DeviceType::CPU); auto cpu_min_indices_accessor = cpu_min_indices.accessor(); auto cpu_computed_indices_sizes_accessor = cpu_computed_indices_sizes.accessor(); for (int64_t d = 0; d < sparse_dim; d++) { @@ -185,13 +179,13 @@ Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, const Ten computed_sizes[static_cast(sparse_dim + d)] = values.size(d+1); } - return at::_sparse_coo_tensor_with_dims_and_tensors(sparse_dim, dense_dim, computed_sizes, indices, values, options); + return at::_sparse_coo_tensor_with_dims_and_tensors( + sparse_dim, dense_dim, computed_sizes, indices, values, values.options().layout(kSparse)); } // NB: Got rid of the sizes == NULL case Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, ArrayRef size, const TensorOptions& options) { - tensor_init_arg_check(indices, values_, options); - Tensor values = transform_values_if_needed(values_, options); + Tensor values = argcheck_and_expand_values_if_needed(indices, values_, options); int64_t sparse_dim = indices.size(0); int64_t 
dense_dim = values.dim() - 1; @@ -225,7 +219,8 @@ Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, ArrayRef< } } - return at::_sparse_coo_tensor_with_dims_and_tensors(sparse_dim, dense_dim, size, indices, values, options); + return at::_sparse_coo_tensor_with_dims_and_tensors( + sparse_dim, dense_dim, size, indices, values, values.options().layout(kSparse)); } // NOTE: _sparse_coo_tensor_unsafe() differs from sparse_coo_tensor() @@ -234,13 +229,13 @@ Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, ArrayRef< // are guaranteed to be within bounds. // NB: Got rid of the size == NULL case Tensor _sparse_coo_tensor_unsafe(const Tensor& indices, const Tensor& values_, ArrayRef size, const TensorOptions& options) { - tensor_init_arg_check(indices, values_, options); - Tensor values = transform_values_if_needed(values_, options); + Tensor values = argcheck_and_expand_values_if_needed(indices, values_, options); int64_t sparse_dim = indices.size(0); int64_t dense_dim = values.dim() - 1; - return at::_sparse_coo_tensor_with_dims_and_tensors(sparse_dim, dense_dim, size, indices, values, options); + return at::_sparse_coo_tensor_with_dims_and_tensors( + sparse_dim, dense_dim, size, indices, values, values.options().layout(kSparse)); } // NB: Deleted newWithSizeNd variants diff --git a/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cu b/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cu index 36511bbf58d61..401d3833fd4c3 100644 --- a/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cu +++ b/aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cu @@ -47,11 +47,9 @@ SparseTensor coalesce_sparse_cuda(const SparseTensor& self) { // For indices, a simple sort + unique suffices // For values, we use a custom kernel for segmented reduction (can't use Thrust due to indirection). 
- // TODO: I'm not sure if this could ever be non-contiguous - LongTensor values = self._values().contiguous(); + Tensor values = self._values(); int64_t sparse_dim = self.sparse_dim(); - int64_t stride = values.stride(0); // indices will be modified by Thrust, so we have to clone or use new storage // here. @@ -90,21 +88,26 @@ SparseTensor coalesce_sparse_cuda(const SparseTensor& self) { newValues_size[0] = newNnz; Tensor newValues = at::empty(newValues_size, values.options()); - dim3 grid(THCCeilDiv(newNnz, (int64_t) 4), THCCeilDiv(stride, (int64_t) 128)); - dim3 block(32, 4); - AT_DISPATCH_ALL_TYPES_AND_HALF( - values.type(), "coalesce_sparse_cuda", [&] { - using cuda_accscalar_t = acc_type; - apply::coalesceValuesKernel<<>>( - uniqueOffsets.data(), - origIndices.data(), - values.data(), - newValues.data(), - nnz, - newNnz, - stride - ); - }); + // If there is no values to copy, save running the kernel. + if (newValues.numel() > 0) { + values = values.contiguous(); + int64_t stride = at::prod_intlist(values.sizes().slice(1)); + dim3 grid(THCCeilDiv(newNnz, (int64_t) 4), THCCeilDiv(stride, (int64_t) 128)); + dim3 block(32, 4); + AT_DISPATCH_ALL_TYPES_AND_HALF( + values.type(), "coalesce_sparse_cuda", [&] { + using cuda_accscalar_t = acc_type; + apply::coalesceValuesKernel<<>>( + uniqueOffsets.data(), + origIndices.data(), + values.data(), + newValues.data(), + nnz, + newNnz, + stride + ); + }); + } // this grid-strided version is slower but probably more flexible // to different sizes diff --git a/test/common_utils.py b/test/common_utils.py index e6d497d47c236..377522b677fd1 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -291,6 +291,8 @@ def assertTensorsSlowEqual(self, x, y, prec=None, message=''): self.assertLessEqual(max_err, prec, message) def genSparseTensor(self, size, sparse_dim, nnz, is_uncoalesced, device='cpu'): + # Assert not given impossible combination, where the sparse dims have + # empty numel, but nnz > 0 makes the indices 
containing values. assert all(size[d] > 0 for d in range(sparse_dim)) or nnz == 0, 'invalid arguments' v_size = [nnz] + list(size[sparse_dim:]) diff --git a/test/test_autograd.py b/test/test_autograd.py index a35656dbc4195..9266e4a23c2d3 100644 --- a/test/test_autograd.py +++ b/test/test_autograd.py @@ -579,7 +579,8 @@ def backward(self, grad_x): self.assertEqual(x.grad, sparse_grad1 + sparse_grad2) def test_sparse_ctor_getter_backward(self): - def test(size, sparse_dim, nnz): + # See NOTE [ Sparse: autograd and API ] on the expected behavior of this test + def test(size, sparse_dim, nnz, device): v_size = [nnz] + list(size[sparse_dim:]) i = torch.rand(sparse_dim, nnz) i.mul_(torch.tensor(size[:sparse_dim]).unsqueeze(1).to(i)) @@ -587,38 +588,35 @@ def test(size, sparse_dim, nnz): inp = torch.randn(v_size, requires_grad=True) other = self.genSparseTensor(size, sparse_dim, nnz, is_uncoalesced=True)[0] + other = other.to(device) - # Use .indices() and .values() - def fn1(v): - x = torch.sparse_coo_tensor(i, v, size) + def fn(v): + x = torch.sparse_coo_tensor(i, v, size, device=device) y = (x + other).coalesce() yv = y.values() new_v = yv.tanh() z = torch.sparse_coo_tensor(y.indices(), new_v, y.size()) return z.coalesce().values() - # Use ._indices() and ._values() - def fn2(v): - x = torch.sparse_coo_tensor(i, v, size) - y = (x + other) - yv = y._values() - new_v = yv.tanh() - z = torch.sparse_coo_tensor(y._indices(), new_v, y.size()) - return z.coalesce()._values() - - for fn in (fn1, fn2): - gradcheck(fn, (inp,)) - # FIXME: make gradgradcheck work. - # gradgradcheck(fn, (inp,)) - - for size in ([0, 10], [2, 0, 3]): - for sparse_dim in (0, 1, 2): - for nnz in (0, 10): - if nnz > 0 and any(size[d] == 0 for d in range(sparse_dim)): - # impossible combination because the sparse dims have - # empty numel, but nnz > 0. - continue - test(size=size, sparse_dim=sparse_dim, nnz=nnz) + gradcheck(fn, (inp,)) + # FIXME: make gradgradcheck work. 
+ # gradgradcheck(fn, (inp,)) + + # assert that _values is non-differentiable + with self.assertRaisesRegex(RuntimeError, "does not have a grad_fn"): + other.detach().requires_grad_()._values().backward(torch.ones_like(other._values())) + + devices = ['cpu'] + + if torch.cuda.is_available(): + devices.append('cuda') + + for empty_i, empty_v, empty_nnz in product([True, False], repeat=3): + sparse_size = [] if empty_i else [2, 1] + dense_size = [1, 0, 2] if empty_v else [1, 2] + nnz = 0 if empty_nnz else 5 + for device in devices: + test(sparse_size + dense_size, len(sparse_size), nnz, device) def test_multi_backward(self): x = torch.randn(5, 5, requires_grad=True) diff --git a/test/test_sparse.py b/test/test_sparse.py index a26bd2bfc1665..b08f3e79a2bb3 100644 --- a/test/test_sparse.py +++ b/test/test_sparse.py @@ -169,6 +169,16 @@ def test_shape(sparse_dims, nnz, with_size): self.assertEqual(x._indices().numel(), 0) self.assertEqual(x._values().numel(), 0) + @skipIfRocm + def test_coalecce(self): + for empty_i, empty_v, empty_nnz in itertools.product([True, False], repeat=3): + sparse_size = [] if empty_i else [2, 1] + dense_size = [1, 0, 2] if empty_v else [1, 2] + nnz = 0 if empty_nnz else 5 + + t, _, _ = self._gen_sparse(len(sparse_size), nnz, sparse_size + dense_size) + self.safeCoalesce(t) # this tests correctness + def test_ctor_size_checks(self): indices = self.IndexTensor([ [0, 0, 0], diff --git a/tools/autograd/derivatives.yaml b/tools/autograd/derivatives.yaml index 81065d6df1702..38077e763cab7 100644 --- a/tools/autograd/derivatives.yaml +++ b/tools/autograd/derivatives.yaml @@ -577,8 +577,8 @@ self: pow_backward_self(grad, self, exponent) exponent: pow_backward_exponent(grad, self, exponent) -- name: pow(Scalar self, Tensor exponent) - exponent: pow_backward_exponent(grad, self, exponent) +- name: pow(Scalar base, Tensor self) + self: pow_backward_exponent(grad, base, self) - name: _prod(Tensor self, int64_t dim, bool keepdim) self: prod_backward(grad, 
self, result, dim, keepdim) @@ -813,10 +813,12 @@ self: not_implemented("_standard_gamma_grad") - name: values(Tensor self) - self: sparse_values_backward(grad, self.indices(), self.sizes(), self.is_coalesced()) + self: at::_sparse_coo_tensor_unsafe(self.indices(), grad, self.sizes())._coalesced_(true); +# Why is _values() not differentiable? +# See NOTE [ Sparse: autograd and API ] - name: _values(Tensor self) - self: sparse_values_backward(grad, self._indices(), self.sizes(), self.is_coalesced()) + output_differentiability: [False] # NN - name: _trilinear(Tensor i1, Tensor i2, Tensor i3, IntList expand1, IntList expand2, IntList expand3, IntList sumdim, int64_t unroll_dim) diff --git a/tools/autograd/templates/Functions.cpp b/tools/autograd/templates/Functions.cpp index 1abc69ed20e92..a96d29b5c6a28 100644 --- a/tools/autograd/templates/Functions.cpp +++ b/tools/autograd/templates/Functions.cpp @@ -875,15 +875,15 @@ Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scala } -// NOTE [ as_strided Backward ] +// NOTE [ as_strided Backward and layout-aware/agnostic autograd ] // // `storage_offset` is ignored for simplicity in this note. If you just want the // full algorithm without explanation, scroll down to bottom of this note. // // Implementing the backward of as_strided is tricky because you have to deal -// with mappings that maps one memory location to multiple indices, i.e., the -// output tensor indices pointing to **overlapping** memory addresses. This can -// happen in all in all sorts of weird cases. For example, +// with mappings that map one memory location to multiple indices, i.e., the +// output tensor has multiple indices pointing to **overlapping** memory +// addresses. This can happen in all in all sorts of weird cases. 
For example, // // x = torch.randn(15) // x.as_strided([3, 3], [1, 0]) # "expand" case @@ -892,7 +892,7 @@ Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scala // # res[0, 1] points to 0*3 + 1*6 = 6 // // Here is the general strategy we apply in implementing as_strided backward: -// 0. ??? (optimizaiont step. we will talk about this later) +// 0. ??? (optimization step. we will talk about this later) // 1. Create some underlying flattened tensor as if it is the base tensor // representing the contiguous memory storage for both input and output. // 2. Use the output geometry to scatter (or index_add) the gradients into @@ -903,7 +903,7 @@ Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scala // // In step (2), if the output tensor does't have overlapping memory, we can // safely scatter (`storage.as_strided(output_geometry).copy_(grad)`); -// otherwise, we must use `index_add` as gradient at different indices may need +// otherwise, we must use `index_add` as gradients at different indices may need // to be summed to a single location. // // For example, in this case: @@ -934,10 +934,10 @@ Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scala // x = t.expand(3, 3) # input with overlapping memory // # size [3, 3] // # stride [0, 1] -// y = x.as_strided([3], [1]) # contiguous output -// # size [3] +// y = x.as_strided([1], [1]) # contiguous output +// # size [1] // # stride [1] -// y.backward() # step (1): contiguous storagte tensor `s` of size 3, which +// y.backward() # step (1): contiguous storage tensor `s` of size 3, which // is large enough to be used as underlying storage // for `x` and `y`. 
// s = [ 0, 0, 0] @@ -970,17 +970,17 @@ Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scala // In the above computation of numerical gradients, they only match the // analytical results because strides and memory locations are considered in the // forward pass, i.e., this op (including both forward and backward) is -// stride-aware. +// layout-aware. // -// However, most (probably all) other ops (forward and backward) are -// stride-agnostic. E.g., +// However, in PyTorch, most (probably all) other ops (forward and backward) are +// layout-agnostic. E.g., // // t = torch.randn(1) // x = t.expand(2) // y = x.sum() // y.backward() // -// Stride-agnostic autograd (as it is currently in PyTorch) will give you +// Layout-agnostic autograd (as it is currently in PyTorch) will give you // // gy = 1 // gx = [ 1, 1] # SumBackward: torch.ones_like(x) @@ -990,28 +990,30 @@ Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scala // (the other will also change by `delta`), `y` will change by `2 * delta`. So // the gradients, if strides are taken into consideration, should be 2. // -// Stride-aware autograd should give you +// Layout-aware autograd should give you // // gy = 1 // gx = [ 2, 2] # Because the backward considers the fact that the input `x` // # is already expanded. -// gt = [ 2] # Stride-aware backward of expand is just a slicing because +// gt = [ 2] # Layout-aware backward of expand is just a slicing because // # the previous backward should have already taken care of // # strides and made sure that gradients are the same along the // # expanded dimension. // // As shown above, these two types are not compatible. Therefore, we must either -// make as_strided stride-agnostic, or make all other ops stride-aware. +// make as_strided layout-agnostic, or make all other ops layout-aware. 
// -// It is unrealisitc to support stride-aware autograd (at least in the current -// structure), because it would mean +// It is difficult to support layout-aware autograd (at least in the current +// codebase structure), because it would mean // 1. storing tensor geometries of every input tensor for backward // 2. depending on input geometry, the gradient computed from backward change // 3. ideally enforcing gradient of T to always have same strides as T // (although these two methods only differ when it comes to overlapping memory) // -// To formulate `as_strided(input, size, stride)` in a stride-agnostic way, we -// consider `input.stride()` as a separate independent arguement `input_stride`: +// Therefore, we must formulate `as_strided` in a layout-agnostic way, i.e., +// giving the same output regardless of the input layout. We consider +// `input.stride()` as a separate independent fixed argument `input_stride`. +// Then, `as_strided(input, size, stride)` can be thought of as: // 1. "Scatter" each value of `input` into a "storage" using storage location // computed from the value's index in `input`, `input.size()` and // `input_stride`, but if N values end up in the same location, the value @@ -1021,7 +1023,9 @@ Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scala // Denote the set of all input indices that pointing to the same storage // location `storage[n]` as `S(n)`, i.e., // -// S(n) = { index : index @ input_stride == n, index is valid given input.size() } +// S(n) = { index : <index, input_stride> == n, index is valid given input.size() }, +// +// where `<x, y>` is the dot product between `x` and `y`. // // Then, the process is: // @@ -1029,7 +1033,9 @@ Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scala // // Note that all values in `S(n)` are the same (they point to the same // memory location anyways, so this step doesn't change anything, but -// effectively avoids using `input.stride()`.
+// effectively avoids having the dependency on the layout of `input`. +// I.e., the result holds fixed regardless of the layout of `input`, as +// long as `input_stride` is fixed. // // NOTE: for forward pass, we can equivalently simply selet any one of // `S(n)` as `storage[n]`. However, cosnidering this as an average @@ -1039,7 +1045,7 @@ Tensor softplus_double_backward(const Tensor & grad, const Tensor & input, Scala // 2. As usual, return the as_strided view of `storage` using required output // `size` and `stride`. // -// To backward through this stride-agnostic version, we simply add the following +// To backward through this layout-agnostic version, we simply add the following // step: // .... (scatter gradients into the storage tensor using output geometry) // 3. For all storage location n, `storage[n] /= |S(n)|`. @@ -1312,13 +1318,13 @@ static inline int64_t _min_storage_size(IntList sizes, IntList strides, int64_t return storage_size; } -// See NOTE [ as_strided Backward ] for explanation +// See NOTE [ as_strided Backward and layout-aware/agnostic autograd ] for explanation Tensor as_strided_backward(Tensor grad, TensorGeometry input_geometry, IntList sizes, IntList strides, int64_t storage_offset) { // For output geometry, // check for size 0 dimensions, // skip size 1 dimensions, // reduce grad on expanded dims (stride=0, size>1) - // Step (0) for the algorithm in NOTE [ as_strided Backward ] + // Step (0) for the algorithm in NOTE [ as_strided Backward and layout-aware/agnostic autograd ] // Step (0)~(1) for the algorithm in NOTE [ Detecting Memory Overlap Within A Strided Tensor ] // on output geometry auto odim = grad.dim(); @@ -1369,7 +1375,7 @@ Tensor as_strided_backward(Tensor grad, TensorGeometry input_geometry, IntList s // Rest of this function implements - // Step (1)~(4) for the algorithm in NOTE [ as_strided Backward ] + // Step (1)~(4) for the algorithm in NOTE [ as_strided Backward and layout-aware/agnostic autograd ] // TODO: Raise if
not all output values are visible in input geometry. // Technically speaking, if you treat those values as constants, not // raising is fine, and mathematically correct. However, these values @@ -1988,22 +1994,13 @@ Tensor log1p_backward(const Tensor& grad, const Tensor& self) { return grad / (self + 1); } -Tensor sparse_values_backward(const Tensor& grad_values, const Tensor& indices, - IntList sizes, bool input_is_coalesced) { - auto grad_input = at::_sparse_coo_tensor_unsafe(indices, grad_values, sizes); - return grad_input._coalesced_(input_is_coalesced); -} - Tensor sparse_constructor_values_backward(const Tensor& sparse_grad_out, const Tensor& indices, IntList values_shape) { // TODO: improve this backward by writing a kernel (maybe) auto sparse_dim = indices.size(0); - if (sparse_dim == 0) { - return at::zeros(values_shape, sparse_grad_out._values().options()); - } - auto dense_grad = sparse_grad_out.to_dense(); + auto dense_grad = sparse_grad_out.is_sparse() ? sparse_grad_out.to_dense() : sparse_grad_out; auto full_size = sparse_grad_out.sizes(); auto flattened_grad_shape = values_shape.vec(); - flattened_grad_shape[0] = at::prod_intlist(full_size.slice(0, sparse_dim)); + flattened_grad_shape[0] = at::prod_intlist(full_size.slice(0, indices.size(0))); auto flattened_dense_grad = dense_grad.view(flattened_grad_shape); auto flattened_indices = at::sparse::flatten_indices(indices, full_size); return flattened_dense_grad.index_select(0, flattened_indices); diff --git a/torch/csrc/utils/tensor_new.cpp b/torch/csrc/utils/tensor_new.cpp index faffd278841c3..7f2a70827acba 100644 --- a/torch/csrc/utils/tensor_new.cpp +++ b/torch/csrc/utils/tensor_new.cpp @@ -221,8 +221,8 @@ Tensor internal_new_from_data( *torch::getLayout(type.backend()), type_inference_device_type); const auto& type_to_use = type_inference ? type_inference_type : type; - return copy_variables ? 
new_with_tensor_copy(type_to_use, var, device_index) : - new_with_type_conversion(type_to_use, var, device_index); + return copy_variables ? new_with_tensor_copy(type_to_use, var, device_index) + : new_with_type_conversion(type_to_use, var, device_index); } #ifdef USE_NUMPY @@ -464,10 +464,7 @@ Tensor legacy_new_from_data( return internal_new_from_data(type, device, data, false, false, false); } -Tensor sparse_coo_tensor_ctor(const Type& type, PyObject* args, PyObject* kwargs) { - const auto sparse_backend = type.is_cuda() ? Backend::SparseCUDA : Backend::SparseCPU; - const auto& default_sparse_type = type.toBackend(sparse_backend); - +Tensor sparse_coo_tensor_ctor(const Type& default_type, PyObject* args, PyObject* kwargs) { static PythonArgParser parser({ "sparse_coo_tensor(PyObject* indices, PyObject* values, *, ScalarType dtype=None, Device? device=None, bool requires_grad=False)", "sparse_coo_tensor(PyObject* indices, PyObject* values, IntList size, *, ScalarType dtype=None, Device? device=None, bool requires_grad=False)", @@ -478,27 +475,27 @@ Tensor sparse_coo_tensor_ctor(const Type& type, PyObject* args, PyObject* kwargs auto r = parser.parse(args, kwargs, parsed_args); if (r.idx == 0) { bool type_inference = r.isNone(2); - const auto& sparse_type = typeWithDefault(r, 2, 3, default_sparse_type); - const auto& dense_type = sparse_type.toBackend(sparse_type.is_cuda() ? Backend::CUDA : Backend::CPU); + const auto& type = typeWithDefault(r, 2, 3, default_type); + const auto& values_type = type.toDense(); at::DeviceGuard device_guard(r.device(3)); - Tensor values = internal_new_from_data(dense_type, r.deviceOptional(3), r.pyobject(1), false, true, type_inference); // if no dtype provided, infer type based on value type. 
- const auto& index_type = values.type().toScalarType(kLong); - Tensor indices = internal_new_from_data(index_type, r.deviceOptional(3), r.pyobject(0), false, true, false); - return at::sparse_coo_tensor(indices, values).set_requires_grad(r.toBool(4)); + Tensor values = internal_new_from_data(values_type, r.deviceOptional(3), r.pyobject(1), false, true, type_inference); + const auto& indices_type = values.type().toScalarType(kLong); + Tensor indices = internal_new_from_data(indices_type, r.deviceOptional(3), r.pyobject(0), false, true, false); + return at::sparse_coo_tensor(indices, values, values.options().layout(at::kSparse)).set_requires_grad(r.toBool(4)); } else if (r.idx == 1) { bool type_inference = r.isNone(3); - const auto& sparse_type = typeWithDefault(r, 3, 4, default_sparse_type); - const auto& dense_type = sparse_type.toBackend(sparse_type.is_cuda() ? Backend::CUDA : Backend::CPU); + const auto& type = typeWithDefault(r, 3, 4, default_type); + const auto& values_type = type.toDense(); at::DeviceGuard device_guard(r.device(4)); - Tensor values = internal_new_from_data(dense_type, r.deviceOptional(4), r.pyobject(1), false, true, type_inference); - const auto& index_type = values.type().toScalarType(kLong); - Tensor indices = internal_new_from_data(index_type, r.deviceOptional(4), r.pyobject(0), false, true, false); - return at::sparse_coo_tensor(indices, values, r.intlist(2)).set_requires_grad(r.toBool(5)); + Tensor values = internal_new_from_data(values_type, r.deviceOptional(4), r.pyobject(1), false, true, type_inference); + const auto& indices_type = values.type().toScalarType(kLong); + Tensor indices = internal_new_from_data(indices_type, r.deviceOptional(4), r.pyobject(0), false, true, false); + return at::sparse_coo_tensor(indices, values, r.intlist(2), values.options().layout(at::kSparse)).set_requires_grad(r.toBool(5)); } else if (r.idx == 2) { - const auto& sparse_type_to_use = typeWithDefault(r, 1, 2, default_sparse_type); + const auto& type = 
typeWithDefault(r, 1, 2, default_type); at::DeviceGuard device_guard(r.device(2)); - return at::sparse_coo_tensor(r.intlist(0), sparse_type_to_use.options()).set_requires_grad(r.toBool(3)); + return at::sparse_coo_tensor(r.intlist(0), type.options().layout(at::kSparse)).set_requires_grad(r.toBool(3)); } throw std::runtime_error("sparse_coo_tensor(): invalid arguments"); } From 5649862067c93f3fe53673e4f2d8cdddc748f18f Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Sun, 21 Oct 2018 11:39:42 -0700 Subject: [PATCH 16/23] update expect because _values is nondifferentiable --- test/expect/TestCudaSparse.test_print.expect | 21 +++++++------------ ...estCudaUncoalescedSparse.test_print.expect | 21 +++++++------------ test/expect/TestSparse.test_print.expect | 15 +++++++------ .../TestUncoalescedSparse.test_print.expect | 15 +++++++------ 4 files changed, 28 insertions(+), 44 deletions(-) diff --git a/test/expect/TestCudaSparse.test_print.expect b/test/expect/TestCudaSparse.test_print.expect index 2745fb59a2ba3..dd924a8385cc1 100644 --- a/test/expect/TestCudaSparse.test_print.expect +++ b/test/expect/TestCudaSparse.test_print.expect @@ -32,8 +32,7 @@ tensor(indices=tensor([], size=(0, 4)), # _indices tensor([], device='cuda:0', size=(0, 2), dtype=torch.int64) # _values -tensor([0., 1.], device='cuda:0', dtype=torch.float32, - grad_fn=) +tensor([0., 1.], device='cuda:0', dtype=torch.float32) # shape: torch.Size([0]) # nnz: 10 @@ -69,8 +68,7 @@ tensor(indices=tensor([], size=(0, 20)), # _indices tensor([], device='cuda:0', size=(0, 10), dtype=torch.int64) # _values -tensor([], device='cuda:0', size=(10, 0), dtype=torch.float32, - grad_fn=) +tensor([], device='cuda:0', size=(10, 0), dtype=torch.float32) # shape: torch.Size([2]) # nnz: 3 @@ -121,8 +119,7 @@ tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64) # _values tensor([[0.0000, 0.3333], [0.6667, 1.0000], - [1.3333, 1.6667]], device='cuda:0', dtype=torch.float32, - grad_fn=) + [1.3333, 1.6667]], 
device='cuda:0', dtype=torch.float32) # shape: torch.Size([100, 3]) # nnz: 3 @@ -173,8 +170,7 @@ tensor([[0, 1, 2]], device='cuda:0') # _values tensor([[0.0000, 0.2222, 0.4444], [0.6667, 0.8889, 1.1111], - [1.3333, 1.5556, 1.7778]], device='cuda:0', dtype=torch.float32, - grad_fn=) + [1.3333, 1.5556, 1.7778]], device='cuda:0', dtype=torch.float32) # shape: torch.Size([100, 20, 3]) # nnz: 0 @@ -210,8 +206,7 @@ tensor(indices=tensor([], size=(2, 0)), # _indices tensor([], device='cuda:0', size=(2, 0), dtype=torch.int64) # _values -tensor([], device='cuda:0', size=(0, 3), dtype=torch.float32, - grad_fn=) +tensor([], device='cuda:0', size=(0, 3), dtype=torch.float32) # shape: torch.Size([10, 0, 3]) # nnz: 3 @@ -247,8 +242,7 @@ tensor(indices=tensor([], size=(0, 6)), # _indices tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64) # _values -tensor([], device='cuda:0', size=(3, 10, 0, 3), dtype=torch.float32, - grad_fn=) +tensor([], device='cuda:0', size=(3, 10, 0, 3), dtype=torch.float32) # shape: torch.Size([10, 0, 3]) # nnz: 0 @@ -284,5 +278,4 @@ tensor(indices=tensor([], size=(0, 0)), # _indices tensor([], device='cuda:0', size=(0, 0), dtype=torch.int64) # _values -tensor([], device='cuda:0', size=(0, 10, 0, 3), dtype=torch.float32, - grad_fn=) +tensor([], device='cuda:0', size=(0, 10, 0, 3), dtype=torch.float32) diff --git a/test/expect/TestCudaUncoalescedSparse.test_print.expect b/test/expect/TestCudaUncoalescedSparse.test_print.expect index 30b8403a2eb64..e982b059933b9 100644 --- a/test/expect/TestCudaUncoalescedSparse.test_print.expect +++ b/test/expect/TestCudaUncoalescedSparse.test_print.expect @@ -32,8 +32,7 @@ tensor(indices=tensor([], size=(0, 4)), # _indices tensor([], device='cuda:0', size=(0, 2), dtype=torch.int64) # _values -tensor([0., 1.], device='cuda:0', dtype=torch.float32, - grad_fn=) +tensor([0., 1.], device='cuda:0', dtype=torch.float32) # shape: torch.Size([0]) # nnz: 10 @@ -69,8 +68,7 @@ tensor(indices=tensor([], size=(0, 20)), # 
_indices tensor([], device='cuda:0', size=(0, 10), dtype=torch.int64) # _values -tensor([], device='cuda:0', size=(10, 0), dtype=torch.float32, - grad_fn=) +tensor([], device='cuda:0', size=(10, 0), dtype=torch.float32) # shape: torch.Size([2]) # nnz: 3 @@ -121,8 +119,7 @@ tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64) # _values tensor([[0.0000, 0.3333], [0.6667, 1.0000], - [1.3333, 1.6667]], device='cuda:0', dtype=torch.float32, - grad_fn=) + [1.3333, 1.6667]], device='cuda:0', dtype=torch.float32) # shape: torch.Size([100, 3]) # nnz: 3 @@ -173,8 +170,7 @@ tensor([[0, 1, 0]], device='cuda:0') # _values tensor([[0.0000, 0.2222, 0.4444], [0.6667, 0.8889, 1.1111], - [1.3333, 1.5556, 1.7778]], device='cuda:0', dtype=torch.float32, - grad_fn=) + [1.3333, 1.5556, 1.7778]], device='cuda:0', dtype=torch.float32) # shape: torch.Size([100, 20, 3]) # nnz: 0 @@ -210,8 +206,7 @@ tensor(indices=tensor([], size=(2, 0)), # _indices tensor([], device='cuda:0', size=(2, 0), dtype=torch.int64) # _values -tensor([], device='cuda:0', size=(0, 3), dtype=torch.float32, - grad_fn=) +tensor([], device='cuda:0', size=(0, 3), dtype=torch.float32) # shape: torch.Size([10, 0, 3]) # nnz: 3 @@ -247,8 +242,7 @@ tensor(indices=tensor([], size=(0, 6)), # _indices tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64) # _values -tensor([], device='cuda:0', size=(3, 10, 0, 3), dtype=torch.float32, - grad_fn=) +tensor([], device='cuda:0', size=(3, 10, 0, 3), dtype=torch.float32) # shape: torch.Size([10, 0, 3]) # nnz: 0 @@ -284,5 +278,4 @@ tensor(indices=tensor([], size=(0, 0)), # _indices tensor([], device='cuda:0', size=(0, 0), dtype=torch.int64) # _values -tensor([], device='cuda:0', size=(0, 10, 0, 3), dtype=torch.float32, - grad_fn=) +tensor([], device='cuda:0', size=(0, 10, 0, 3), dtype=torch.float32) diff --git a/test/expect/TestSparse.test_print.expect b/test/expect/TestSparse.test_print.expect index 4f282194f5cb8..d0501228f9262 100644 --- 
a/test/expect/TestSparse.test_print.expect +++ b/test/expect/TestSparse.test_print.expect @@ -30,7 +30,7 @@ tensor(indices=tensor([], size=(0, 2)), # _indices tensor([], size=(0, 2), dtype=torch.int64) # _values -tensor([0., 1.], dtype=torch.float32, grad_fn=) +tensor([0., 1.], dtype=torch.float32) # shape: torch.Size([0]) # nnz: 10 @@ -64,7 +64,7 @@ tensor(indices=tensor([], size=(0, 10)), # _indices tensor([], size=(0, 10), dtype=torch.int64) # _values -tensor([], size=(10, 0), dtype=torch.float32, grad_fn=) +tensor([], size=(10, 0), dtype=torch.float32) # shape: torch.Size([2]) # nnz: 3 @@ -110,7 +110,7 @@ tensor([], size=(0, 3), dtype=torch.int64) # _values tensor([[0.0000, 0.3333], [0.6667, 1.0000], - [1.3333, 1.6667]], dtype=torch.float32, grad_fn=) + [1.3333, 1.6667]], dtype=torch.float32) # shape: torch.Size([100, 3]) # nnz: 3 @@ -156,8 +156,7 @@ tensor([[0, 1, 2]]) # _values tensor([[0.0000, 0.2222, 0.4444], [0.6667, 0.8889, 1.1111], - [1.3333, 1.5556, 1.7778]], dtype=torch.float32, - grad_fn=) + [1.3333, 1.5556, 1.7778]], dtype=torch.float32) # shape: torch.Size([100, 20, 3]) # nnz: 0 @@ -191,7 +190,7 @@ tensor(indices=tensor([], size=(2, 0)), # _indices tensor([], size=(2, 0), dtype=torch.int64) # _values -tensor([], size=(0, 3), dtype=torch.float32, grad_fn=) +tensor([], size=(0, 3), dtype=torch.float32) # shape: torch.Size([10, 0, 3]) # nnz: 3 @@ -225,7 +224,7 @@ tensor(indices=tensor([], size=(0, 3)), # _indices tensor([], size=(0, 3), dtype=torch.int64) # _values -tensor([], size=(3, 10, 0, 3), dtype=torch.float32, grad_fn=) +tensor([], size=(3, 10, 0, 3), dtype=torch.float32) # shape: torch.Size([10, 0, 3]) # nnz: 0 @@ -259,4 +258,4 @@ tensor(indices=tensor([], size=(0, 0)), # _indices tensor([], size=(0, 0), dtype=torch.int64) # _values -tensor([], size=(0, 10, 0, 3), dtype=torch.float32, grad_fn=) +tensor([], size=(0, 10, 0, 3), dtype=torch.float32) diff --git a/test/expect/TestUncoalescedSparse.test_print.expect 
b/test/expect/TestUncoalescedSparse.test_print.expect index a642b4bb8c0ab..7a2e3c68b9ed1 100644 --- a/test/expect/TestUncoalescedSparse.test_print.expect +++ b/test/expect/TestUncoalescedSparse.test_print.expect @@ -30,7 +30,7 @@ tensor(indices=tensor([], size=(0, 2)), # _indices tensor([], size=(0, 2), dtype=torch.int64) # _values -tensor([0., 1.], dtype=torch.float32, grad_fn=) +tensor([0., 1.], dtype=torch.float32) # shape: torch.Size([0]) # nnz: 10 @@ -64,7 +64,7 @@ tensor(indices=tensor([], size=(0, 10)), # _indices tensor([], size=(0, 10), dtype=torch.int64) # _values -tensor([], size=(10, 0), dtype=torch.float32, grad_fn=) +tensor([], size=(10, 0), dtype=torch.float32) # shape: torch.Size([2]) # nnz: 3 @@ -110,7 +110,7 @@ tensor([], size=(0, 3), dtype=torch.int64) # _values tensor([[0.0000, 0.3333], [0.6667, 1.0000], - [1.3333, 1.6667]], dtype=torch.float32, grad_fn=) + [1.3333, 1.6667]], dtype=torch.float32) # shape: torch.Size([100, 3]) # nnz: 3 @@ -156,8 +156,7 @@ tensor([[0, 1, 0]]) # _values tensor([[0.0000, 0.2222, 0.4444], [0.6667, 0.8889, 1.1111], - [1.3333, 1.5556, 1.7778]], dtype=torch.float32, - grad_fn=) + [1.3333, 1.5556, 1.7778]], dtype=torch.float32) # shape: torch.Size([100, 20, 3]) # nnz: 0 @@ -191,7 +190,7 @@ tensor(indices=tensor([], size=(2, 0)), # _indices tensor([], size=(2, 0), dtype=torch.int64) # _values -tensor([], size=(0, 3), dtype=torch.float32, grad_fn=) +tensor([], size=(0, 3), dtype=torch.float32) # shape: torch.Size([10, 0, 3]) # nnz: 3 @@ -225,7 +224,7 @@ tensor(indices=tensor([], size=(0, 3)), # _indices tensor([], size=(0, 3), dtype=torch.int64) # _values -tensor([], size=(3, 10, 0, 3), dtype=torch.float32, grad_fn=) +tensor([], size=(3, 10, 0, 3), dtype=torch.float32) # shape: torch.Size([10, 0, 3]) # nnz: 0 @@ -259,4 +258,4 @@ tensor(indices=tensor([], size=(0, 0)), # _indices tensor([], size=(0, 0), dtype=torch.int64) # _values -tensor([], size=(0, 10, 0, 3), dtype=torch.float32, grad_fn=) +tensor([], size=(0, 10, 0, 
3), dtype=torch.float32) From 683defc898c5167bd5fc6ba93fc1998833444555 Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Sun, 21 Oct 2018 14:50:53 -0400 Subject: [PATCH 17/23] Fix narrow_copy_sparse compilation --- aten/src/ATen/native/TensorShape.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aten/src/ATen/native/TensorShape.cpp b/aten/src/ATen/native/TensorShape.cpp index 48d9d9a9083c5..c8f0d61aee683 100644 --- a/aten/src/ATen/native/TensorShape.cpp +++ b/aten/src/ATen/native/TensorShape.cpp @@ -173,14 +173,14 @@ Tensor narrow_copy_sparse(const Tensor& self, int64_t dim, int64_t start, int64_ "Dimension ", dim, " out of range. Expecting 0 <= dim < ", allDim, "."); AT_CHECK(start >= 0 && length >= 0 && end <= self.size(dim), "Invalid range to narrow. range(start, start+length) must be a subset of range(0, ", self.size(dim), ").") - LongTensor indices = self._indices(); + Tensor indices = self._indices(); int64_t sparse_dim = self.sparse_dim(); std::vector new_sizes = self.sizes().vec(); new_sizes[dim] = length; Tensor new_values; - LongTensor new_indices; + Tensor new_indices; if (dim < sparse_dim) { Tensor mask = (indices[dim] >= start).__and__((indices[dim] < end)); new_indices = indices.masked_select(mask).view({sparse_dim, -1}); From 7c0cf99a05f9b4e6c2f22d0008c89766130d171a Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Sun, 21 Oct 2018 14:59:06 -0400 Subject: [PATCH 18/23] Revert unintended derivatives.yaml change; --- tools/autograd/derivatives.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/autograd/derivatives.yaml b/tools/autograd/derivatives.yaml index 38077e763cab7..e0d4663492427 100644 --- a/tools/autograd/derivatives.yaml +++ b/tools/autograd/derivatives.yaml @@ -577,8 +577,8 @@ self: pow_backward_self(grad, self, exponent) exponent: pow_backward_exponent(grad, self, exponent) -- name: pow(Scalar base, Tensor self) - self: pow_backward_exponent(grad, base, self) +- name: pow(Scalar self, 
Tensor exponent) + exponent: pow_backward_exponent(grad, self, exponent) - name: _prod(Tensor self, int64_t dim, bool keepdim) self: prod_backward(grad, self, result, dim, keepdim) From 1e053df5a70608eb191da2421e9eac795da51dfa Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Sun, 21 Oct 2018 15:17:29 -0400 Subject: [PATCH 19/23] revert no_grad viewer test since that part of the change is not getting in this time --- test/test_autograd.py | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/test/test_autograd.py b/test/test_autograd.py index 9266e4a23c2d3..0a515b1cf8f6c 100644 --- a/test/test_autograd.py +++ b/test/test_autograd.py @@ -767,36 +767,28 @@ def scope(): def test_no_grad(self): x = torch.ones(5, 5, requires_grad=True) - y = torch.ones(5, 5) * 4 + y = Variable(torch.ones(5, 5) * 4) + with torch.no_grad(): + w = x + y + @torch.no_grad() def adder(x, y): return x + y - def viewer(x, y): - return x[1] - - for binary_op in (adder, viewer): - with torch.no_grad(): - w = binary_op(x, y) - - @torch.no_grad() - def decorated(x, y): - return binary_op(x, y) - - z = decorated(x, y) + z = adder(x, y) - self.assertFalse(w.requires_grad) - self.assertRaises(RuntimeError, lambda: w.backward(torch.ones(5, 5))) - self.assertIsNone(w.grad_fn) - self.assertFalse(z.requires_grad) - self.assertRaises(RuntimeError, lambda: z.backward(torch.ones(5, 5))) - self.assertIsNone(z.grad_fn) + self.assertFalse(w.requires_grad) + self.assertRaises(RuntimeError, lambda: w.backward(torch.ones(5, 5))) + self.assertIsNone(w.grad_fn) + self.assertFalse(z.requires_grad) + self.assertRaises(RuntimeError, lambda: z.backward(torch.ones(5, 5))) + self.assertIsNone(z.grad_fn) - # test nested decorator and with-statement on no_grad - with torch.no_grad(): - self.assertFalse(torch.is_grad_enabled()) - w = binary_op(x, y) - self.assertFalse(torch.is_grad_enabled()) + # test nested decorator and with-statement on no_grad + with 
torch.no_grad(): + self.assertFalse(torch.is_grad_enabled()) + w = adder(x, y) + self.assertFalse(torch.is_grad_enabled()) def test_no_grad_python_function(self): """Python Functions should respect grad mode.""" From 61887290834de6fa792bff46e476df128eddcf76 Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Sun, 21 Oct 2018 17:08:50 -0400 Subject: [PATCH 20/23] Fix test_numba_integration may try to produce impossible sparse tensor --- test/test_numba_integration.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test_numba_integration.py b/test/test_numba_integration.py index 2894d5daaa54e..310bc1c696803 100644 --- a/test/test_numba_integration.py +++ b/test/test_numba_integration.py @@ -67,14 +67,15 @@ def test_cuda_array_interface(self): # Sparse CPU/CUDA tensors do not implement the interface if tp not in (torch.HalfTensor,): - sparse_t = torch.sparse_coo_tensor(cput[None, :], cput) + indices_t = cput.clamp(min=0) # prevent sparse_coo_tensor error + sparse_t = torch.sparse_coo_tensor(indices_t, cput) self.assertFalse(hasattr(sparse_t, "__cuda_array_interface__")) self.assertRaises( AttributeError, lambda: sparse_t.__cuda_array_interface__ ) - sparse_cuda_t = torch.sparse_coo_tensor(cput[None, :], cput).cuda() + sparse_cuda_t = torch.sparse_coo_tensor(indices_t, cput).cuda() self.assertFalse(hasattr(sparse_cuda_t, "__cuda_array_interface__")) self.assertRaises( From ca38c80899022485da5f62be5df3c4d84fbc1848 Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Mon, 22 Oct 2018 13:01:09 -0400 Subject: [PATCH 21/23] fix numba test & improve argcheck --- aten/src/ATen/SparseTensorImpl.cpp | 5 ++- aten/src/ATen/native/sparse/SparseTensor.cpp | 42 ++++++++++++-------- test/test_numba_integration.py | 2 +- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/aten/src/ATen/SparseTensorImpl.cpp b/aten/src/ATen/SparseTensorImpl.cpp index 1ffba3fd04952..21a823cb73701 100644 --- a/aten/src/ATen/SparseTensorImpl.cpp +++ 
b/aten/src/ATen/SparseTensorImpl.cpp @@ -85,12 +85,15 @@ int64_t SparseTensorImpl::storage_offset() const { void SparseTensorImpl::set_indices_and_values_unsafe(const Tensor& indices, const Tensor& values) { AT_ASSERT(!indices.is_variable() && !values.is_variable()); // They should be plain tensors! + AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout()); + AT_CHECK(!values.is_sparse(), "expected values to be a dense tensor, but got values of layout ", values.layout()); + AT_CHECK(values.type().toSparse() == type(), "values type must match sparse tensor type"); AT_CHECK(indices.type().scalarType() == kLong, "indices must be an int64 tensor"); AT_CHECK(indices.type().backend() == values.type().backend(), "backend of indices (", indices.type().backend(), ") must match backend of values (", values.type().backend(), ")"); AT_CHECK(!indices.is_cuda() || indices.get_device() == values.get_device(), "device of indices (", indices.get_device(), ") must match device of values (", values.get_device(), ")"); - AT_CHECK(indices.dim() == 2, "indices must be nDim x nnz, but got: ", indices.sizes()); + AT_CHECK(indices.dim() == 2, "indices must be sparse_dim x nnz, but got: ", indices.sizes()); AT_CHECK(indices.size(1) == values.size(0), "indices and values must have same nnz, but got nnz from indices: ", indices.size(1), ", nnz from values: ", values.size(0)); AT_CHECK(indices.size(0) == sparse_dim_, "indices has incorrect first dimension, expected ", sparse_dim_, ", got ", indices.size(0)); AT_CHECK(values.dim() == dense_dim_ + 1, "values has incorrect number of dimensions, expected ", dense_dim_ + 1, ", got ", values.dim()); diff --git a/aten/src/ATen/native/sparse/SparseTensor.cpp b/aten/src/ATen/native/sparse/SparseTensor.cpp index 73a8b6b8343e8..aba245881b8bf 100644 --- a/aten/src/ATen/native/sparse/SparseTensor.cpp +++ b/aten/src/ATen/native/sparse/SparseTensor.cpp @@ -121,29 +121,28 @@ Tensor 
sparse_coo_tensor(ArrayRef size, const TensorOptions& options) { /* Pointer-copy init */ -// helpers +// helper namespace { - // Helper called in **every** sparse_coo_tensor ctor with indices and - // values tensors. - static inline Tensor argcheck_and_expand_values_if_needed( - const LongTensor& indices_, const Tensor& values_, const TensorOptions& options) { - // arg checking - AT_CHECK(!indices_.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices_.layout()); - AT_CHECK(!values_.is_sparse(), "expected values to be a dense tensor, but got values of layout ", values_.layout()); - AT_CHECK(!options.has_layout() || options.layout() == kSparse, "expected sparse layout, but got layout ", options.layout()); - + static inline Tensor expand_values_if_needed(const Tensor& values) { // expand - if (values_.dim() == 0) { + if (values.dim() == 0) { // Mimic Numpy behavior here and treat it as a 1D tensor - return values_.expand({1}); + return values.expand({1}); } else { - return values_; + return values; } } } Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, const TensorOptions& options) { - Tensor values = argcheck_and_expand_values_if_needed(indices, values_, options); + Tensor values = expand_values_if_needed(values_); + + // arg checking + AT_CHECK(!options.has_layout() || options.layout() == kSparse, "expected sparse layout, but got layout ", options.layout()); + // the following checks are redundant because they are also checked in SparseTensorImpl::set_indices_and_values_unsafe + // but we need to ensure them in order to infer the shape. + AT_CHECK(indices.dim() == 2, "indices must be sparse_dim x nnz, but got: ", indices.sizes()) + AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout()); // If sizes are not given, it is inferred as max index of each dim. 
int64_t sparse_dim = indices.size(0); @@ -185,8 +184,14 @@ Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, const Ten // NB: Got rid of the sizes == NULL case Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, ArrayRef size, const TensorOptions& options) { - Tensor values = argcheck_and_expand_values_if_needed(indices, values_, options); - + Tensor values = expand_values_if_needed(values_); + + // arg checking + AT_CHECK(!options.has_layout() || options.layout() == kSparse, "expected sparse layout, but got layout ", options.layout()); + // the following checks are redundant because they are also checked in SparseTensorImpl::set_indices_and_values_unsafe + // but we need to ensure them in order to infer the shape. + AT_CHECK(indices.dim() == 2, "indices must be sparse_dim x nnz, but got: ", indices.sizes()) + AT_CHECK(!indices.is_sparse(), "expected indices to be a dense tensor, but got indices of layout ", indices.layout()); int64_t sparse_dim = indices.size(0); int64_t dense_dim = values.dim() - 1; AT_CHECK(size.size() == sparse_dim + dense_dim, @@ -229,7 +234,10 @@ Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_, ArrayRef< // are guaranteed to be within bounds. 
// NB: Got rid of the size == NULL case Tensor _sparse_coo_tensor_unsafe(const Tensor& indices, const Tensor& values_, ArrayRef size, const TensorOptions& options) { - Tensor values = argcheck_and_expand_values_if_needed(indices, values_, options); + Tensor values = expand_values_if_needed(values_); + + // arg checking + AT_CHECK(!options.has_layout() || options.layout() == kSparse, "expected sparse layout, but got layout ", options.layout()); int64_t sparse_dim = indices.size(0); int64_t dense_dim = values.dim() - 1; diff --git a/test/test_numba_integration.py b/test/test_numba_integration.py index 310bc1c696803..ba14916a8fbc7 100644 --- a/test/test_numba_integration.py +++ b/test/test_numba_integration.py @@ -67,7 +67,7 @@ def test_cuda_array_interface(self): # Sparse CPU/CUDA tensors do not implement the interface if tp not in (torch.HalfTensor,): - indices_t = cput.clamp(min=0) # prevent sparse_coo_tensor error + indices_t = torch.empty(1, cput.size(0), dtype=torch.long).clamp_(min=0) sparse_t = torch.sparse_coo_tensor(indices_t, cput) self.assertFalse(hasattr(sparse_t, "__cuda_array_interface__")) From 7110438f67d7df0e999863f439c5f3e9f103e90f Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Mon, 22 Oct 2018 18:30:59 -0400 Subject: [PATCH 22/23] skip autograd ifRocm --- test/test_autograd.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_autograd.py b/test/test_autograd.py index 0a515b1cf8f6c..fa00280ef5571 100644 --- a/test/test_autograd.py +++ b/test/test_autograd.py @@ -578,6 +578,7 @@ def backward(self, grad_x): (sparse_fn1(x) + sparse_fn2(x)).sum().backward() self.assertEqual(x.grad, sparse_grad1 + sparse_grad2) + @skipIfRocm def test_sparse_ctor_getter_backward(self): # See NOTE [ Sparse: autograd and API ] on the expected behavior of this test def test(size, sparse_dim, nnz, device): From b31922f2e72b8ef4c4000d9e4e859f3d06b49480 Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Tue, 23 Oct 2018 14:30:03 -0400 Subject: [PATCH 23/23] fix 
ubsan and indexSelect --- aten/src/ATen/SparseTensorUtils.h | 16 ++++++----- aten/src/TH/generic/THTensorEvenMoreMath.cpp | 28 +++++++++++--------- tools/autograd/templates/Functions.cpp | 1 - 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/aten/src/ATen/SparseTensorUtils.h b/aten/src/ATen/SparseTensorUtils.h index 7503b83c10b03..7bf2e938bc844 100644 --- a/aten/src/ATen/SparseTensorUtils.h +++ b/aten/src/ATen/SparseTensorUtils.h @@ -31,7 +31,6 @@ inline bool check_device(ArrayRef ts) { if (ts.empty()) { return true; } - const Tensor& ref_t = ts.front(); int64_t curDevice = current_device(); for (const Tensor& t : ts) { if (t.get_device() != curDevice) return false; @@ -81,29 +80,32 @@ inline Tensor new_values_with_size_of(const Tensor& values, int64_t nnz) { // tensor `t` of shape `full_size`. This returns the corresponding indices to // the flattened tensor `t.reshape( prod(full_size[:indices.size(0)]), -1 )`. // if forceClone is true, the result will forced to be a clone of self. -inline LongTensor flatten_indices(const Tensor& indices, IntList full_size, bool forceClone = false) { +// if force_clone is true, the result will forced to be a clone of self. 
+inline LongTensor flatten_indices(const Tensor& indices, IntList full_size, bool force_clone = false) { int64_t sparse_dim = indices.size(0); if (sparse_dim == 1) { - if (forceClone) { + if (force_clone) { return indices.squeeze(0).clone(); } else { return indices.squeeze(0); } } else { - std::vector indices_mult_cpu_vec(sparse_dim); + std::vector indices_mult_cpu_vec; + indices_mult_cpu_vec.reserve(sparse_dim); int64_t mult = 1; for (int64_t i = sparse_dim - 1; i >= 0; i--) { indices_mult_cpu_vec[i] = mult; mult *= full_size[i]; } - auto indices_mult_cpu = indices.type().toBackend(/*Dense*/at::Backend::CPU) + auto indices_mult_cpu = indices.type().cpu() .tensorFromBlob(indices_mult_cpu_vec.data(), /*size=*/{sparse_dim, 1}); - auto indices_mult = indices_mult_cpu.to(indices.device(), /*non_blocking=*/true); + // NB: must be blocking because this blob may be freed after this closure, + // and non_blocking copy will see garbage. + auto indices_mult = indices_mult_cpu.to(indices.device(), /*non_blocking=*/false); // Ideally we want matmul but matmul is slow on CPU Long and not implemented // on CUDA Long. So mul is faster. 
return indices.mul(indices_mult).sum(0); } } - }} // namespace at::sparse diff --git a/aten/src/TH/generic/THTensorEvenMoreMath.cpp b/aten/src/TH/generic/THTensorEvenMoreMath.cpp index 7efee5ee8d1be..ad6f995b68e96 100644 --- a/aten/src/TH/generic/THTensorEvenMoreMath.cpp +++ b/aten/src/TH/generic/THTensorEvenMoreMath.cpp @@ -12,9 +12,9 @@ void THTensor_(fill)(THTensor *r_, scalar_t value) TH_TENSOR_APPLY(scalar_t, r_, if (r__stride == 1) { THVector_(fill)(r__data, value, r__size); - r__i = r__size; - r__data += r__stride * r__size; - break; + r__i = r__size; + r__data += r__stride * r__size; + break; } else { *r__data = value; } @@ -181,7 +181,7 @@ void THTensor_(indexSelect)(THTensor *tensor, THTensor *src, int dim, THLongTens tensor_data = tensor->data(); src_data = src->data(); auto src_size0 = THTensor_sizeLegacyNoScalars(src, 0); - ptrdiff_t rowsize = src_size0 == 0 ? 1: THTensor_(nElement)(src) / src_size0; + ptrdiff_t rowsize = src_size0 == 0 ? 1 : THTensor_(nElement)(src) / src_size0; // check that the indices are within range int64_t max = src_size0 - 1 + TH_INDEX_BASE; @@ -192,14 +192,18 @@ void THTensor_(indexSelect)(THTensor *tensor, THTensor *src, int dim, THLongTens } } - if (src->dim() <= 1) { - #pragma omp parallel for if(numel > TH_OMP_OVERHEAD_THRESHOLD) private(i) - for (i=0; i TH_OMP_OVERHEAD_THRESHOLD) private(i) - for (i=0; i 0) { + if (src->dim() <= 1) { + #pragma omp parallel for if(numel > TH_OMP_OVERHEAD_THRESHOLD) private(i) + for (i=0; i TH_OMP_OVERHEAD_THRESHOLD) private(i) + for (i=0; idim() <= 1) diff --git a/tools/autograd/templates/Functions.cpp b/tools/autograd/templates/Functions.cpp index a96d29b5c6a28..2752a156b35ec 100644 --- a/tools/autograd/templates/Functions.cpp +++ b/tools/autograd/templates/Functions.cpp @@ -1996,7 +1996,6 @@ Tensor log1p_backward(const Tensor& grad, const Tensor& self) { Tensor sparse_constructor_values_backward(const Tensor& sparse_grad_out, const Tensor& indices, IntList values_shape) { // TODO: 
improve this backward by writing a kernel (maybe) - auto sparse_dim = indices.size(0); auto dense_grad = sparse_grad_out.is_sparse() ? sparse_grad_out.to_dense() : sparse_grad_out; auto full_size = sparse_grad_out.sizes(); auto flattened_grad_shape = values_shape.vec();