11"""
22Hooks for autograd saved tensors
3- =======================
3+ ================================
44
55"""
66
1313# packing/unpacking process.
1414#
1515# This tutorial assumes you are familiar with how backpropagation works in
16- # theory. If not, read this first:
17- # https://colab.research.google.com/drive/1aWNdmYt7RcHMbUk-Xz2Cv5-cGFSWPXe0#scrollTo=AHcEJ6nXUb7W
16+ # theory. If not, read `this <https://colab.research.google.com/drive/1aWNdmYt7RcHMbUk-Xz2Cv5-cGFSWPXe0#scrollTo=AHcEJ6nXUb7W>`_ first.
1817#
1918
2019
@@ -107,7 +106,7 @@ def f(x):

######################################################################
# In the example above, executing without grad would only have kept ``x``
- # and ``y`` in the scope, But the graph additionnally stores ``f(x)`` and
+ # and ``y`` in the scope, but the graph additionally stores ``f(x)`` and
# ``f(f(x))``. Hence, running a forward pass during training will be more
# costly in memory usage than during evaluation (more precisely, when
# autograd is not required).
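
To make the memory point concrete, here is a small sketch (not part of the patch; it assumes only ``torch`` and the same ``f(x) = x * x`` used by the tutorial):

import torch

def f(x):
    return x * x

x = torch.randn(5, requires_grad=True)

# Recording mode: the graph of y keeps references to f(x) and f(f(x)), so
# those intermediates stay alive until backward runs (or y is freed).
y = f(f(f(x)))
print(y.grad_fn is not None)   # True: a graph with saved tensors exists

# Evaluation mode: no graph is built, so intermediates can be freed as soon
# as they go out of scope.
with torch.no_grad():
    y_eval = f(f(f(x)))
print(y_eval.grad_fn is None)  # True: nothing was saved
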
@@ -182,7 +181,7 @@ def unpack_hook(x):


######################################################################
- # The ``pack_hook`` function will be called everytime an operation saves
+ # The ``pack_hook`` function will be called every time an operation saves
# a tensor for backward.
# The output of ``pack_hook`` is then stored in the computation graph
# instead of the original tensor.
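
As a quick sketch of that mechanism (identity hooks with a ``print``, illustrative rather than the tutorial's own code), the two call points become visible:

import torch

def pack_hook(t):
    # Runs during the forward pass, each time an op saves a tensor.
    print("packing", t.size())
    return t                    # whatever is returned is what the graph stores

def unpack_hook(packed):
    # Runs during backward, when the saved value is needed again; it must
    # return a tensor equivalent to the one that was packed.
    print("unpacking", packed.size())
    return packed

x = torch.randn(3, requires_grad=True)
with torch.autograd.graph.saved_tensors_hooks(pack_hook, unpack_hook):
    y = x.pow(2)                # pow saves x for its backward, so pack_hook fires
y.sum().backward()              # backward needs x again, so unpack_hook fires
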
@@ -218,8 +217,9 @@ def unpack_hook(x):
#

######################################################################
- # **Returning and int**
-
+ # Returning an ``int``
+ # ^^^^^^^^^^^^^^^^^^^^
+ #
# Returning the index of a Python list
# Relatively harmless but with debatable usefulness

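A minimal sketch of what such a hook pair looks like (the ``storage`` list and hook names are illustrative):

import torch

storage = []

def pack(x):
    storage.append(x)           # keep the tensor alive in a plain Python list
    return len(storage) - 1     # the graph only stores this index

def unpack(i):
    return storage[i]           # fetch the tensor back by index

x = torch.randn(5, requires_grad=True)
with torch.autograd.graph.saved_tensors_hooks(pack, unpack):
    y = x * x
y.sum().backward()
assert x.grad.equal(2 * x)
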
@@ -240,8 +240,9 @@ def unpack(x):
assert(x.grad.equal(2 * x))

######################################################################
- # **Returning a tuple**
-
+ # Returning a tuple
+ # ^^^^^^^^^^^^^^^^^
+ #
# Returning some tensor and a function how to unpack it
# Quite unlikely to be useful in its current form

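A sketch of that idea (hook names are illustrative): ``pack`` returns a ``(tensor, fn)`` pair and ``unpack`` simply applies the function to get the original value back.

import torch

def pack(x):
    return (-x, lambda t: -t)   # store a transformed tensor plus the recipe to undo it

def unpack(packed):
    t, fn = packed
    return fn(t)                # rebuild the original tensor

x = torch.randn(5, requires_grad=True)
with torch.autograd.graph.saved_tensors_hooks(pack, unpack):
    y = x * x
y.sum().backward()
assert torch.allclose(x.grad, 2 * x)
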
@@ -262,9 +263,10 @@ def unpack(packed):
assert(torch.allclose(x.grad, 2 * x))

######################################################################
- # **Returning a str**
-
- # Returning the __repr__ of the tensor
+ # Returning a ``str``
+ # ^^^^^^^^^^^^^^^^^^^
+ #
+ # Returning the ``__repr__`` of the tensor
# Probably never do this

x = torch.randn(5, requires_grad=True)
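
A rough sketch of why a ``repr`` string is a dead end (hook bodies are illustrative): the printed values are rounded and possibly truncated, so ``unpack`` has no reliable way to rebuild the original tensor and backward is left with nothing usable.

import torch

def pack(t):
    return repr(t)              # the graph now holds only a string

def unpack(packed):
    # The rounded, possibly truncated repr cannot be turned back into the
    # original tensor, so backward cannot proceed.
    raise RuntimeError("cannot rebuild a tensor from: " + packed)

x = torch.randn(5, requires_grad=True)
with torch.autograd.graph.saved_tensors_hooks(pack, unpack):
    y = x * x                   # x is saved as a string

try:
    y.sum().backward()          # unpack runs and fails before gradients are computed
except RuntimeError as err:
    print(err)
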
@@ -337,7 +339,7 @@ def forward(self, x):


######################################################################
- # In practice, on a A100 GPU, for a resnet-152 with batch size 256, this
+ # In practice, on an A100 GPU, for a ResNet-152 with batch size 256, this
# corresponds to a GPU memory usage reduction from 48GB to 5GB, at the
# cost of a 6x slowdown.
#
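
One built-in way to get this kind of trade-off is ``torch.autograd.graph.save_on_cpu`` (a sketch assuming a CUDA device and ``torchvision`` are available; the 48GB/6x figures above come from the tutorial's own setup, not from this snippet):

import torch
import torchvision

model = torchvision.models.resnet152().cuda()
x = torch.randn(32, 3, 224, 224, device="cuda")

# Every tensor autograd would normally keep on the GPU is packed into
# (pinned) CPU memory during the forward pass and copied back lazily
# when backward needs it.
with torch.autograd.graph.save_on_cpu(pin_memory=True):
    loss = model(x).sum()
loss.backward()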