@@ -219,7 +219,9 @@ def forward(self, x):
219219# The simplest update rule used in practice is the Stochastic Gradient
220220# Descent (SGD):
221221#
222- # ``weight = weight - learning_rate * gradient``
222+ # .. code:: python
223+ #
224+ # weight = weight - learning_rate * gradient
223225#
224226# We can implement this using simple Python code:
225227#
@@ -233,18 +235,21 @@ def forward(self, x):
233235# update rules such as SGD, Nesterov-SGD, Adam, RMSProp, etc.
234236# To enable this, we built a small package: ``torch.optim`` that
235237# implements all these methods. Using it is very simple:
236-
237- import torch.optim as optim
238-
239- # create your optimizer
240- optimizer = optim.SGD(net.parameters(), lr=0.01)
241-
242- # in your training loop:
243- optimizer.zero_grad()   # zero the gradient buffers
244- output = net(input)
245- loss = criterion(output, target)
246- loss.backward()
247- optimizer.step()    # Does the update
238+ #
239+ # .. code:: python
240+ #
241+ # import torch.optim as optim
242+ #
243+ # # create your optimizer
244+ # optimizer = optim.SGD(net.parameters(), lr=0.01)
245+ #
246+ # # in your training loop:
247+ # optimizer.zero_grad() # zero the gradient buffers
248+ # output = net(input)
249+ # loss = criterion(output, target)
250+ # loss.backward()
251+ # optimizer.step() # Does the update
252+ #
248253
249254
250255###############################################################
0 commit comments