import torch

# The autograd package provides automatic differentiation
# for all operations on Tensors.

# requires_grad=True -> track all operations on the tensor.
x = torch.randn(3, requires_grad=True)
y = x + 2

# y was created as the result of an operation, so it has a grad_fn attribute.
# grad_fn references the Function that created the Tensor.
print(x)  # created by the user -> grad_fn is None
print(y)
print(y.grad_fn)
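# Added note: since y was produced by an addition, its grad_fn is typically an
# AddBackward0 object (the exact repr may vary by PyTorch version), while
# x.grad_fn stays None.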

# Do more operations on y
z = y * y * 3
print(z)
z = z.mean()
print(z)

# Let's compute the gradients with backpropagation.
# When we finish our computation we can call .backward() and have all the
# gradients computed automatically.
# The gradient for this tensor will be accumulated into its .grad attribute.
# It is the partial derivative of the function w.r.t. the tensor.

z.backward()
print(x.grad)  # dz/dx
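
# Added sanity check (not in the original): with y = x + 2 and z = mean(3 * y^2)
# over 3 elements, the analytical gradient is dz/dx_i = 2 * (x_i + 2),
# so x.grad should match it:
print(torch.allclose(x.grad, 2 * (x + 2)))  # expected: True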

# Generally speaking, torch.autograd is an engine for computing vector-Jacobian
# products. It computes partial derivatives while applying the chain rule.

# -------------
# Model with non-scalar output:
# If a Tensor is non-scalar (it has more than one element), backward() needs an
# extra argument: a gradient tensor of matching shape (the vector in the
# vector-Jacobian product).

x = torch.randn(3, requires_grad=True)

y = x * 2
for _ in range(10):
    y = y * 2

print(y)
print(y.shape)

v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float32)
y.backward(v)
print(x.grad)
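
# Added sanity check (not in the original): here y = 2^11 * x = 2048 * x, so the
# Jacobian is diagonal with entries 2048 and the vector-Jacobian product is
# simply 2048 * v:
print(torch.allclose(x.grad, 2048 * v))  # expected: True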

# -------------
# Stop a tensor from tracking history:
# For example, during the training loop, when we update the weights, this
# update operation should not be part of the gradient computation. Options:
# - x.requires_grad_(False)
# - x.detach()
# - wrap the code in 'with torch.no_grad():'

# .requires_grad_(...) changes an existing Tensor's requires_grad flag in-place.
a = torch.randn(2, 2)
print(a.requires_grad)
b = (a * 3) / (a - 1)
print(b.grad_fn)  # None, because a does not require grad
a.requires_grad_(True)
print(a.requires_grad)
b = (a * a).sum()
print(b.grad_fn)

# .detach(): get a new Tensor with the same content but no gradient tracking.
a = torch.randn(2, 2, requires_grad=True)
print(a.requires_grad)
b = a.detach()
print(b.requires_grad)

# wrap in 'with torch.no_grad():'
a = torch.randn(2, 2, requires_grad=True)
print(a.requires_grad)
with torch.no_grad():
    print((a ** 2).requires_grad)  # False inside the no_grad block
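
# Added note (not in the original): tracking is only disabled inside the block;
# outside it, operations on a are tracked again:
print((a ** 2).requires_grad)  # expected: True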

# -------------
# backward() accumulates the gradient for this tensor into its .grad attribute.
# !!! We need to be careful during optimization !!!
# Use .zero_() to empty the gradients before a new optimization step!
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    # just a dummy example
    model_output = (weights * 3).sum()
    model_output.backward()

    print(weights.grad)

    # optimize the model, i.e. adjust the weights...
    with torch.no_grad():
        weights -= 0.1 * weights.grad

    # This is important! Without it the gradients would accumulate
    # (3, 6, 9, ...) and distort the updates and the final weights.
    weights.grad.zero_()

print(weights)
print(model_output)

# An optimizer offers the same functionality via its zero_grad() method:
# optimizer = torch.optim.SGD([weights], lr=0.1)
# During training:
# optimizer.step()
# optimizer.zero_grad()
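
# Minimal sketch (added, not in the original) of the same dummy loop rewritten
# with an optimizer; SGD with lr=0.1 mirrors the manual update above:
weights = torch.ones(4, requires_grad=True)
optimizer = torch.optim.SGD([weights], lr=0.1)

for epoch in range(3):
    model_output = (weights * 3).sum()
    model_output.backward()

    optimizer.step()       # applies the update: weights -= lr * weights.grad
    optimizer.zero_grad()  # clears the accumulated gradients before the next backward()

print(weights)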