Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 5c237c8

Browse files
committed
added 03_autograd.py
1 parent bd8ec8d commit 5c237c8

File tree

1 file changed

+109
-0
lines changed

1 file changed

+109
-0
lines changed

03_autograd.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
2+
import torch
3+
# The autograd package provides automatic differentiation
4+
# for all operations on Tensors
5+
6+
# requires_grad=True makes autograd track every operation on the tensor.
x = torch.randn(3, requires_grad=True)
y = x + 2

# y was created as the result of an operation, so it has a grad_fn attribute.
# grad_fn references the Function that created the tensor.
print(x)  # created by the user -> grad_fn is None
print(y)
print(y.grad_fn)

# Do more operations on y.
z = y * y * 3
print(z)
z = z.mean()  # reduce to a scalar so backward() needs no gradient argument
print(z)

# Compute the gradients with backpropagation.
# Once the computation is finished, calling .backward() computes all the
# gradients automatically and accumulates them into the .grad attribute:
# the partial derivative of the function w.r.t. the tensor.
# Here z = mean(3 * (x + 2)^2) over 3 elements, so dz/dx = 2 * (x + 2).
z.backward()
print(x.grad)  # dz/dx
29+
30+
# Generally speaking, torch.autograd is an engine for computing vector-Jacobian products.
31+
# It computes partial derivatives while applying the chain rule.
32+
33+
# -------------
34+
# Model with non-scalar output:
# If a tensor is non-scalar (more than one element), backward() needs a
# gradient argument: a tensor of matching shape that serves as the vector
# in the vector-Jacobian product.
x = torch.randn(3, requires_grad=True)

y = x * 2
for _ in range(10):
    y = y * 2  # eleven doublings in total, so y = 2048 * x

print(y)
print(y.shape)

v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float32)
y.backward(v)
print(x.grad)  # vector-Jacobian product: 2048 * v
51+
52+
# -------------
53+
# Stop a tensor from tracking history:
54+
# For example during our training loop when we want to update our weights
55+
# then this update operation should not be part of the gradient computation
56+
# - x.requires_grad_(False)
57+
# - x.detach()
58+
# - wrap in 'with torch.no_grad():'
59+
60+
# .requires_grad_(...) changes an existing tensor's requires_grad flag in-place.
a = torch.randn(2, 2)
print(a.requires_grad)  # False: not requested when the tensor was created
b = (a * 3) / (a - 1)
print(b.grad_fn)  # None: b was computed before tracking was switched on
a.requires_grad_(True)
print(a.requires_grad)
b = (a * a).sum()
print(b.grad_fn)  # set now, because a requires grad
69+
70+
# .detach() returns a new tensor with the same content that is excluded
# from gradient computation.
a = torch.randn(2, 2, requires_grad=True)
print(a.requires_grad)
b = a.detach()
print(b.requires_grad)
75+
76+
# Wrap the computation in 'with torch.no_grad():'
a = torch.randn(2, 2, requires_grad=True)
print(a.requires_grad)
with torch.no_grad():
    # Operate on `a`, the tensor created just above, so this demo is
    # self-contained and shows the effect on a tensor that requires grad.
    b = a ** 2
    print(b.requires_grad)  # False: results computed under no_grad are not tracked
81+
82+
# -------------
83+
# backward() accumulates the gradient into the .grad attribute.
# !!! We need to be careful during optimization !!!
# Use .grad.zero_() to empty the gradients before a new optimization step!
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    # just a dummy example
    model_output = (weights * 3).sum()
    model_output.backward()

    print(weights.grad)

    # optimize model, i.e. adjust weights...
    # The update runs under no_grad so it is not tracked by autograd.
    with torch.no_grad():
        weights -= 0.1 * weights.grad

    # This is important! It affects the final weights & output:
    # without it the gradients of successive epochs would add up.
    weights.grad.zero_()

print(weights)
print(model_output)
104+
105+
# Optimizers provide a zero_grad() method that resets the gradients:
106+
# optimizer = torch.optim.SGD([weights], lr=0.1)
107+
# During training:
108+
# optimizer.step()
109+
# optimizer.zero_grad()

0 commit comments

Comments
 (0)