Avisek Gupta

Indian Statistical Institute, Kolkata







Tutorial 2: Tensor Gradients and Optimization







1. Computing Tensor Gradients:

(i) Every Tensor has a flag "requires_grad" that allows for fine-grained exclusion of subgraphs from gradient computation.

(ii) By default, user-created tensors have requires_grad=False, so their gradients are not calculated.
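
A minimal sketch of how the flag behaves (the tensor names below are illustrative, not the tutorial's original code):

import torch

# User-created tensors do not require gradients by default
x = torch.ones(2, 2)
print(x.requires_grad)   # False

# The flag can be set at creation time, or toggled in place afterwards
w = torch.ones(2, 2, requires_grad=True)
print(w.requires_grad)   # True

x.requires_grad_(True)   # in-place toggle
print(x.requires_grad)   # True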







(iii) Calling the backward() function on the (scalar) output computes the gradients and accumulates them in the .grad attribute of the leaf tensors that require gradients.
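
A minimal sketch of backward(), assuming a small computation graph with intermediate tensors y and z (the operations are illustrative, not the tutorial's original code):

import torch

x = torch.ones(2, 2, requires_grad=True)
y = x + 2            # intermediate tensor
z = y * y * 3        # intermediate tensor
out = z.mean()       # scalar output

out.backward()       # computes d(out)/dx and stores it in x.grad
print(x.grad)        # tensor of 4.5 in every entry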







(iv) The gradients of intermediate (non-leaf) tensors such as y or z cannot be printed from their .grad attributes; gradients flow through them during the backward pass, but they are not retained by default.

(v) Only if ALL input tensors do NOT require gradients will the gradient of the output tensor not be tracked; if even one input requires gradients, the output does too.
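
A minimal sketch of points (iv) and (v) together (the tensor names are assumptions, not the tutorial's original code):

import torch

# (iv) .grad of an intermediate (non-leaf) tensor is not populated
x = torch.ones(2, 2, requires_grad=True)
y = x + 2
out = (y * y).mean()
out.backward()
print(y.grad)                  # None (PyTorch warns that y is not a leaf tensor)

# (v) the output requires gradients as long as at least one input does
a = torch.randn(3, requires_grad=True)
b = torch.randn(3)             # does not require gradients
print((a + b).requires_grad)   # True  -> one input requires gradients
print((b * 2).requires_grad)   # False -> no input requires gradients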







2. Stop the automatic computation of gradients on Tensors with "requires_grad=True" in either of two ways (both are sketched after this list):

(i) Wrapping the code block in a "with torch.no_grad():" context, or

(ii) Using .detach() to get a new Tensor with the same content that does not require gradients.
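
A minimal sketch of both options (variable names are illustrative):

import torch

x = torch.ones(3, requires_grad=True)

# (i) torch.no_grad(): operations inside the block are not tracked
with torch.no_grad():
    y = x * 2
print(y.requires_grad)   # False

# (ii) .detach(): same values, but cut off from the computation graph
z = x.detach()
print(z.requires_grad)   # False
print(x.eq(z).all())     # tensor(True) -> contents are identical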







3. A simple gradient descent example that uses only backward() to compute the gradients (a sketch follows below).
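
A minimal sketch of such a loop; the quadratic objective, learning rate, and variable names are assumptions, not the tutorial's original example:

import torch

# Minimize f(w) = (w - 3)^2 with plain gradient descent
w = torch.tensor([0.0], requires_grad=True)
lr = 0.1

for step in range(50):
    loss = (w - 3) ** 2
    loss.backward()           # compute d(loss)/dw into w.grad
    with torch.no_grad():     # the parameter update must not be tracked
        w -= lr * w.grad
    w.grad.zero_()            # clear the accumulated gradient before the next step

print(w)                      # close to tensor([3.], requires_grad=True)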







Recap:

1. Computing Tensor Gradients:

(i) Every Tensor has a flag "requires_grad" that allows for fine-grained exclusion of subgraphs from gradient computation.

(ii) By default, user-created tensors have requires_grad=False, so their gradients are not calculated.

(iii) Calling the backward() function on the (scalar) output computes the gradients and accumulates them in the .grad attribute of the leaf tensors that require gradients.

(iv) The gradients of intermediate (non-leaf) tensors such as y or z cannot be printed from their .grad attributes; gradients flow through them during the backward pass, but they are not retained by default.

(v) Only if ALL input tensors do NOT require gradients will the gradient of the output tensor not be tracked; if even one input requires gradients, the output does too.

2. Stop the automatic computation of gradients on Tensors with "requires_grad=True" in either of two ways:

(i) Wrapping the code block in a "with torch.no_grad():" context, or

(ii) Using .detach() to get a new Tensor with the same content that does not require gradients.

3. A simple gradient descent example that uses only backward() to compute the gradients.