
PyTorch: freezing parameters, and detach / clone

2022-06-10 11:38:00 MallocLu

detach

The tensor returned by detach() shares its data memory with the original tensor, so when the value of the original tensor in the computation graph is updated (for example by an optimizer step after back-propagation), the value of the detach()'ed tensor changes along with it.

import torch
from torch import optim
from torch.nn import Parameter

x = torch.tensor(1.)
a = torch.tensor(1., requires_grad=True)
b = Parameter(torch.tensor(2.))

y = a**2 * x + b * x

z = y**2 + 2*y

optimizer = optim.SGD([a, b], lr=0.01)

ta = a.detach()
tb = b.detach()

print('before:', a, b, ta, tb)
print()

optimizer.zero_grad()
z.backward()
optimizer.step()

print('after:', a, b, ta, tb)

# before: tensor(1., requires_grad=True) Parameter containing:
# tensor(2., requires_grad=True) tensor(1.) tensor(2.)
#
# after: tensor(0.8400, requires_grad=True) Parameter containing:
# tensor(1.9200, requires_grad=True) tensor(0.8400) tensor(1.9200)
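
The shared storage also works in the other direction: writing into the detached tensor in place is visible through the original. A minimal sketch (the names t and d are illustrative, not from the example above):

import torch

t = torch.ones(3, requires_grad=True)
d = t.detach()          # shares storage with t, requires_grad=False
d[0] = 5.               # in-place write through the detached view
print(t)                # tensor([5., 1., 1.], requires_grad=True)
print(d.requires_grad)  # False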

clone

clone() allocates new memory, so when the value of the original tensor in the computation graph is updated (for example by an optimizer step after back-propagation), the value of the clone()'d tensor does not change.

If the new tensor is cloned from a tensor with requires_grad=True or from a Parameter, it carries grad_fn=<CloneBackward>, which means it can still propagate gradients as an intermediate node; it is equivalent to an identity mapping.

import torch
from torch import optim
from torch.nn import Parameter

x = torch.tensor(1.)
a = torch.tensor(1., requires_grad=True)
b = Parameter(torch.tensor(2.))

y = a**2 * x + b * x

z = y**2 + 2*y

optimizer = optim.SGD([a, b], lr=0.01)

ta = a.clone()
tb = b.clone()

print('before:', a, b, ta, tb)
print()

optimizer.zero_grad()
z.backward()
optimizer.step()

print('after:', a, b, ta, tb)

# before: tensor(1., requires_grad=True) Parameter containing:
# tensor(2., requires_grad=True) tensor(1., grad_fn=<CloneBackward>) tensor(2., grad_fn=<CloneBackward>)
# 
# after: tensor(0.8400, requires_grad=True) Parameter containing:
# tensor(1.9200, requires_grad=True) tensor(1., grad_fn=<CloneBackward>) tensor(2., grad_fn=<CloneBackward>)

import torch
from torch import optim
from torch.nn import Parameter

x = torch.tensor(1.)
a = torch.tensor(1., requires_grad=True)
b = Parameter(torch.tensor(2.))

y = a**2 * x + b * x

z = y**2 + 2*y

# z -> z2 is equivalent to an identity mapping, so gradients can still propagate through it
z2 = z.clone()

optimizer = optim.SGD([a, b], lr=0.01)
print('before:', a, b)
print()

optimizer.zero_grad()
z2.backward()
optimizer.step()

print('after:', a, b)

# before: tensor(1., requires_grad=True) Parameter containing:
# tensor(2., requires_grad=True)
# 
# after: tensor(0.8400, requires_grad=True) Parameter containing:
# tensor(1.9200, requires_grad=True)
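
Because clone() keeps the copy attached to the computation graph, a fully independent snapshot of a parameter is usually taken with detach().clone() (cut the graph first, then copy into new memory); this is exactly what the parameter-comparison code further below does with param.detach().clone(). A minimal sketch:

import torch
from torch.nn import Parameter

p = Parameter(torch.tensor(2.))
snapshot = p.detach().clone()   # new memory, no grad_fn, not updated together with p
print(snapshot)                 # tensor(2.)
print(snapshot.requires_grad)   # False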

lambda

from functools import reduce

foo = [2, 18, 9, 22, 17, 24, 8, 12, 27]

# In a lambda expression, the parameters come before the colon (there can be several, separated by commas);
# the expression to the right of the colon is the return value.
# used with filter
print(filter(lambda x: x % 3 == 0, foo))
print(list(filter(lambda x: x % 3 == 0, foo)))
print()

# used with map
print(map(lambda x: x * 2 + 10, foo))
print(list(map(lambda x: x * 2 + 10, foo)))
print()

# used with reduce
# reduce accumulates: the lambda first computes x=2 + y=18 = 20, then x=20 + y=9 = 29, and so on
print(reduce(lambda x, y: x + y, foo))

# <filter object at 0x000002206C252A88>
# [18, 9, 24, 12, 27]
# 
# <map object at 0x000002206C1FF608>
# [14, 46, 28, 54, 44, 58, 26, 34, 64]
# 
# 139
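
The accumulation performed by reduce is equivalent to the following explicit loop (a sketch for illustration):

foo = [2, 18, 9, 22, 17, 24, 8, 12, 27]

acc = foo[0]        # start with the first element: 2
for y in foo[1:]:
    acc = acc + y   # 2+18=20, 20+9=29, ... and finally 139
print(acc)          # 139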

detach: freezing part of the parameters

Drawback: it can only freeze all of the parameters that come before the detach() call.

# If out = out.detach() is removed from Net.forward, the result is:
# layer1.weight False
# layer1.bias False
# layer2.weight False
# layer2.bias False
# i.e. all parameters have changed (have been optimized)

# With out = out.detach() kept in Net.forward, the result is:
# layer1.weight True
# layer1.bias True
# layer2.weight False
# layer2.bias False
# i.e. the parameters of self.layer1 are not updated. The tensor returned by out = out.detach()
# cannot propagate gradients, so back-propagation stops when it reaches that tensor and
# all parameters before it are effectively frozen.

import torch
from torch import nn
from torch.nn import functional as F
from torch import optim


class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()

        self.layer1 = nn.Linear(10, 5)
        self.layer2 = nn.Linear(5, 3)

    def forward(self, x):
        out = self.layer1(x)
        out = out.detach()
        out = F.relu(self.layer2(out))
        return out


net = Net()
optimizer = optim.SGD(net.parameters(), lr=0.01)

input = torch.randn(8, 10)

#  Store the values of each parameter before training 
storeParam = {}
for name, param in net.named_parameters():
    storeParam[name] = param.detach().clone()

for i in range(100):
    out = net(input)
    loss = F.mse_loss(out, torch.zeros(8, 3))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

#  Compare the values of each parameter before and after training 
for name, param in net.named_parameters():
    print(f"{
      name} {
      torch.equal(param, storeParam[name])}")

requires_grad = False: freezing part of the parameters

# Setting self.layer1.weight.requires_grad = False freezes only that single parameter

# If the loss has requires_grad=False, backward() cannot be called on it. If a non-leaf tensor has
# requires_grad=False, gradient back-propagation is cut off at that tensor, so the parameters before
# it are not updated. If a leaf tensor has requires_grad=False, it does not require a gradient and
# therefore cannot be updated.
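
A minimal sketch of the first point (a loss that does not require grad), independent of the example below:

import torch

x = torch.tensor(2.)        # leaf tensor, requires_grad=False
loss = x ** 2               # no input requires grad, so loss.requires_grad is False
print(loss.requires_grad)   # False
# loss.backward()           # would raise RuntimeError: element 0 of tensors does not require grad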

import torch
from torch import nn
from torch.nn import functional as F
from torch import optim


class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()

        self.layer1 = nn.Linear(10, 5)
        self.layer1.weight.requires_grad = False
        self.layer2 = nn.Linear(5, 3)

    def forward(self, x):
        out = self.layer1(x)
        out = F.relu(self.layer2(out))
        return out


net = Net()
optimizer = optim.SGD(net.parameters(), lr=0.01)

input = torch.randn(8, 10)

#  Store the values of each parameter before training 
storeParam = {}
for name, param in net.named_parameters():
    storeParam[name] = param.detach().clone()

for i in range(100):
    out = net(input)
    loss = F.mse_loss(out, torch.zeros(8, 3))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

#  Compare the values of each parameter before and after training 
for name, param in net.named_parameters():
    print(f"{
      name} {
      torch.equal(param, storeParam[name])}")
    
# layer1.weight True
# layer1.bias False
# layer2.weight False
# layer2.bias False
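
A common follow-up, and the place where the lambda / filter notes above come in handy, is to hand only the trainable parameters to the optimizer, so the frozen ones are never seen by it at all. A sketch, assuming the net and optim from the requires_grad example above:

# pass only parameters with requires_grad=True to the optimizer
trainable_params = filter(lambda p: p.requires_grad, net.parameters())
optimizer = optim.SGD(trainable_params, lr=0.01)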


Copyright notice
This article was written by [MallocLu]. Please include a link to the original when reposting. Thank you.
https://yzsam.com/2022/161/202206101132290224.html