PyTorch: fixing parameters, detach, and clone
2022-06-10 11:38:00 【MallocLu】
detach
The tensor returned by detach() shares data memory with the original tensor, so when the original tensor's value in the computation graph is updated, for example by backpropagation followed by an optimizer step, the detached tensor's value changes as well.
import torch
from torch import optim
from torch.nn import Parameter
x = torch.tensor(1.)
a = torch.tensor(1., requires_grad=True)
b = Parameter(torch.tensor(2.))
y = a**2 * x + b * x
z = y**2 + 2*y
optimizer = optim.SGD([a, b], lr=0.01)
ta = a.detach()  # shares the same underlying storage as a
tb = b.detach()  # shares the same underlying storage as b
print('before:', a, b, ta, tb)
print()
optimizer.zero_grad()
z.backward()
optimizer.step()
print('after:', a, b, ta, tb)
# before: tensor(1., requires_grad=True) Parameter containing:
# tensor(2., requires_grad=True) tensor(1.) tensor(2.)
#
# after: tensor(0.8400, requires_grad=True) Parameter containing:
# tensor(1.9200, requires_grad=True) tensor(0.8400) tensor(1.9200)
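Because detach() only creates a view that shares storage, an in-place change to the original tensor is immediately visible through the detached tensor. A minimal sketch of just this point (the variable names here are illustrative, not part of the example above):
import torch
w = torch.tensor([1.0, 2.0], requires_grad=True)
d = w.detach()
# Same underlying storage, so the data pointers match
print(w.data_ptr() == d.data_ptr())  # True
# An in-place update of w is visible through d as well
with torch.no_grad():
    w += 1.0
print(d)  # tensor([2., 3.])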
clone
The tensor returned by clone() uses new memory, so when the original tensor's value in the computation graph is updated, for example by backpropagation followed by an optimizer step, the cloned tensor's value does not change.
If the new tensor is cloned from a tensor with requires_grad=True or from a Parameter, it has grad_fn=<CloneBackward>, which means it can still propagate gradients as an intermediate node; the clone is equivalent to an identity mapping.
import torch
from torch import optim
from torch.nn import Parameter
x = torch.tensor(1.)
a = torch.tensor(1., requires_grad=True)
b = Parameter(torch.tensor(2.))
y = a**2 * x + b * x
z = y**2 + 2*y
optimizer = optim.SGD([a, b], lr=0.01)
ta = a.clone()  # new memory; stays connected to the graph via CloneBackward
tb = b.clone()  # new memory; stays connected to the graph via CloneBackward
print('before:', a, b, ta, tb)
print()
optimizer.zero_grad()
z.backward()
optimizer.step()
print('after:', a, b, ta, tb)
# before: tensor(1., requires_grad=True) Parameter containing:
# tensor(2., requires_grad=True) tensor(1., grad_fn=<CloneBackward>) tensor(2., grad_fn=<CloneBackward>)
#
# after: tensor(0.8400, requires_grad=True) Parameter containing:
# tensor(1.9200, requires_grad=True) tensor(1., grad_fn=<CloneBackward>) tensor(2., grad_fn=<CloneBackward>)
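The contrast with detach can be checked directly: a clone lives in new memory but keeps a grad_fn, while a detached tensor shares memory and is cut off from the graph. A minimal sketch (illustrative names, not taken from the example above):
import torch
a = torch.tensor(1., requires_grad=True)
c = a.clone()
d = a.detach()
print(a.data_ptr() == c.data_ptr())  # False: clone allocates new memory
print(a.data_ptr() == d.data_ptr())  # True: detach shares memory
print(c.grad_fn)                     # a CloneBackward node: still part of the graph
print(d.grad_fn, d.requires_grad)    # None False: cut off from the graph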
import torch
from torch import optim
from torch.nn import Parameter
x = torch.tensor(1.)
a = torch.tensor(1., requires_grad=True)
b = Parameter(torch.tensor(2.))
y = a**2 * x + b * x
z = y**2 + 2*y
# z -> z2 is an identity mapping, so gradients can still propagate through it
z2 = z.clone()
optimizer = optim.SGD([a, b], lr=0.01)
print('before:', a, b)
print()
optimizer.zero_grad()
z2.backward()
optimizer.step()
print('after:', a, b)
# before: tensor(1., requires_grad=True) Parameter containing:
# tensor(2., requires_grad=True)
#
# after: tensor(0.8400, requires_grad=True) Parameter containing:
# tensor(1.9200, requires_grad=True)
lambda
from functools import reduce
foo = [2, 18, 9, 22, 17, 24, 8, 12, 27]
# In a lambda expression, the parameters come before the colon (there can be several, separated by commas); the expression after the colon is the return value.
# with filter
print(filter(lambda x: x % 3 == 0, foo))
print(list(filter(lambda x: x % 3 == 0, foo)))
print()
# with map
print(map(lambda x: x * 2 + 10, foo))
print(list(map(lambda x: x * 2 + 10, foo)))
print()
# with reduce
# reduce accumulates: the lambda first computes x=2 + y=18 = 20, then x=20 + y=9 = 29, and so on
print(reduce(lambda x, y: x + y, foo))
# <filter object at 0x000002206C252A88>
# [18, 9, 24, 12, 27]
#
# <map object at 0x000002206C1FF608>
# [14, 46, 28, 54, 44, 58, 26, 34, 64]
#
# 139
Fixing some parameters with detach
Drawback: it can only freeze all of the parameters that come before the detach call.
# If out = out.detach() is removed from Net.forward, the result is:
# layer1.weight False
# layer1.bias False
# layer2.weight False
# layer2.bias False
# i.e. every parameter changed (was optimized).
# With out = out.detach() in Net.forward, the result is:
# layer1.weight True
# layer1.bias True
# layer2.weight False
# layer2.bias False
# i.e. the parameters of self.layer1 were not updated. The tensor returned by out = out.detach() cannot propagate
# gradients, so backpropagation stops when it reaches that tensor and every parameter before it stays frozen.
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.layer1 = nn.Linear(10, 5)
        self.layer2 = nn.Linear(5, 3)

    def forward(self, x):
        out = self.layer1(x)
        out = out.detach()  # gradients cannot flow back past this point
        out = F.relu(self.layer2(out))
        return out

net = Net()
optimizer = optim.SGD(net.parameters(), lr=0.01)
input = torch.randn(8, 10)

# Store the value of each parameter before training
storeParam = {}
for name, param in net.named_parameters():
    storeParam[name] = param.detach().clone()

for i in range(100):
    out = net(input)
    loss = F.mse_loss(out, torch.zeros(8, 3))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Compare the value of each parameter before and after training
for name, param in net.named_parameters():
    print(f"{name} {torch.equal(param, storeParam[name])}")
Fixing some parameters with requires_grad = False
# Setting self.layer1.weight.requires_grad = False fixes only that parameter.
# If the loss has requires_grad=False, backward cannot be called on it; if an intermediate (non-leaf) tensor has
# requires_grad=False, gradient backpropagation is cut off there, so the parameters before it are not updated;
# if a leaf tensor has requires_grad=False, it does not require gradients and therefore cannot be updated.
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.layer1 = nn.Linear(10, 5)
        self.layer1.weight.requires_grad = False  # freeze only this parameter
        self.layer2 = nn.Linear(5, 3)

    def forward(self, x):
        out = self.layer1(x)
        out = F.relu(self.layer2(out))
        return out

net = Net()
optimizer = optim.SGD(net.parameters(), lr=0.01)
input = torch.randn(8, 10)

# Store the value of each parameter before training
storeParam = {}
for name, param in net.named_parameters():
    storeParam[name] = param.detach().clone()

for i in range(100):
    out = net(input)
    loss = F.mse_loss(out, torch.zeros(8, 3))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Compare the value of each parameter before and after training
for name, param in net.named_parameters():
    print(f"{name} {torch.equal(param, storeParam[name])}")
# layer1.weight True
# layer1.bias False
# layer2.weight False
# layer2.bias False
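The same kind of check works here: only layer1.weight has requires_grad=False, so after backward it is the only parameter whose .grad is still None. A minimal sketch, assuming the Net class and imports defined above; the check is not part of the original example:
net = Net()
out = net(torch.randn(8, 10))
loss = F.mse_loss(out, torch.zeros(8, 3))
loss.backward()
for name, param in net.named_parameters():
    print(name, param.requires_grad, param.grad is None)
# layer1.weight False True   <- frozen leaf: no gradient is accumulated for it
# layer1.bias True False
# layer2.weight True False
# layer2.bias True False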