PyTorch model
2022-06-28 13:23:00 【Gu_ NN】
Basic model definition methods
PyTorch provides nn.Sequential(), nn.ModuleList(), and nn.ModuleDict() for combining multiple Modules into a model. Their similarities and differences are as follows:
| Sequential() | ModuleList() / ModuleDict() |
|---|---|
| Builds the network directly; the order in which layers are defined is the order in which they are connected | The order of elements in the List/Dict does not determine where they sit in the network; a forward function must specify how the layers are connected |
| External inputs cannot be injected into the middle of the model | More convenient when an intermediate layer needs information from an earlier layer, e.g. the residual connections in ResNets |
Using nn.Sequential()
# Method 1:
import torch.nn as nn
net = nn.Sequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
# Method 2:
import collections
net2 = nn.Sequential(collections.OrderedDict([
    ('fc1', nn.Linear(784, 256)),
    ('relu1', nn.ReLU()),
    ('fc2', nn.Linear(256, 10))
]))
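As a quick sanity check (a small sketch not in the original post; the input is just a random dummy batch), both definitions can be called directly, since Sequential generates forward automatically:

import torch

x = torch.rand(4, 784)   # dummy batch of 4 flattened 28x28 inputs
print(net(x).shape)      # torch.Size([4, 10])
print(net2(x).shape)     # torch.Size([4, 10])
print(net2.fc1)          # layers defined via OrderedDict can be accessed by name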
Using nn.ModuleList() / nn.ModuleDict()
# List
class model(nn.Module):
    def __init__(self):
        super().__init__()
        self.modulelist = nn.ModuleList([nn.Linear(784, 256), nn.ReLU(), nn.Linear(256, 10)])

    def forward(self, x):
        for layer in self.modulelist:
            x = layer(x)
        return x
# Dict
class model(nn.Module):
    def __init__(self):
        super().__init__()
        self.moduledict = nn.ModuleDict({
            'linear': nn.Linear(784, 256),
            'act': nn.ReLU(),
            'output': nn.Linear(256, 10)
        })

    def forward(self, x):
        # iterating a ModuleDict yields its keys, so look each module up by name
        for name in self.moduledict:
            x = self.moduledict[name](x)
        return x
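Because forward is written by hand, a ModuleList also makes it easy to reuse information from earlier layers, such as the residual connections mentioned in the table above. A minimal sketch (the layer sizes here are arbitrary assumptions, not a real ResNet block):

class ResidualMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(256, 256) for _ in range(3)])
        self.act = nn.ReLU()

    def forward(self, x):
        for layer in self.layers:
            x = x + self.act(layer(x))   # shortcut: add each layer's input to its output
        return x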
Complex model building methods
For large, complex models, you can first split the model into blocks and then assemble the blocks into the full model. Take the U-Net model as an example.
The U-Net network structure can be divided into the following four modules:
- The two convolutions inside each sub-block (double convolution)
- The downsampling connections between the blocks on the left side, i.e. max pooling
- The upsampling connections between the blocks on the right side
- The output layer
Module building
import torch
import torch.nn as nn
import torch.nn.functional as F
class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""
    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)
class Down(nn.Module):
    """Downscaling with maxpool then double conv"""
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)
class Up(nn.Module):
    """Upscaling then double conv"""
    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()
        # if bilinear, use the normal convolutions to reduce the number of channels
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        # input is CHW; pad x1 so its spatial size matches x2 before concatenation
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]
        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)
class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.conv(x)
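Each block can be exercised on its own before assembling the full network. A quick check with a dummy tensor (the sizes below are assumptions used only for illustration):

x = torch.randn(1, 3, 256, 256)   # dummy 3-channel image
feat = DoubleConv(3, 64)(x)       # same spatial size, 64 channels: (1, 64, 256, 256)
down = Down(64, 128)(feat)        # spatial size halved by max pooling: (1, 128, 128, 128)
print(feat.shape, down.shape)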
Model assembly
class UNet(nn.Module):
    def __init__(self, n_channels, n_classes, bilinear=True):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear
        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        factor = 2 if bilinear else 1
        self.down4 = Down(512, 1024 // factor)
        self.up1 = Up(1024, 512 // factor, bilinear)
        self.up2 = Up(512, 256 // factor, bilinear)
        self.up3 = Up(256, 128 // factor, bilinear)
        self.up4 = Up(128, 64, bilinear)
        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.outc(x)
        return logits
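A quick way to verify the assembled model (a sketch; the input size is an assumption and should be divisible by 16 so that the four downsampling steps line up):

unet = UNet(n_channels=3, n_classes=2)
x = torch.randn(1, 3, 256, 256)
print(unet(x).shape)   # torch.Size([1, 2, 256, 256]), per-pixel logits for 2 classes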
Modification of existing models
Replace a layer
import torchvision.models as models
net = models.resnet50()
print(net)
# Define a new classifier to replace the original fc layer
from collections import OrderedDict
classifier = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(2048, 128)),
    ('relu1', nn.ReLU()),
    ('dropout1', nn.Dropout(0.5)),
    ('fc2', nn.Linear(128, 10)),
    ('output', nn.Softmax(dim=1))
]))
# Replace the original fc layer with the classifier defined above
net.fc = classifier
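After the replacement, the model's final output dimension changes from 1000 to 10, which can be checked with a dummy batch (a small sketch, not from the original post):

import torch
x = torch.randn(2, 3, 224, 224)
print(net(x).shape)   # torch.Size([2, 10]), produced by the new classifier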
Add input variables
# Define the modified model
class Model(nn.Module):
    def __init__(self, net):
        super(Model, self).__init__()
        # original network
        self.net = net
        # activation layer applied to the 1000-dimensional output of net
        self.relu = nn.ReLU()
        # dropout layer
        self.dropout = nn.Dropout(0.5)
        # fully connected layer mapping to the target output dimension 10
        self.fc_add = nn.Linear(1001, 10, bias=True)
        self.output = nn.Softmax(dim=1)

    def forward(self, x, add_variable):
        x = self.net(x)
        # after the activation and dropout layers, concatenate with the external input variable;
        # unsqueeze keeps add_variable's dimensions consistent with net's output tensor,
        # typically used when add_variable is a single value (scalar) per sample
        x = torch.cat((self.dropout(self.relu(x)), add_variable.unsqueeze(1)), 1)
        x = self.fc_add(x)
        x = self.output(x)
        return x

# Instantiate
model = Model(net).cuda()
# Training / inference call
outputs = model(inputs, add_var)
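Here inputs and add_var are assumed to already exist. To make the interface concrete, a hypothetical example of their shapes (the batch size and image size are assumptions, not from the original post):

inputs = torch.randn(8, 3, 224, 224).cuda()   # dummy image batch
add_var = torch.randn(8).cuda()               # one extra scalar per sample
out = model(inputs, add_var)                  # net: (8, 1000); concat with (8, 1): (8, 1001); fc_add: (8, 10)
print(out.shape)                              # torch.Size([8, 10])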
Add output variables
class Model(nn.Module):
    def __init__(self, net):
        super(Model, self).__init__()
        self.net = net
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(1000, 10, bias=True)
        self.output = nn.Softmax(dim=1)

    def forward(self, x, add_variable):
        x1000 = self.net(x)
        x10 = self.dropout(self.relu(x1000))
        x10 = self.fc1(x10)
        x10 = self.output(x10)
        return x10, x1000  # also return the 1000-dimensional features as an extra output

model = Model(net).cuda()
out10, out1000 = model(inputs, add_var)
Saving and loading models
PyTorch models are mainly stored in three formats: pkl, pt, and pth.
A PyTorch model consists of two parts: the model structure and the weights.
- Structure: an nn.Module subclass
- Weights: a dictionary (the keys are layer/parameter names, the values are weight tensors; see the small sketch below)
Saving can take two forms:
- Save the model structure + the weights
- Save only the weights
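To make the weight dictionary described above concrete, here is a small sketch (the toy layer is just an illustration, not part of the original post):

import torch.nn as nn

toy = nn.Linear(4, 2)
sd = toy.state_dict()
print(list(sd.keys()))      # ['weight', 'bias']
print(sd['weight'].shape)   # torch.Size([2, 4])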
from torchvision import models
model = models.resnet152(pretrained=True)
# Save the entire model
torch.save(model, save_dir)
# Save model weights
torch.save(model.state_dict(), save_dir)
When multiple GPUs are available, saving and loading can happen in either a single-card or a multi-card (data-parallel) setting. Weights saved from a multi-card model carry an extra module. prefix in their names compared with single-card weights, so loading a multi-card checkpoint is more involved.
Saving
Single-card saving
import os
import torch
from torchvision import models
os.environ['CUDA_VISIBLE_DEVICES'] = '0'   # set this to the GPU id you want to use
model = models.resnet152(pretrained=True)
model.cuda()
# Save the entire model
torch.save(model, save_dir)
# Save model weights
torch.save(model.state_dict(), save_dir)
Multi-card saving
Use nn.DataParallel for the data-parallel setup:
import os
import torch
import torch.nn as nn
from torchvision import models
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2'   # set this to the GPU ids you want to use
model = models.resnet152(pretrained=True)
model = nn.DataParallel(model).cuda()
# Save the entire model
torch.save(model, save_dir)
# Save model weights
torch.save(model.state_dict(), save_dir)
Loading
Single-card loading
- Model saved on a single card
import os
import torch
from torchvision import models
os.environ['CUDA_VISIBLE_DEVICES'] = '0'   # set this to the GPU id you want to use
# Load the whole model
loaded_model = torch.load(save_dir)
loaded_model.cuda()
# Load model weights
loaded_dict = torch.load(save_dir)
loaded_model = models.resnet152()   # the model structure must be defined here
loaded_model.load_state_dict(loaded_dict)
loaded_model.cuda()
- Model saved on multiple cards
import os
import torch
import torch.nn as nn
from torchvision import models
os.environ['CUDA_VISIBLE_DEVICES'] = '0'   # set this to the GPU id you want to use
# Load the whole model
loaded_model = torch.load(save_dir)
loaded_model = loaded_model.module   # the difference: unwrap the DataParallel container
# Load model weights (recommended)
loaded_dict = torch.load(save_dir)
loaded_model = models.resnet152()   # the model structure must be defined here
loaded_model = nn.DataParallel(loaded_model).cuda()   # the difference: wrap in DataParallel so the module. keys match
loaded_model.load_state_dict(loaded_dict)
# Load model weights (alternative 1)
from collections import OrderedDict
loaded_dict = torch.load(save_dir)
# strip the module. prefix from the keys
new_state_dict = OrderedDict()
for k, v in loaded_dict.items():
    name = k[7:]   # the prefix 'module.' is 7 characters long, so drop the first 7 characters of each key
    new_state_dict[name] = v   # keep each key's corresponding value
# from here on it is the same as loading a single-card checkpoint
loaded_model = models.resnet152()
loaded_model.load_state_dict(new_state_dict)
loaded_model = loaded_model.cuda()
# Load model weights (alternative 2)
loaded_model = models.resnet152()
loaded_dict = torch.load(save_dir)
loaded_model.load_state_dict(
    {k.replace('module.', ''): v for k, v in loaded_dict.items()})
loaded_model = loaded_model.cuda()
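A related aside (not from the original post): if the checkpoint was written from a GPU but needs to be read on a machine with a different device layout, map_location can be passed to torch.load:

# load weights onto CPU regardless of the device they were saved from
loaded_dict = torch.load(save_dir, map_location='cpu')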
Multi-card loading
- Model saved on a single card
Use nn.DataParallel for the data-parallel setup:
import os
import torch
import torch.nn as nn
from torchvision import models
os.environ['CUDA_VISIBLE_DEVICES'] = '1,2'   # set this to the GPU ids you want to use
# Load the whole model
loaded_model = torch.load(save_dir)
loaded_model = nn.DataParallel(loaded_model).cuda()   # the difference: wrap the loaded model in DataParallel
# Load model weights
loaded_dict = torch.load(save_dir)
loaded_model = models.resnet152()   # the model structure must be defined here
loaded_model.load_state_dict(loaded_dict)   # single-card keys have no module. prefix, so load before wrapping
loaded_model = nn.DataParallel(loaded_model).cuda()   # the difference: wrap in DataParallel afterwards
- Model saved on multiple cards
It is recommended to save only the weights, in which case loading is the same as in the single-card case above. If only the whole model was saved, the following code is needed:
import os
import torch
import torch.nn as nn
from torchvision import models
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2'   # set this to the GPU ids you want to use
loaded_whole_model = torch.load(save_dir)
loaded_model = models.resnet152()   # the model structure must be defined here
loaded_model.load_state_dict(loaded_whole_model.module.state_dict())   # take the weights out of the DataParallel container
loaded_model = nn.DataParallel(loaded_model).cuda()
References
Datawhale: 深入浅出PyTorch (Dive into PyTorch) tutorial