
Hands-on deep learning -- dropout and its code implementation


1. Dropout

Motivation: a good model should be robust to perturbations of its input data.

Training with noise added to the data is equivalent to Tikhonov regularization.

Dropout: inject noise between the layers instead.

Dropout is usually applied to the outputs of hidden fully connected layers.

The dropout probability is a hyperparameter that controls model complexity.
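Concretely (the standard unbiased-noise formulation, which the code below implements; this derivation is added here for clarity): with dropout probability p, each intermediate activation h is replaced by

h' = 0 with probability p,    h' = h / (1 - p) otherwise,

so that E[h'] = p * 0 + (1 - p) * h / (1 - p) = h. The expected activation is unchanged, which is why the implementation divides the surviving activations by keep_prob = 1 - p.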

2. Implementing dropout from scratch

from matplotlib import pyplot as plt
import torch
import torch.nn as nn
import numpy as np
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l

def dropout(X, drop_prob):
    X = X.float()
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    # drop everything when drop_prob == 1
    if keep_prob == 0:
        return torch.zeros_like(X)
    # torch.rand (uniform on [0, 1)), not torch.randn (standard normal):
    # comparing uniform samples with keep_prob gives a Bernoulli(keep_prob) mask
    mask = (torch.rand(X.shape) < keep_prob).float()
    # rescale survivors by 1/keep_prob so the expected value is unchanged
    return mask * X / keep_prob

X = torch.arange(16).view(2, 8)
print(dropout(X, 0))
print(dropout(X, 0.5))
print(dropout(X, 1.0))
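A sanity check on the three calls above: drop_prob=0 returns X unchanged, drop_prob=1.0 returns all zeros, and drop_prob=0.5 zeroes roughly half of the entries at random while doubling the survivors, so the expected value of every entry still equals the input.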

"""
 Define model parameters 
"""
# a two-hidden-layer MLP for Fashion-MNIST: 784 -> 256 -> 256 -> 10
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256

W1 = torch.tensor(np.random.normal(0, 0.01, size=(num_inputs, num_hiddens1)), dtype=torch.float, requires_grad=True)
b1 = torch.zeros(num_hiddens1, requires_grad=True, dtype=torch.float)

W2 = torch.tensor(np.random.normal(0, 0.01, size=(num_hiddens1, num_hiddens2)), dtype=torch.float, requires_grad=True)
b2 = torch.zeros(num_hiddens2, requires_grad=True, dtype=torch.float)

W3 = torch.tensor(np.random.normal(0, 0.01, size=(num_hiddens2, num_outputs)), dtype=torch.float, requires_grad=True)
b3 = torch.zeros(num_outputs, requires_grad=True, dtype=torch.float)

params = [W1, b1, W2, b2, W3, b3]

"""
 Defining models 
"""
drop_prob1, drop_prob2 = 0.2, 0.5

def net(X, is_training=True):
    X = X.view(-1, num_inputs)
    H1 = (torch.matmul(X, W1) + b1).relu()

    # apply dropout only during training
    if is_training:
        H1 = dropout(H1, drop_prob1)  # dropout after the first fully connected layer

    H2 = (torch.matmul(H1, W2) + b2).relu()

    if is_training:
        H2 = dropout(H2, drop_prob2)  # dropout after the second fully connected layer
    return torch.matmul(H2, W3) + b3
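The is_training flag matters because accuracy must be evaluated with dropout disabled. In the d2lzh_pytorch utility module that accompanies the book, evaluate_accuracy special-cases plain functions that take an is_training argument, roughly like this (a sketch of the book's utility, quoted from memory; treat details as approximate):

def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        if isinstance(net, torch.nn.Module):
            net.eval()   # nn.Dropout layers become identities in eval mode
            acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            net.train()
        elif 'is_training' in net.__code__.co_varnames:
            # a hand-written net like ours: pass is_training=False to skip dropout
            acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
        else:
            acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n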

"""
 Training and testing models 
"""
num_epochs, lr, batch_size = 5, 100.0, 256  # lr is large because d2l's sgd divides gradients by batch_size (see the sketch below)
loss = torch.nn.CrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)
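For reference, when train_ch3 is given params and lr (rather than an optimizer), it updates parameters with the book's plain sgd helper, which in d2lzh_pytorch is defined roughly as follows (a sketch, reproduced from memory):

def sgd(params, lr, batch_size):
    # the gradient is summed over the batch, so the update divides by
    # batch_size; with batch_size=256, lr=100.0 is an effective step of ~0.39
    for param in params:
        param.data -= lr * param.grad / batch_size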


3. Concise implementation of dropout

from matplotlib import pyplot as plt
import torch
import torch.nn as nn
import numpy as np
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l



"""
 Define model parameters 
"""
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256

drop_prob1, drop_prob2 = 0.2, 0.5

net = nn.Sequential(
      d2l.FlattenLayer(),
      nn.Linear(num_inputs, num_hiddens1),
      nn.ReLU(),
      nn.Dropout(drop_prob1),  # dropout after the first hidden layer
      nn.Linear(num_hiddens1, num_hiddens2),
      nn.ReLU(),
      nn.Dropout(drop_prob2),  # dropout after the second hidden layer
      nn.Linear(num_hiddens2, num_outputs))

for param in net.parameters():
      nn.init.normal_(param, mean=0, std=0.01)
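Unlike the hand-written net, nn.Dropout tracks training mode itself: it is active under net.train() and becomes an identity under net.eval(), so no is_training flag is needed. A quick check (an illustrative snippet, not part of the original post):

probe = torch.ones(1, num_inputs)
net.train()
out_train = net(probe)  # dropout active: output varies from call to call
net.eval()
out_eval = net(probe)   # dropout off: output is deterministic
print(torch.equal(net(probe), out_eval))  # True in eval mode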

"""
 Train and test models 
"""
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
num_epochs, batch_size = 5, 256
loss = torch.nn.CrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# params and lr are None because the optimizer handles the update step
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs,
              batch_size, None, None, optimizer)


Copyright notice: this article was written by [Orange acridine 21]; please include a link to the original when reposting: https://yzsam.com/2022/03/202203010550043824.html