PyTorch Learning Record (III): Stochastic Gradient Descent, Neural Networks, and Fully Connected Layers
2022-07-28 21:36:00 【Arina, beaver】
1. Stochastic gradient descent
1.1 Activation function and its gradient
1.1.1 Sigmoid / Logistic


import torch
a = torch.linspace(-100,100,100,requires_grad=True)
b = torch.sigmoid(a)
print(b)
''' tensor([0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 6.8349e-39, 5.1533e-38, 3.8855e-37, 2.9296e-36, 2.2089e-35, 1.6655e-34, 1.2557e-33, 9.4681e-33, 7.1388e-32, 5.3825e-31, 4.0584e-30, 3.0599e-29, 2.3072e-28, 1.7396e-27, 1.3116e-26, 9.8893e-26, 7.4564e-25, 5.6220e-24, 4.2389e-23, 3.1961e-22, 2.4098e-21, 1.8169e-20, 1.3699e-19, 1.0329e-18, 7.7881e-18, 5.8721e-17, 4.4274e-16, 3.3382e-15, 2.5170e-14, 1.8978e-13, 1.4309e-12, 1.0789e-11, 8.1345e-11, 6.1333e-10, 4.6244e-09, 3.4867e-08, 2.6289e-07, 1.9822e-06, 1.4945e-05, 1.1267e-04, 8.4891e-04, 6.3653e-03, 4.6075e-02, 2.6696e-01, 7.3304e-01, 9.5392e-01, 9.9363e-01, 9.9915e-01, 9.9989e-01, 9.9999e-01, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00], grad_fn=<SigmoidBackward>) '''
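The gradient of sigmoid can be written from its own output, sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)), so it saturates (gradient close to 0) at both ends of the range above. A minimal sketch checking this with autograd:
import torch
a = torch.linspace(-5, 5, 10, requires_grad=True)
b = torch.sigmoid(a)
b.sum().backward()                           # d sum(b) / da_i = sigmoid'(a_i)
print(torch.allclose(a.grad, b * (1 - b)))   # True: sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))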
1.1.2 Tanh


import torch
a = torch.linspace(-1,1,10,requires_grad=True)
b = torch.tanh(a)
print(b)
''' tensor([-0.7616, -0.6514, -0.5047, -0.3215, -0.1107, 0.1107, 0.3215, 0.5047, 0.6514, 0.7616], grad_fn=<TanhBackward>) '''
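Tanh satisfies tanh'(x) = 1 - tanh(x)^2; a similarly minimal check with autograd:
import torch
a = torch.linspace(-1, 1, 10, requires_grad=True)
b = torch.tanh(a)
b.sum().backward()
print(torch.allclose(a.grad, 1 - b ** 2))    # True: tanh'(x) = 1 - tanh(x)^2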
1.1.3 ReLU


import torch
import torch.nn.functional as F
a = torch.linspace(-1,1,10,requires_grad=True)
b = torch.relu(a)
c = F.relu(a)
print(b)
print(c)
''' tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1111, 0.3333, 0.5556, 0.7778, 1.0000], grad_fn=<ReluBackward0>) tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1111, 0.3333, 0.5556, 0.7778, 1.0000], grad_fn=<ReluBackward0>) '''
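The gradient of ReLU is simply 0 for x < 0 and 1 for x > 0 (PyTorch also returns 0 at x = 0), which is why it is cheap to compute and resistant to vanishing gradients. A quick check:
import torch
a = torch.linspace(-1, 1, 10, requires_grad=True)
b = torch.relu(a)
b.sum().backward()
print(a.grad)  # expected: tensor([0., 0., 0., 0., 0., 1., 1., 1., 1., 1.])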
1.2 Loss Function and its gradient
1.2.1 MSE
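For reference, with a linear prediction pred = xw + b and a label y, the squared-error loss used in the next examples is loss = (y - pred)^2, and its gradient with respect to w is d(loss)/dw = 2 (pred - y) * x. With x = 1, y = 1, w = 2 (the setup below) this gives loss = 1 and d(loss)/dw = 2, which is exactly what autograd returns in 1.2.2 and 1.2.3.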


1.2.2 Computing the gradient with autograd.grad()
import torch
import torch.nn.functional as F
x = torch.ones(1)
w = torch.full([1],2., requires_grad=True) # a tensor of length 1 holding the value 2.0
mse = F.mse_loss(torch.ones(1), x*w) # F.mse_loss(pred, label): the first argument is the predicted value, the second is the label
print(mse)
print(torch.autograd.grad(mse, [w]))
''' tensor(1., grad_fn=<MseLossBackward>) (tensor([2.]),) '''
1.2.3 Computing the gradient with loss.backward()
import torch
import torch.nn.functional as F
x = torch.ones(1)
w = torch.full([1],2., requires_grad=True) # a tensor of length 1 holding the value 2.0
mse = F.mse_loss(torch.ones(1), x*w) # F.mse_loss(pred, label): the first argument is the predicted value, the second is the label
print(mse)
mse.backward()
print(w.grad)
''' tensor(1., grad_fn=<MseLossBackward>) tensor([2.]) '''
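Note that backward() accumulates into w.grad, while autograd.grad() only returns the gradients without touching it; this is why the training loop later calls optimizer.zero_grad() every step. A small continuation of the example above, clearing the gradient by hand:
w.grad.zero_()                          # clear the previously accumulated gradient
mse = F.mse_loss(torch.ones(1), x*w)    # rebuild the graph, since backward() freed the old one
mse.backward()
print(w.grad)                           # tensor([2.]) again; without zero_() it would read tensor([4.])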
1.2.4 Softmax
import torch
import torch.nn.functional as F
a = torch.rand(3)
a.requires_grad_()
print(a)
p = F.softmax(a, dim = 0)
print(p)
print(torch.autograd.grad(p[0], [a], retain_graph=True)) # retain_graph=True keeps the computation graph, so grad() (or backward()) can be called on it several times in a row
print(torch.autograd.grad(p[1], [a], retain_graph=True))
print(torch.autograd.grad(p[2], [a], retain_graph=True))
''' tensor([0.8659, 0.0540, 0.4153], requires_grad=True) tensor([0.4805, 0.2133, 0.3062], grad_fn=<SoftmaxBackward>) (tensor([ 0.2496, -0.1025, -0.1471]),) (tensor([-0.1025, 0.1678, -0.0653]),) (tensor([-0.1471, -0.0653, 0.2124]),) '''
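The pattern in the three gradients above follows the softmax Jacobian: dp_i/da_j = p_i * (delta_ij - p_j). The diagonal entry p_i * (1 - p_i) is positive and every off-diagonal entry -p_i * p_j is negative, which is why each returned gradient has exactly one positive component (the i-th one).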
2. Neural networks and fully connected layers
2.1 Entropy
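For reference, the cross entropy between a target distribution p and a predicted distribution q is H(p, q) = -sum_x p(x) * log q(x); for one-hot labels it reduces to -log q(correct class). In PyTorch, nn.CrossEntropyLoss / F.cross_entropy takes raw logits and is equivalent to log_softmax followed by nll_loss; a minimal sketch:
import torch
import torch.nn.functional as F
logits = torch.randn(4, 10)          # a batch of 4 samples, 10 classes
target = torch.tensor([1, 0, 3, 9])  # class indices
print(F.cross_entropy(logits, target))
print(F.nll_loss(F.log_softmax(logits, dim=1), target))  # same value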


2.2 Multi-class classification
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
w1, b1 = torch.randn(200, 784, requires_grad=True),\
torch.zeros(200, requires_grad=True)
w2, b2 = torch.randn(200, 200, requires_grad=True),\
torch.zeros(200, requires_grad=True)
w3, b3 = torch.randn(10, 200, requires_grad=True),\
torch.zeros(10, requires_grad=True)
torch.nn.init.kaiming_normal_(w1)
torch.nn.init.kaiming_normal_(w2)
torch.nn.init.kaiming_normal_(w3)
def forward(x):
    x = x @ w1.t() + b1
    x = F.relu(x)
    x = x @ w2.t() + b2
    x = F.relu(x)
    x = x @ w3.t() + b3
    x = F.relu(x)  # this last ReLU can also be omitted: CrossEntropyLoss below works on raw logits
    return x
optimizer = torch.optim.SGD([w1,b1,w2,b2,w3,b3], lr = 1e-2)
criteon = nn.CrossEntropyLoss()
epoches = 10
batch_size = 200
mnist_train = datasets.MNIST('../data', train=True, download=True, transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))]))
mnist_val = datasets.MNIST('../data', train=False, download=True, transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))]))
train_loader = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(mnist_val, batch_size=batch_size, shuffle=False)
for epoch in range(epoches):
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(-1, 28*28)
        logits = forward(data)
        loss = criteon(logits, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx*len(data), len(train_loader.dataset),
                100. * batch_idx/len(train_loader), loss.item()))
    test_loss = 0
    total_correct = 0
    for data, target in val_loader:
        data = data.view(-1, 28*28)
        logits = forward(data)  # (N, classes)
        loss = criteon(logits, target)
        test_loss += loss.item()
        pred = logits.data.max(dim=1)[1]
        correct = pred.eq(target.data).sum()
        total_correct += correct
    test_loss /= len(val_loader.dataset)
    accuracy = total_correct / len(val_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, total_correct, len(val_loader.dataset),
        100. * accuracy))
''' ... Train epoch: 7 [0/60000 (0%)] Loss: 0.188797 Train epoch: 7 [20000/60000 (33%)] Loss: 0.157730 Train epoch: 7 [40000/60000 (67%)] Loss: 0.153730 Test set: Average loss: 0.0008, Accuracy: 9513/10000 (95%) Train epoch: 8 [0/60000 (0%)] Loss: 0.242635 Train epoch: 8 [20000/60000 (33%)] Loss: 0.092858 Train epoch: 8 [40000/60000 (67%)] Loss: 0.165861 Test set: Average loss: 0.0008, Accuracy: 9540/10000 (95%) Train epoch: 9 [0/60000 (0%)] Loss: 0.099372 Train epoch: 9 [20000/60000 (33%)] Loss: 0.118166 Train epoch: 9 [40000/60000 (67%)] Loss: 0.155070 Test set: Average loss: 0.0007, Accuracy: 9556/10000 (96%) '''
2.3 Fully connected layer
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(784, 200),
            nn.ReLU(inplace=True),
            nn.Linear(200, 200),
            nn.ReLU(inplace=True),
            nn.Linear(200, 10),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.model(x)
mlp = MLP()
optimizer = torch.optim.SGD(mlp.parameters(), lr = 1e-2)
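With nn.Module the training loop from section 2.2 stays the same; a sketch (reusing train_loader, criteon, and epoches from above), where mlp(data) simply replaces the hand-written forward(data):
for epoch in range(epoches):
    for data, target in train_loader:
        data = data.view(-1, 28*28)
        logits = mlp(data)              # calling the module runs MLP.forward
        loss = criteon(logits, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()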
2.4 Activation functions and GPU acceleration
2.4.1 Leaky ReLU

2.4.2 SELU

2.4.3 softplus
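All three activations above are available in torch.nn.functional (and as the nn.LeakyReLU / nn.SELU / nn.Softplus modules); a minimal sketch:
import torch
import torch.nn.functional as F
x = torch.linspace(-2, 2, 5)
print(F.leaky_relu(x, negative_slope=0.02))  # x for x >= 0, 0.02*x for x < 0
print(F.selu(x))                             # scale * (x if x > 0 else alpha * (exp(x) - 1))
print(F.softplus(x))                         # log(1 + exp(x)), a smooth approximation of ReLU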

2.4.4 GPU acceleration
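A sketch of the usual pattern (assuming a CUDA device and the MLP / loaders defined above): move the network and the loss module to the device once, and move each batch inside the loop:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = MLP().to(device)
criteon = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(net.parameters(), lr=1e-2)
for data, target in train_loader:
    data, target = data.view(-1, 28*28).to(device), target.to(device)
    logits = net(data)
    loss = criteon(logits, target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()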
