Current location: Home > Detailed notes on building a neural network with PyTorch

Detailed notes on building a neural network with PyTorch

2022-07-24 07:53:00 【Shadow follows】

Catalog

Steps of building neural network
Packages that need to be imported
Download training set
Building neural networks
Neural network training
Test network
Training on the GPU
Complete code

Steps of building neural network

Train a picture classifier
Use torchvision to load the CIFAR10 training and test datasets, and normalize the data
Define convolutional neural networks
Define the loss function
Use training data to train the network
Test the network with test data

Packages that need to be imported

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data

import ssl
import matplotlib.pyplot as plt
import numpy as np

Download training set

The dataset is downloaded automatically the first time this part runs. In a Linux environment the num_workers parameter of the DataLoader can also be set.

# NOTE(review): this disables HTTPS certificate verification for the whole
# process, presumably to work around certificate errors during the dataset
# download. It is insecure; remove it once the download works without it.
ssl._create_default_https_context = ssl._create_unverified_context

# Turn each image into a tensor, then normalize every channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Training split: downloaded into ./data on first use, shuffled, 4 images per batch.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = data.DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=0)

# Test split: same preprocessing, fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=0)

# Human-readable class names, indexed by label id.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Building neural networks

Here we use an input of shape (3, 32, 32) as an example: 3 is the number of color channels (red, green and blue), and the image is 32 × 32 pixels.

Convolution layer output size (no padding): (input height/width - kernel height/width) / stride + 1
self.conv1 = nn.Conv2d(3, 6, 5)
A convolution layer with 3 input channels, 6 output channels, stride 1 × 1 and a 5 × 5 kernel
The image becomes a (6, 28, 28) tensor, since (32 - 5) / 1 + 1 = 28

self.pool = nn.MaxPool2d(2, 2)
A max-pooling layer with a 2 × 2 window and stride 2, which halves the height and width
The image becomes a (6, 14, 14) tensor

self.conv2 = nn.Conv2d(6, 16, 5)
A convolution layer with 6 input channels, 16 output channels and a 5 × 5 kernel
The image becomes a (16, 10, 10) tensor, since (14 - 5) / 1 + 1 = 10

self.pool = nn.MaxPool2d(2, 2)
The same max-pooling layer (2 × 2 window, stride 2) halves the height and width again
The image becomes a (16, 5, 5) tensor

self.fc1 = nn.Linear(16 * 5 * 5, 120)
A fully connected layer, followed by a ReLU activation function
The feature vector is reduced from 16 × 5 × 5 = 400 values to 120

self.fc2 = nn.Linear(120, 84)
A fully connected layer, followed by a ReLU activation function
The feature vector is reduced from 120 values to 84

self.fc3 = nn.Linear(84, 10)
A fully connected layer with no activation function after it
The feature vector is reduced from 84 values to 10, one score per class

# Visualize image data
def imshow(img):
    """Display an image tensor with matplotlib.

    The tensor is rescaled with ``img / 2 + 0.5``, converted to a NumPy
    array, and its axes are reordered to ``(1, 2, 0)`` before plotting.

    Args:
        img: image tensor; assumed to be laid out as (channels, height,
            width) on the CPU — TODO confirm against callers.
    """
    rescaled = img / 2 + 0.5
    # Move the first axis to the end, which is the layout plt.imshow is given.
    channels_last = rescaled.numpy().transpose((1, 2, 0))
    plt.imshow(channels_last)
    plt.show()


# Fetch one random batch of training images
# dataiter = iter(trainloader)
# images, labels = next(dataiter)
# Show the images; make_grid tiles the batch into a single image
# imshow((torchvision.utils.make_grid(images)))


# Print the image labels; '%5s' right-aligns every name to width 5 and the names are joined with ' '
# print(' '.join('%5s' % classes[labels[j]] for j in range(4)))


class Net(nn.Module):
    """Small convolutional classifier for 3-channel 32 x 32 images.

    Two convolution + ReLU + max-pool stages produce a (16, 5, 5) feature
    map per image, which three fully connected layers reduce to 10 scores.
    """

    def __init__(self):
        super().__init__()
        # Attribute names and their order are kept: they define the
        # state_dict keys and the order shown by print(net).
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the raw class scores for a batch of images.

        Args:
            x: tensor of shape (batch, 3, 32, 32).

        Returns:
            Tensor of shape (batch, 10); no activation is applied to it.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not lead to a
                16 * 5 * 5 feature vector per image.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 16 * 5 * 5), this never folds a wrongly sized input into
        # extra batch rows; such an input now fails at fc1 instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


# Instantiate the network and print its layer structure
net = Net()
print(net)

Running results

Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

Neural network training

def train():
    """Train the module-level ``net`` for three epochs and save its weights.

    Reads the module-level ``trainloader``, ``net``, ``criterion``,
    ``optimizer`` and ``PATH``. Prints the mean loss of every 2000
    mini-batches, then writes ``net.state_dict()`` to ``PATH``.
    """
    report_every = 2000
    for epoch_no in range(1, 4):
        loss_sum = 0.0
        for batch_no, (inputs, labels) in enumerate(trainloader, start=1):
            # Clear the gradients accumulated by the previous step.
            optimizer.zero_grad()
            loss = criterion(net(inputs), labels)
            # Back-propagate, then update the parameters.
            loss.backward()
            optimizer.step()

            loss_sum += loss.item()
            if batch_no % report_every:
                continue
            # Report epoch, batch count and the mean loss since the last report.
            print('[%d, %5d] loss: %.3f' % (epoch_no, batch_no, loss_sum / report_every))
            loss_sum = 0.0
    print('Finished Training')
    torch.save(net.state_dict(), PATH)
    print('Finished Saving')


# File that train() writes the trained weights to
PATH = './cifar_net.pth'
# Loss function: cross-entropy
criterion = nn.CrossEntropyLoss()
# Optimizer: stochastic gradient descent over net.parameters() (the weights
# and biases); lr is the learning rate, momentum the momentum factor
optimizer = optim.SGD(net.parameters(), momentum=0.9, lr=0.001)
# Train the network and save the trained model
train()

Running results

[1,  2000] loss: 2.127
[1,  4000] loss: 1.800
[1,  6000] loss: 1.656
[1,  8000] loss: 1.566
[1, 10000] loss: 1.501
[1, 12000] loss: 1.442
[2,  2000] loss: 1.403
[2,  4000] loss: 1.364
[2,  6000] loss: 1.340
[2,  8000] loss: 1.323
[2, 10000] loss: 1.309
[2, 12000] loss: 1.293
[3,  2000] loss: 1.204
[3,  4000] loss: 1.225
[3,  6000] loss: 1.223
[3,  8000] loss: 1.200
[3, 10000] loss: 1.198
[3, 12000] loss: 1.207
Finished Training
Finished Saving

Test network

This part shows that the accuracy differs from class to class; it depends on the number of images and on how long the network is trained. Not reaching 100% is normal.

# Show one batch of test images together with their true labels
dataiter = iter(testloader)
# DataLoader iterators have no .next() method in current PyTorch releases;
# the builtin next() works on every version.
images, labels = next(dataiter)
# Original images
imshow(torchvision.utils.make_grid(images))
# True labels (one per image in the batch, whatever the batch size is)
print('GroundTruth: ', ' '.join('%5s' % classes[j] for j in labels.tolist()))

# Rebuild the network and restore the weights saved by train()
net = Net()
net.load_state_dict(torch.load(PATH))
# Use the model to predict the classes of the batch
outputs = net(images)
# There are 10 classes; the class with the highest score is the prediction.
# torch.max(outputs, 1) returns (max values, their indices) along dim 1;
# only the indices are needed, so the values are discarded into `_`.
_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join('%5s' % classes[j] for j in predicted.tolist()))

# Overall and per-class accuracy, gathered in a single pass over the test set
correct = 0
total = 0
class_correct = [0] * len(classes)
class_total = [0] * len(classes)
with torch.no_grad():
    # Unpack the batch directly: a loop variable called `data` at module
    # level would overwrite the imported `torch.utils.data as data` alias.
    for images, labels in testloader:
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        hits = (predicted == labels).tolist()
        total += labels.size(0)
        correct += sum(hits)
        # zip() follows the real batch length, so a batch size other than 4
        # (including a final partial batch or a batch of one) is handled.
        for label, hit in zip(labels.tolist(), hits):
            class_total[label] += 1
            class_correct[label] += hit

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

# Which classes perform well and which perform poorly
for name, n_right, n_seen in zip(classes, class_correct, class_total):
    print('Accuracy of %5s : %2d %%' % (name, 100 * n_right / n_seen))

Running results

GroundTruth:    cat  ship  ship plane
Predicted:   ship  ship  ship  ship
Accuracy of the network on the 10000 test images: 56 %
Accuracy of plane : 42 %
Accuracy of   car : 85 %
Accuracy of  bird : 47 %
Accuracy of   cat : 27 %
Accuracy of  deer : 42 %
Accuracy of   dog : 56 %
Accuracy of  frog : 80 %
Accuracy of horse : 59 %
Accuracy of  ship : 80 %
Accuracy of truck : 41 %

Insert picture description here

Training on the GPU

Use the to() method to move the network and the data to the chosen device; the same code still runs on the CPU

# Pick the GPU when one is available, otherwise stay on the CPU.
# (`device` was never defined before, so the lines below raised NameError.)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Move the network's parameters to that device.
net.to(device)
# Every batch must be moved to the same device before it is fed to the network.
inputs, labels = inputs.to(device), labels.to(device)

Complete code

#  Train a picture classifier 
#  Use torchvision to load the CIFAR10 training and test datasets, and normalize the data 
#  Define convolutional neural networks 
#  Define the loss function 
#  Use training data to train the network 
#  Test the network with test data 
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data

import ssl
import matplotlib.pyplot as plt
import numpy as np

# Download the datasets
# NOTE(review): this disables HTTPS certificate verification for the whole
# process, presumably to work around certificate errors during the dataset
# download. It is insecure; remove it once the download works without it.
ssl._create_default_https_context = ssl._create_unverified_context

# Turn each image into a tensor, then normalize every channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Training split: downloaded into ./data on first use, shuffled, 4 images per batch.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = data.DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=0)

# Test split: same preprocessing, fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=0)

# Human-readable class names, indexed by label id.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)


# Visualize image data
def imshow(img):
    """Display an image tensor with matplotlib.

    The tensor is rescaled with ``img / 2 + 0.5``, converted to a NumPy
    array, and its axes are reordered to ``(1, 2, 0)`` before plotting.

    Args:
        img: image tensor; assumed to be laid out as (channels, height,
            width) on the CPU — TODO confirm against callers.
    """
    rescaled = img / 2 + 0.5
    # Move the first axis to the end, which is the layout plt.imshow is given.
    channels_last = rescaled.numpy().transpose((1, 2, 0))
    plt.imshow(channels_last)
    plt.show()


# Fetch one random batch of training images
# dataiter = iter(trainloader)
# images, labels = next(dataiter)
# Show the images; make_grid tiles the batch into a single image
# imshow((torchvision.utils.make_grid(images)))


# Print the image labels; '%5s' right-aligns every name to width 5 and the names are joined with ' '
# print(' '.join('%5s' % classes[labels[j]] for j in range(4)))


class Net(nn.Module):
    """Small convolutional classifier for 3-channel 32 x 32 images.

    Two convolution + ReLU + max-pool stages produce a (16, 5, 5) feature
    map per image, which three fully connected layers reduce to 10 scores.
    """

    def __init__(self):
        super().__init__()
        # Attribute names and their order are kept: they define the
        # state_dict keys and the order shown by print(net).
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the raw class scores for a batch of images.

        Args:
            x: tensor of shape (batch, 3, 32, 32).

        Returns:
            Tensor of shape (batch, 10); no activation is applied to it.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not lead to a
                16 * 5 * 5 feature vector per image.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 16 * 5 * 5), this never folds a wrongly sized input into
        # extra batch rows; such an input now fails at fc1 instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


def train():
    """Train the module-level ``net`` for three epochs and save its weights.

    Reads the module-level ``trainloader``, ``net``, ``criterion``,
    ``optimizer`` and ``PATH``. Prints the mean loss of every 2000
    mini-batches, then writes ``net.state_dict()`` to ``PATH``.
    """
    report_every = 2000
    for epoch_no in range(1, 4):
        loss_sum = 0.0
        for batch_no, (inputs, labels) in enumerate(trainloader, start=1):
            # Clear the gradients accumulated by the previous step.
            optimizer.zero_grad()
            loss = criterion(net(inputs), labels)
            # Back-propagate, then update the parameters.
            loss.backward()
            optimizer.step()

            loss_sum += loss.item()
            if batch_no % report_every:
                continue
            # Report epoch, batch count and the mean loss since the last report.
            print('[%d, %5d] loss: %.3f' % (epoch_no, batch_no, loss_sum / report_every))
            loss_sum = 0.0
    print('Finished Training')
    torch.save(net.state_dict(), PATH)
    print('Finished Saving')


# Instantiate the network and print its layer structure
net = Net()
print(net)

# File that train() writes the trained weights to
PATH = './cifar_net.pth'
# Loss function: cross-entropy
criterion = nn.CrossEntropyLoss()
# Optimizer: stochastic gradient descent over net.parameters() (the weights
# and biases); lr is the learning rate, momentum the momentum factor
optimizer = optim.SGD(net.parameters(), momentum=0.9, lr=0.001)
# Train the network and save the trained model
train()

# Show one batch of test images together with their true labels
dataiter = iter(testloader)
# DataLoader iterators have no .next() method in current PyTorch releases;
# the builtin next() works on every version.
images, labels = next(dataiter)
# Original images
imshow(torchvision.utils.make_grid(images))
# True labels (one per image in the batch, whatever the batch size is)
print('GroundTruth: ', ' '.join('%5s' % classes[j] for j in labels.tolist()))

# Rebuild the network and restore the weights saved by train()
net = Net()
net.load_state_dict(torch.load(PATH))
# Use the model to predict the classes of the batch
outputs = net(images)
# There are 10 classes; the class with the highest score is the prediction.
# torch.max(outputs, 1) returns (max values, their indices) along dim 1;
# only the indices are needed, so the values are discarded into `_`.
_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join('%5s' % classes[j] for j in predicted.tolist()))

# Overall and per-class accuracy, gathered in a single pass over the test set
correct = 0
total = 0
class_correct = [0] * len(classes)
class_total = [0] * len(classes)
with torch.no_grad():
    # Unpack the batch directly: a loop variable called `data` at module
    # level would overwrite the imported `torch.utils.data as data` alias.
    for images, labels in testloader:
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        hits = (predicted == labels).tolist()
        total += labels.size(0)
        correct += sum(hits)
        # zip() follows the real batch length, so a batch size other than 4
        # (including a final partial batch or a batch of one) is handled.
        for label, hit in zip(labels.tolist(), hits):
            class_total[label] += 1
            class_correct[label] += hit

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

# Which classes perform well and which perform poorly
for name, n_right, n_seen in zip(classes, class_correct, class_total):
    print('Accuracy of %5s : %2d %%' % (name, 100 * n_right / n_seen))

# Training on the GPU
# device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# net.to(device)
# inputs, labels = inputs.to(device), labels.to(device)