1. Usage of common loss functions
2022-07-29 06:08:00 【My hair is messy】
Preface
Commonly used loss functions include multi-class cross entropy, mean squared error, and binary cross entropy. A loss function serves three roles: (1) it measures the difference between the model's output and the label; (2) it evaluates how far the model's predictions deviate from the ground truth; (3) it is the objective the neural network optimizes, so the smaller the loss, the closer the predictions are to the true values and the more robust the model.
One. What are the L1 loss (MAE), L2 loss (MSE), smooth L1 loss, and cross-entropy loss functions?
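Briefly: L1 loss (mean absolute error, MAE) averages the absolute differences between predictions and targets and is robust to outliers; L2 loss (mean squared error, MSE) averages the squared differences and penalizes large errors more heavily; smooth L1 loss behaves like L2 near zero and like L1 for large errors, combining stable gradients with robustness to outliers; and cross-entropy loss measures the gap between the predicted class distribution and the true labels, making it the standard choice for classification. A minimal sketch computing each of them on dummy data (all names and values below are illustrative only):

import torch

pred = torch.randn(4, 10)            # dummy predictions (logits for cross entropy)
target = torch.randn(4, 10)          # dummy regression targets
labels = torch.randint(0, 10, (4,))  # dummy class indices

print(torch.nn.L1Loss()(pred, target))            # L1 / MAE
print(torch.nn.MSELoss()(pred, target))           # L2 / MSE
print(torch.nn.SmoothL1Loss()(pred, target))      # smooth L1
print(torch.nn.CrossEntropyLoss()(pred, labels))  # cross entropy on logits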


Two. Usage steps
1. Defining the loss function
The code is as follows (example):
# Define the loss function that drives the gradient updates
loss_fn = torch.nn.CrossEntropyLoss()    # multi-class cross entropy; expects raw logits, so no activation on the output
# loss_fn = torch.nn.MSELoss()           # mean squared error
# loss_fn = torch.nn.BCELoss()           # binary cross entropy; expects probabilities
# loss_fn = torch.nn.BCEWithLogitsLoss() # binary cross entropy that applies sigmoid to the inputs internally
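Note that these losses expect different target formats: CrossEntropyLoss takes raw logits plus integer class indices, while BCELoss and BCEWithLogitsLoss take float targets with the same shape as the output (for example one-hot vectors). A minimal sketch, with illustrative shapes only:

import torch

logits = torch.randn(3, 10)                                 # raw network outputs
labels = torch.randint(0, 10, (3,))                         # integer class indices
one_hot = torch.nn.functional.one_hot(labels, 10).float()   # one-hot float targets

loss_ce = torch.nn.CrossEntropyLoss()(logits, labels)          # logits + class indices
loss_bce = torch.nn.BCEWithLogitsLoss()(logits, one_hot)       # logits + float targets
loss_b = torch.nn.BCELoss()(torch.sigmoid(logits), one_hot)    # probabilities + float targets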
2. Code operation

The code is as follows (example):
import torch
from torchvision import datasets,transforms
from torch.utils.data import DataLoader  # wraps a dataset and returns an iterable over batches
import os
import matplotlib.pyplot as plt
class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # nn.Sequential chains the linear layer, the batch norm and the
        # activation, and outputs the activated result.
        # nn.Linear(in_features, out_features, bias=True) applies a linear
        # transformation: each input sample has 784 features (the 28*28 pixel
        # values of an image) and is mapped to 256 outputs.
        # BatchNorm1d(256) normalizes the activations so the inputs to the
        # nonlinearity stay in its sensitive range, which helps avoid
        # vanishing gradients. nn.ReLU() is the relu activation.
        self.fc1 = torch.nn.Sequential(
            torch.nn.Linear(784, 256),
            torch.nn.BatchNorm1d(256),
            torch.nn.ReLU())
        self.fc2 = torch.nn.Sequential(
            torch.nn.Linear(256, 128),
            torch.nn.BatchNorm1d(128),
            torch.nn.ReLU())
        self.fc3 = torch.nn.Linear(128, 10)

    def forward(self, x):  # forward implements the forward-propagation pass
        # The input arrives as N,C,H,W (batchsize, channels, height, width) and
        # must be flattened to N,V before the fully connected layers:
        # x.size(0) is the batch size, and -1 lets reshape infer the remaining
        # dimension (channels*height*width) from the tensor's data, so
        # (channels, height, width) is straightened out and can connect to the
        # fc layers. reshape behaves like view here.
        x = torch.reshape(x, [x.size(0), -1])
        y = self.fc1(x)       # N,256
        y = self.fc2(y)       # N,128
        self.y = self.fc3(y)  # N,10 raw logits, kept for the logits-based losses
        y = torch.softmax(self.y, 1)
        return y
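# A quick sanity check one can run separately (hypothetical snippet): flattening
# a 28x28 input through the three layers should yield 10 outputs per sample.
# net = Net()
# print(net(torch.randn(4, 1, 28, 28)).shape)  # expected: torch.Size([4, 10])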
if __name__ == '__main__':
    save_params = r"./save_params/params.pth"  # path for saving the parameters
    save_net = r"./save_params/net.pth"        # path for saving the whole network
    # transforms.Compose applies each transform in the list in order:
    # ToTensor scales the pixel values from 0-255 down to 0-1, and
    # Normalize maps 0-1 to (-1, 1) via (image - mean) / std.
    transf = transforms.Compose([transforms.ToTensor(),
                                 transforms.Normalize(mean=[0.5,], std=[0.5,])])
    train_data = datasets.MNIST("./data", train=True, transform=transf, download=True)   # training set
    test_data = datasets.MNIST("./data", train=False, transform=transf, download=False)  # test set
    # DataLoader batches the data (100 images per batch) and shuffles it so each
    # epoch sees the samples in a different random order, giving varied batches
    # to learn from; it can also use multiple worker processes to speed up
    # batch preparation.
    train_loader = DataLoader(train_data, 100, True)
    test_loader = DataLoader(test_data, 100, True)
    print(train_data.data.shape)
    print(train_data.targets.shape)
    print(test_data.data.shape)
    print(test_data.targets.shape)
    print(test_data.classes)
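    # Expected output for standard MNIST: torch.Size([60000, 28, 28]) and
    # torch.Size([60000]) for the training set, torch.Size([10000, 28, 28]) and
    # torch.Size([10000]) for the test set, followed by the ten class-name strings.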
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    net = Net().to(device)  # move the network onto the chosen device (GPU if available)
    if os.path.exists(save_params):
        net.load_state_dict(torch.load(save_params))  # load the parameters only
        print("Parameters loaded successfully")
    else:
        print("No params!")
    # net = torch.load(save_net).to(device)  # alternative: load the network and parameters together
    # loss_fn = torch.nn.CrossEntropyLoss()  # multi-class cross entropy
    # loss_fn = torch.nn.MSELoss()           # mean squared error
    # loss_fn = torch.nn.BCELoss()           # binary cross entropy
    loss_fn = torch.nn.BCEWithLogitsLoss()   # binary cross entropy that applies sigmoid to the inputs internally
    # optim = torch.optim.SGD(net.parameters(), lr=1e-3)  # SGD is a more stable alternative
    # torch.optim.Adam takes an iterable of the parameters to optimize (or dicts
    # defining parameter groups) and a learning rate / step-size factor.
    optim = torch.optim.Adam(net.parameters(), lr=1e-3)  # create the optimizer
    # lists for optional real-time loss plotting during training
    a = []
    b = []
    # plt.ion()
    net.train()
    for epoch in range(1):
        for i, (x, y) in enumerate(train_loader):
            x = x.to(device)
            y = y.to(device)
            # Build one-hot targets for BCEWithLogitsLoss. The width is fixed at
            # 10 classes: using max(y) + 1 would break on a batch that happens
            # not to contain the largest label.
            # torch.nn.functional.one_hot(y, 10).float() is an equivalent shortcut.
            y_ = torch.zeros(len(y), 10).to(device)
            y_[torch.arange(len(y), device=device), y] = 1
            out = net(x)  # forward pass; out holds the softmax probabilities
            # loss = loss_fn(net.y, y)  # with CrossEntropyLoss: raw logits + integer labels
            loss = loss_fn(net.y, y_)  # BCEWithLogitsLoss works on the raw logits
            optim.zero_grad()  # clear the previous gradients
            loss.backward()    # compute the current gradients
            optim.step()       # take one step along the current gradients
            # a.append(i)
            # b.append(loss.item())
            # plt.clf()
            # plt.plot(a, b)
            # plt.pause(0.1)
            if i % 50 == 0:
                print("loss", loss.item())
    plt.ioff()
    plt.show()
    # Test: evaluate the average loss and accuracy on the test set
    eval_loss = 0
    eval_acc = 0
    net.eval()
    with torch.no_grad():  # gradients are not needed for evaluation
        for i, (x, y) in enumerate(test_loader):
            x = x.to(device)  # feed x to the network
            y = y.to(device)
            y_ = torch.zeros(len(y), 10).to(device)
            y_[torch.arange(len(y), device=device), y] = 1
            out = net(x)
            # loss = loss_fn(net.y, y)  # with CrossEntropyLoss
            loss = loss_fn(net.y, y_)  # use the raw logits, as in training
            eval_loss += loss.item() * y.size(0)
            eval_acc += (y == torch.argmax(out, 1)).cpu().sum().item()
    avg_loss = eval_loss / len(test_data)
    avg_acc = eval_acc / len(test_data)
    print(avg_loss)
    print(avg_acc)
    if not os.path.exists("./save_params"):
        os.mkdir("./save_params")
    torch.save(net.state_dict(), save_params)  # save only the parameters
    torch.save(net, save_net)                  # save the whole network
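Once the parameters have been saved, the model can be restored later for prediction. Below is a minimal inference sketch, assuming the Net class and the save path from the script above; the input here is a random stand-in for a real, preprocessed MNIST image:

import torch

net = Net()
net.load_state_dict(torch.load("./save_params/params.pth"))
net.eval()
with torch.no_grad():
    x = torch.randn(1, 1, 28, 28)  # stand-in for a normalized 28x28 image
    pred = torch.argmax(net(x), dim=1)
    print(pred)  # predicted digit class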
Summary

Tips: a quick recap of the article:

loss_fn = torch.nn.CrossEntropyLoss()    # multi-class cross entropy; the output needs no activation function
loss_fn = torch.nn.MSELoss()             # mean squared error; the output needs an activation function
loss_fn = torch.nn.BCELoss()             # binary cross entropy; the output needs an activation function (sigmoid)
loss_fn = torch.nn.BCEWithLogitsLoss()   # binary cross entropy that applies sigmoid to the inputs internally