PyTorch Learning Record (III): Stochastic Gradient Descent, Neural Networks, and Fully Connected Layers
2022-07-28 21:36:00 【Arina, beaver】
1. Stochastic gradient descent
1.1 Activation function and its gradient
1.1.1 Sigmoid / Logistic


import torch
a = torch.linspace(-100,100,100,requires_grad=True)
b = torch.sigmoid(a)
print(b)
''' tensor([0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 6.8349e-39, 5.1533e-38, 3.8855e-37, 2.9296e-36, 2.2089e-35, 1.6655e-34, 1.2557e-33, 9.4681e-33, 7.1388e-32, 5.3825e-31, 4.0584e-30, 3.0599e-29, 2.3072e-28, 1.7396e-27, 1.3116e-26, 9.8893e-26, 7.4564e-25, 5.6220e-24, 4.2389e-23, 3.1961e-22, 2.4098e-21, 1.8169e-20, 1.3699e-19, 1.0329e-18, 7.7881e-18, 5.8721e-17, 4.4274e-16, 3.3382e-15, 2.5170e-14, 1.8978e-13, 1.4309e-12, 1.0789e-11, 8.1345e-11, 6.1333e-10, 4.6244e-09, 3.4867e-08, 2.6289e-07, 1.9822e-06, 1.4945e-05, 1.1267e-04, 8.4891e-04, 6.3653e-03, 4.6075e-02, 2.6696e-01, 7.3304e-01, 9.5392e-01, 9.9363e-01, 9.9915e-01, 9.9989e-01, 9.9999e-01, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00], grad_fn=<SigmoidBackward>) '''
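The gradient of sigmoid can be written from its own output, sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)), so it saturates (gradient close to 0) at both ends of the range above. A minimal sketch checking this with autograd:
import torch
a = torch.linspace(-5, 5, 10, requires_grad=True)
b = torch.sigmoid(a)
b.sum().backward()                           # d sum(b) / da_i = sigmoid'(a_i)
print(torch.allclose(a.grad, b * (1 - b)))   # True: sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))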
1.1.2 Tanh


import torch
a = torch.linspace(-1,1,10,requires_grad=True)
b = torch.tanh(a)
print(b)
''' tensor([-0.7616, -0.6514, -0.5047, -0.3215, -0.1107, 0.1107, 0.3215, 0.5047, 0.6514, 0.7616], grad_fn=<TanhBackward>) '''
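Tanh satisfies tanh'(x) = 1 - tanh(x)^2; a similarly minimal check with autograd:
import torch
a = torch.linspace(-1, 1, 10, requires_grad=True)
b = torch.tanh(a)
b.sum().backward()
print(torch.allclose(a.grad, 1 - b ** 2))    # True: tanh'(x) = 1 - tanh(x)^2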
1.1.3 ReLU


import torch
import torch.nn.functional as F
a = torch.linspace(-1,1,10,requires_grad=True)
b = torch.relu(a)
c = F.relu(a)
print(b)
print(c)
''' tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1111, 0.3333, 0.5556, 0.7778, 1.0000], grad_fn=<ReluBackward0>) tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1111, 0.3333, 0.5556, 0.7778, 1.0000], grad_fn=<ReluBackward0>) '''
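The gradient of ReLU is simply 0 for x < 0 and 1 for x > 0 (PyTorch also returns 0 at x = 0), which is why it is cheap to compute and resistant to vanishing gradients. A quick check:
import torch
a = torch.linspace(-1, 1, 10, requires_grad=True)
b = torch.relu(a)
b.sum().backward()
print(a.grad)  # expected: tensor([0., 0., 0., 0., 0., 1., 1., 1., 1., 1.])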
1.2 Loss Function and its gradient
1.2.1 MSE
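For reference, with a linear prediction pred = xw + b and a label y, the squared-error loss used in the next examples is loss = (y - pred)^2, and its gradient with respect to w is d(loss)/dw = 2 (pred - y) * x. With x = 1, y = 1, w = 2 (the setup below) this gives loss = 1 and d(loss)/dw = 2, which is exactly what autograd returns in 1.2.2 and 1.2.3.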


1.2.2 Computing the gradient with autograd.grad()
import torch
import torch.nn.functional as F
x = torch.ones(1)
w = torch.full([1],2., requires_grad=True) # a tensor of length 1 holding the value 2.0
mse = F.mse_loss(torch.ones(1), x*w) # F.mse_loss(pred, label): the first argument is the predicted value, the second is the label
print(mse)
print(torch.autograd.grad(mse, [w]))
''' tensor(1., grad_fn=<MseLossBackward>) (tensor([2.]),) '''
1.2.3 Computing the gradient with loss.backward()
import torch
import torch.nn.functional as F
x = torch.ones(1)
w = torch.full([1],2., requires_grad=True) # a tensor of length 1 holding the value 2.0
mse = F.mse_loss(torch.ones(1), x*w) # F.mse_loss(pred, label): the first argument is the predicted value, the second is the label
print(mse)
mse.backward()
print(w.grad)
''' tensor(1., grad_fn=<MseLossBackward>) tensor([2.]) '''
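Note that backward() accumulates into w.grad, while autograd.grad() only returns the gradients without touching it; this is why the training loop later calls optimizer.zero_grad() every step. A small continuation of the example above, clearing the gradient by hand:
w.grad.zero_()                          # clear the previously accumulated gradient
mse = F.mse_loss(torch.ones(1), x*w)    # rebuild the graph, since backward() freed the old one
mse.backward()
print(w.grad)                           # tensor([2.]) again; without zero_() it would read tensor([4.])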
1.2.4 Softmax
import torch
import torch.nn.functional as F
a = torch.rand(3)
a.requires_grad_()
print(a)
p = F.softmax(a, dim = 0)
print(p)
print(torch.autograd.grad(p[0], [a], retain_graph=True)) # retain_graph=True keeps the computation graph, so grad() (or backward()) can be called on it several times in a row
print(torch.autograd.grad(p[1], [a], retain_graph=True))
print(torch.autograd.grad(p[2], [a], retain_graph=True))
''' tensor([0.8659, 0.0540, 0.4153], requires_grad=True) tensor([0.4805, 0.2133, 0.3062], grad_fn=<SoftmaxBackward>) (tensor([ 0.2496, -0.1025, -0.1471]),) (tensor([-0.1025, 0.1678, -0.0653]),) (tensor([-0.1471, -0.0653, 0.2124]),) '''
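The pattern in the three gradients above follows the softmax Jacobian: dp_i/da_j = p_i * (delta_ij - p_j). The diagonal entry p_i * (1 - p_i) is positive and every off-diagonal entry -p_i * p_j is negative, which is why each returned gradient has exactly one positive component (the i-th one).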
2. Neural networks and fully connected layers
2.1 Entropy
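For reference, the cross entropy between a target distribution p and a predicted distribution q is H(p, q) = -sum_x p(x) * log q(x); for one-hot labels it reduces to -log q(correct class). In PyTorch, nn.CrossEntropyLoss / F.cross_entropy takes raw logits and is equivalent to log_softmax followed by nll_loss; a minimal sketch:
import torch
import torch.nn.functional as F
logits = torch.randn(4, 10)          # a batch of 4 samples, 10 classes
target = torch.tensor([1, 0, 3, 9])  # class indices
print(F.cross_entropy(logits, target))
print(F.nll_loss(F.log_softmax(logits, dim=1), target))  # same value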


2.2 Multi-class classification
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
w1, b1 = torch.randn(200, 784, requires_grad=True),\
torch.zeros(200, requires_grad=True)
w2, b2 = torch.randn(200, 200, requires_grad=True),\
torch.zeros(200, requires_grad=True)
w3, b3 = torch.randn(10, 200, requires_grad=True),\
torch.zeros(10, requires_grad=True)
torch.nn.init.kaiming_normal_(w1)
torch.nn.init.kaiming_normal_(w2)
torch.nn.init.kaiming_normal_(w3)
def forward(x):
    x = x @ w1.t() + b1
    x = F.relu(x)
    x = x @ w2.t() + b2
    x = F.relu(x)
    x = x @ w3.t() + b3
    x = F.relu(x)  # this last ReLU can also be omitted: CrossEntropyLoss below works on raw logits
    return x
optimizer = torch.optim.SGD([w1,b1,w2,b2,w3,b3], lr = 1e-2)
criteon = nn.CrossEntropyLoss()
epoches = 10
batch_size = 200
mnist_train = datasets.MNIST('../data', train=True, download=True, transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))]))
mnist_val = datasets.MNIST('../data', train=False, download=True, transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))]))
train_loader = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(mnist_val, batch_size=batch_size, shuffle=False)
for epoch in range(epoches):
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(-1, 28*28)
        logits = forward(data)
        loss = criteon(logits, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx*len(data), len(train_loader.dataset),
                100. * batch_idx/len(train_loader), loss.item()))
    test_loss = 0
    total_correct = 0
    for data, target in val_loader:
        data = data.view(-1, 28*28)
        logits = forward(data)  # (N, classes)
        loss = criteon(logits, target)
        test_loss += loss.item()
        pred = logits.data.max(dim=1)[1]
        correct = pred.eq(target.data).sum()
        total_correct += correct
    test_loss /= len(val_loader.dataset)
    accuracy = total_correct / len(val_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, total_correct, len(val_loader.dataset),
        100. * accuracy))
''' ... Train epoch: 7 [0/60000 (0%)] Loss: 0.188797 Train epoch: 7 [20000/60000 (33%)] Loss: 0.157730 Train epoch: 7 [40000/60000 (67%)] Loss: 0.153730 Test set: Average loss: 0.0008, Accuracy: 9513/10000 (95%) Train epoch: 8 [0/60000 (0%)] Loss: 0.242635 Train epoch: 8 [20000/60000 (33%)] Loss: 0.092858 Train epoch: 8 [40000/60000 (67%)] Loss: 0.165861 Test set: Average loss: 0.0008, Accuracy: 9540/10000 (95%) Train epoch: 9 [0/60000 (0%)] Loss: 0.099372 Train epoch: 9 [20000/60000 (33%)] Loss: 0.118166 Train epoch: 9 [40000/60000 (67%)] Loss: 0.155070 Test set: Average loss: 0.0007, Accuracy: 9556/10000 (96%) '''
2.3 Fully connected layer
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(784, 200),
            nn.ReLU(inplace=True),
            nn.Linear(200, 200),
            nn.ReLU(inplace=True),
            nn.Linear(200, 10),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.model(x)
mlp = MLP()
optimizer = torch.optim.SGD(mlp.parameters(), lr = 1e-2)
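With nn.Module the training loop from section 2.2 stays the same; a sketch (reusing train_loader, criteon, and epoches from above), where mlp(data) simply replaces the hand-written forward(data):
for epoch in range(epoches):
    for data, target in train_loader:
        data = data.view(-1, 28*28)
        logits = mlp(data)              # calling the module runs MLP.forward
        loss = criteon(logits, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()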
2.4 Activation functions and GPU acceleration
2.4.1 Leaky ReLU

2.4.2 SELU

2.4.3 softplus
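All three activations above are available in torch.nn.functional (and as the nn.LeakyReLU / nn.SELU / nn.Softplus modules); a minimal sketch:
import torch
import torch.nn.functional as F
x = torch.linspace(-2, 2, 5)
print(F.leaky_relu(x, negative_slope=0.02))  # x for x >= 0, 0.02*x for x < 0
print(F.selu(x))                             # scale * (x if x > 0 else alpha * (exp(x) - 1))
print(F.softplus(x))                         # log(1 + exp(x)), a smooth approximation of ReLU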

2.4.4 GPU acceleration
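A sketch of the usual pattern (assuming a CUDA device and the MLP / loaders defined above): move the network and the loss module to the device once, and move each batch inside the loop:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = MLP().to(device)
criteon = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(net.parameters(), lr=1e-2)
for data, target in train_loader:
    data, target = data.view(-1, 28*28).to(device), target.to(device)
    logits = net(data)
    loss = criteon(logits, target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()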
