PyTorch Learning Notes (3): Stochastic Gradient Descent, Neural Networks, and Fully Connected Layers
2022-07-28 19:42:00 【狸狸Arina】
1. Stochastic Gradient Descent
1.1 Activation Functions and Their Gradients
1.1.1 Sigmoid / Logistic
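Sigmoid squashes any input into (0, 1): σ(x) = 1 / (1 + e^(−x)), and its derivative has the convenient form σ'(x) = σ(x)(1 − σ(x)). The downside is saturation: for large |x| the gradient vanishes, which the long runs of 0 and 1 in the output below make visible.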


import torch
a = torch.linspace(-100,100,100,requires_grad=True)
b = torch.sigmoid(a)
print(b)
''' tensor([0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 6.8349e-39, 5.1533e-38, 3.8855e-37, 2.9296e-36, 2.2089e-35, 1.6655e-34, 1.2557e-33, 9.4681e-33, 7.1388e-32, 5.3825e-31, 4.0584e-30, 3.0599e-29, 2.3072e-28, 1.7396e-27, 1.3116e-26, 9.8893e-26, 7.4564e-25, 5.6220e-24, 4.2389e-23, 3.1961e-22, 2.4098e-21, 1.8169e-20, 1.3699e-19, 1.0329e-18, 7.7881e-18, 5.8721e-17, 4.4274e-16, 3.3382e-15, 2.5170e-14, 1.8978e-13, 1.4309e-12, 1.0789e-11, 8.1345e-11, 6.1333e-10, 4.6244e-09, 3.4867e-08, 2.6289e-07, 1.9822e-06, 1.4945e-05, 1.1267e-04, 8.4891e-04, 6.3653e-03, 4.6075e-02, 2.6696e-01, 7.3304e-01, 9.5392e-01, 9.9363e-01, 9.9915e-01, 9.9989e-01, 9.9999e-01, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00], grad_fn=<SigmoidBackward>) '''
1.1.2 Tanh
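Tanh is a rescaled sigmoid with range (−1, 1): tanh(x) = (e^x − e^(−x)) / (e^x + e^(−x)) = 2σ(2x) − 1, with derivative 1 − tanh²(x). Being zero-centered often helps optimization.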


import torch
a = torch.linspace(-1,1,10,requires_grad=True)
b = torch.tanh(a)
print(b)
''' tensor([-0.7616, -0.6514, -0.5047, -0.3215, -0.1107, 0.1107, 0.3215, 0.5047, 0.6514, 0.7616], grad_fn=<TanhBackward>) '''
1.1.3 ReLU
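ReLU(x) = max(0, x): the derivative is simply 1 for x > 0 and 0 for x < 0, so it is cheap to compute and does not saturate in the positive range, which mitigates vanishing gradients.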


import torch
import torch.nn.functional as F
a = torch.linspace(-1,1,10,requires_grad=True)
b = torch.relu(a)
c = F.relu(a)
print(b)
print(c)
''' tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1111, 0.3333, 0.5556, 0.7778, 1.0000], grad_fn=<ReluBackward0>) tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1111, 0.3333, 0.5556, 0.7778, 1.0000], grad_fn=<ReluBackward0>) '''
1.2 Loss Functions and Their Gradients
1.2.1 MSE
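The formula images here did not survive. For reference, the squared-error loss is Σ(y − ŷ)², and F.mse_loss averages it by default (reduction='mean'). A minimal check against a hand-computed version:

import torch
import torch.nn.functional as F

pred = torch.tensor([1., 2., 3.])
label = torch.tensor([1., 1., 1.])
print(F.mse_loss(pred, label))        # tensor(1.6667): mean of (0, 1, 4)
print(((pred - label) ** 2).mean())   # the same value, computed by hand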


1.2.2 Computing Gradients with autograd.grad()
import torch
import torch.nn.functional as F
x = torch.ones(1)
w = torch.full([1], 2., requires_grad=True)  # length-1 tensor filled with the value 2
mse = F.mse_loss(torch.ones(1), x*w)  # conventionally mse_loss(prediction, label); the order is swapped here, which is harmless because MSE is symmetric
print(mse)
print(torch.autograd.grad(mse, [w]))
''' tensor(1., grad_fn=<MseLossBackward>) (tensor([2.]),) '''
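Sanity check: mse = (y − x·w)² = (1 − 2)² = 1, and d(mse)/dw = 2(x·w − y)·x = 2 · 1 · 1 = 2, matching the printed gradient.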
1.2.3 Computing Gradients with loss.backward()
import torch
import torch.nn.functional as F
x = torch.ones(1)
w = torch.full([1], 2., requires_grad=True)  # length-1 tensor filled with the value 2
mse = F.mse_loss(torch.ones(1), x*w)  # conventionally mse_loss(prediction, label); the order is swapped here, which is harmless because MSE is symmetric
print(mse)
mse.backward()
print(w.grad)
''' tensor(1., grad_fn=<MseLossBackward>) tensor([2.]) '''
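Unlike autograd.grad(), which returns the gradients, backward() accumulates them into each leaf tensor's .grad attribute, so the result is read from w.grad. Repeated backward calls add up unless the gradients are cleared first (e.g. with optimizer.zero_grad()).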
1.2.4 Softmax
import torch
import torch.nn.functional as F
a = torch.rand(3)
a.requires_grad_()
print(a)
p = F.softmax(a, dim = 0)
print(p)
print(torch.autograd.grad(p[0], [a], retain_graph=True))  # retain_graph=True keeps the graph alive, so gradients can be taken (or backward called) multiple times
print(torch.autograd.grad(p[1], [a], retain_graph=True))
print(torch.autograd.grad(p[2], [a], retain_graph=True))
''' tensor([0.8659, 0.0540, 0.4153], requires_grad=True) tensor([0.4805, 0.2133, 0.3062], grad_fn=<SoftmaxBackward>) (tensor([ 0.2496, -0.1025, -0.1471]),) (tensor([-0.1025, 0.1678, -0.0653]),) (tensor([-0.1471, -0.0653, 0.2124]),) '''
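These values match the softmax Jacobian ∂p_i/∂a_j = p_i(δ_ij − p_j): the diagonal entries (i = j) are positive and all off-diagonal entries are negative.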
2. Neural Networks and Fully Connected Layers
2.1 Entropy
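The original formula images are missing. Entropy is H(p) = −Σ p(x) log p(x), and cross entropy is H(p, q) = −Σ p(x) log q(x). As a minimal sketch, the snippet below shows that a uniform distribution has maximal entropy and that F.cross_entropy is equivalent to log_softmax followed by nll_loss:

import torch
import torch.nn.functional as F

p = torch.tensor([0.25, 0.25, 0.25, 0.25])
print(-(p * torch.log2(p)).sum())    # tensor(2.): maximal entropy for 4 outcomes

q = torch.tensor([0.97, 0.01, 0.01, 0.01])
print(-(q * torch.log2(q)).sum())    # ~0.24: a peaked distribution carries little uncertainty

logits = torch.rand(1, 10)
target = torch.tensor([3])
print(F.cross_entropy(logits, target))                    # operates on raw logits
print(F.nll_loss(F.log_softmax(logits, dim=1), target))   # same value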


2.2 Multi-class Classification
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms

# weights are (out_features, in_features); Kaiming init keeps activations well-scaled
w1, b1 = torch.randn(200, 784, requires_grad=True), \
         torch.zeros(200, requires_grad=True)
w2, b2 = torch.randn(200, 200, requires_grad=True), \
         torch.zeros(200, requires_grad=True)
w3, b3 = torch.randn(10, 200, requires_grad=True), \
         torch.zeros(10, requires_grad=True)
torch.nn.init.kaiming_normal_(w1)
torch.nn.init.kaiming_normal_(w2)
torch.nn.init.kaiming_normal_(w3)

def forward(x):
    x = x @ w1.t() + b1
    x = F.relu(x)
    x = x @ w2.t() + b2
    x = F.relu(x)
    x = x @ w3.t() + b3
    x = F.relu(x)  # optional: CrossEntropyLoss expects raw logits, so this last ReLU can be dropped
    return x

optimizer = torch.optim.SGD([w1, b1, w2, b2, w3, b3], lr=1e-2)
criteon = nn.CrossEntropyLoss()
epochs = 10
batch_size = 200

mnist_train = datasets.MNIST('../data', train=True, download=True, transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))]))
mnist_val = datasets.MNIST('../data', train=False, download=True, transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))]))
train_loader = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(mnist_val, batch_size=batch_size, shuffle=False)

for epoch in range(epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(-1, 28*28)  # flatten images to (N, 784)
        logits = forward(data)
        loss = criteon(logits, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

    test_loss = 0
    total_correct = 0
    for data, target in val_loader:
        data = data.view(-1, 28*28)
        logits = forward(data)  # (N, classes)
        loss = criteon(logits, target)
        test_loss += loss.item()
        pred = logits.data.max(dim=1)[1]  # index of the largest logit per sample
        correct = pred.eq(target.data).sum().item()
        total_correct += correct
    test_loss /= len(val_loader.dataset)
    accuracy = total_correct / len(val_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, total_correct, len(val_loader.dataset), 100. * accuracy))
'''
...
Train epoch: 7 [0/60000 (0%)]       Loss: 0.188797
Train epoch: 7 [20000/60000 (33%)]  Loss: 0.157730
Train epoch: 7 [40000/60000 (67%)]  Loss: 0.153730

Test set: Average loss: 0.0008, Accuracy: 9513/10000 (95%)

Train epoch: 8 [0/60000 (0%)]       Loss: 0.242635
Train epoch: 8 [20000/60000 (33%)]  Loss: 0.092858
Train epoch: 8 [40000/60000 (67%)]  Loss: 0.165861

Test set: Average loss: 0.0008, Accuracy: 9540/10000 (95%)

Train epoch: 9 [0/60000 (0%)]       Loss: 0.099372
Train epoch: 9 [20000/60000 (33%)]  Loss: 0.118166
Train epoch: 9 [40000/60000 (67%)]  Loss: 0.155070

Test set: Average loss: 0.0007, Accuracy: 9556/10000 (96%)
'''
2.3 Fully Connected Layers
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(784, 200),
            nn.ReLU(inplace=True),
            nn.Linear(200, 200),
            nn.ReLU(inplace=True),
            nn.Linear(200, 10),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.model(x)

mlp = MLP()
optimizer = torch.optim.SGD(mlp.parameters(), lr=1e-2)
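Unlike the manual setup in section 2.2, nn.Linear creates and initializes its own weight and bias (PyTorch applies a Kaiming-uniform scheme by default), and mlp.parameters() hands all of them to the optimizer in one call.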
2.4 Activation Functions and GPU Acceleration
2.4.1 Leaky ReLU
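The original plot is missing. Leaky ReLU keeps a small slope in the negative range, LeakyReLU(x) = x for x ≥ 0 and αx otherwise, so neurons cannot "die" with a permanently zero gradient. A minimal sketch (α is the negative_slope argument, default 0.01):

import torch
import torch.nn.functional as F

a = torch.linspace(-1, 1, 5)
print(F.leaky_relu(a, negative_slope=0.02))   # negative inputs are scaled by 0.02
print(torch.nn.LeakyReLU(0.02)(a))            # module form, same result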

2.4.2 SELU
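Also missing its plot: SELU is a scaled exponential-linear unit, selu(x) = λx for x ≥ 0 and λα(e^x − 1) otherwise, with fixed constants λ ≈ 1.0507 and α ≈ 1.6733 chosen to keep activations self-normalizing. A minimal sketch:

import torch
import torch.nn.functional as F

a = torch.linspace(-1, 1, 5)
print(F.selu(a))   # exponential and saturating below zero, linear above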

2.4.3 Softplus
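Softplus(x) = (1/β) · log(1 + e^(βx)) is a smooth approximation of ReLU whose derivative is the sigmoid; larger β makes it hug ReLU more tightly. A minimal sketch:

import torch
import torch.nn.functional as F

a = torch.linspace(-1, 1, 5)
print(F.softplus(a))           # default beta=1
print(F.softplus(a, beta=2))   # closer to ReLU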

2.4.4 GPU Acceleration
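The original screenshot is gone. The usual pattern, sketched here under the assumption that the MLP class from section 2.3 is in scope, is to move the module, the loss, and every batch onto the device with .to():

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
mlp = MLP().to(device)                             # assumes the MLP from section 2.3
criteon = torch.nn.CrossEntropyLoss().to(device)

# inside the training loop, move each batch as well:
# data, target = data.to(device), target.to(device)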
