【Day2】 convolutional-neural-networks
2022-07-04 09:37:00 【weixin_45965693】
New takeaways
1. super(MLP, self).__init__(**kwargs): this calls nn.Block's __init__, which provides prefix (to specify the block's name) and params (to specify model parameters).
net3 = MLP(prefix='another_mlp_')
2. net.name_scope(): this calls the name_scope() function provided by nn.Block. The nn.Dense definitions are placed inside this scope. Its purpose is to add a prefix to the names of all layers and parameters inside it, so that they are unique within the system.
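A minimal sketch of the effect (the prefix 'my_net_' is just an illustrative name, not from the note):

from mxnet.gluon import nn

# Layers created inside a name scope inherit the enclosing block's prefix,
# so their names (and parameter names) are unique in the system.
net = nn.Sequential(prefix='my_net_')
with net.name_scope():
    net.add(nn.Dense(256))
    net.add(nn.Dense(10))
print(net.prefix)                    # my_net_
print([blk.name for blk in net])     # ['my_net_dense0', 'my_net_dense1']
print(net.collect_params())          # parameter names carry the same prefix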
Convolutional Neural Networks
Convolution: the input and output can each have multiple channels (e.g., 2 input and 2 output channels).
Pooling: similar to convolution, each step looks at a small window and takes the maximum (or average) element in that window as the output.
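A small sketch of max pooling with nd.Pooling (the 4x4 input and 2x2 window are chosen purely for illustration):

from mxnet import nd

data = nd.arange(16).reshape((1, 1, 4, 4))   # batch x channel x height x width
# each output element is the maximum over a 2x2 window, moved with stride 2
print(nd.Pooling(data=data, pool_type="max", kernel=(2, 2), stride=(2, 2)))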
LeNet
Two convolutional layers + two fully connected layers.
Weight format: output_filter × input_filter × height × width.
When the input has multiple channels, each channel has its own weights; the convolution is applied per channel and the results are summed across channels:
$conv(data, w, b) = \sum_{i} conv(data[:, i, :, :],\ w[0, i, :, :],\ b)$
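A quick numerical check of this channel-summing behaviour (the shapes are chosen only for illustration; note that the bias is added once in total):

from mxnet import nd

data = nd.arange(18).reshape((1, 2, 3, 3))   # 1 sample, 2 input channels, 3x3
w = nd.arange(8).reshape((1, 2, 2, 2))       # 1 output filter, 2 input channels, 2x2 kernel
b = nd.array([1])

out = nd.Convolution(data=data, weight=w, bias=b,
                     kernel=w.shape[2:], num_filter=w.shape[0])

# convolve each input channel separately (with zero bias), then sum over channels and add b once
per_channel = [nd.Convolution(data=data[:, i:i+1], weight=w[:, i:i+1], bias=nd.zeros(1),
                              kernel=w.shape[2:], num_filter=1)
               for i in range(data.shape[1])]
print(out)
print(sum(per_channel) + b)   # identical to `out`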
A convolution block is usually "convolution layer - activation layer - pooling layer". The output is then flattened into a 2D matrix and passed to the fully connected layers that follow.
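The net function below uses weights W1...b4 that the original notebook defined beforehand; a minimal sketch of parameters with matching shapes (assuming 28×28 single-channel Fashion-MNIST inputs) could look like this:

import mxnet as mx
from mxnet import nd

ctx = mx.cpu()
weight_scale = .01

# conv layer 1: 20 output channels, 1 input channel, 5x5 kernel
W1 = nd.random.normal(shape=(20, 1, 5, 5), scale=weight_scale, ctx=ctx)
b1 = nd.zeros(W1.shape[0], ctx=ctx)
# conv layer 2: 50 output channels, 20 input channels, 3x3 kernel
W2 = nd.random.normal(shape=(50, 20, 3, 3), scale=weight_scale, ctx=ctx)
b2 = nd.zeros(W2.shape[0], ctx=ctx)
# 28x28 -> 24x24 (conv1) -> 12x12 (pool) -> 10x10 (conv2) -> 5x5 (pool), so 50*5*5 = 1250 inputs
W3 = nd.random.normal(shape=(1250, 128), scale=weight_scale, ctx=ctx)
b3 = nd.zeros(W3.shape[1], ctx=ctx)
W4 = nd.random.normal(shape=(128, 10), scale=weight_scale, ctx=ctx)
b4 = nd.zeros(W4.shape[1], ctx=ctx)

params = [W1, b1, W2, b2, W3, b3, W4, b4]
for param in params:
    param.attach_grad()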
def net(X, verbose=False):
    X = X.as_in_context(W1.context)
    # first convolutional layer
    h1_conv = nd.Convolution(data=X, weight=W1, bias=b1, kernel=W1.shape[2:], num_filter=W1.shape[0])
    h1_activation = nd.relu(h1_conv)
    h1 = nd.Pooling(data=h1_activation, pool_type="max", kernel=(2,2), stride=(2,2))
    # second convolutional layer
    h2_conv = nd.Convolution(data=h1, weight=W2, bias=b2, kernel=W2.shape[2:], num_filter=W2.shape[0])
    h2_activation = nd.relu(h2_conv)
    h2 = nd.Pooling(data=h2_activation, pool_type="max", kernel=(2,2), stride=(2,2))
    h2 = nd.flatten(h2)
    # first fully connected layer
    h3_linear = nd.dot(h2, W3) + b3
    h3 = nd.relu(h3_linear)
    # second fully connected layer
    h4_linear = nd.dot(h3, W4) + b4
    if verbose:
        print('1st conv block:', h1.shape)
        print('2nd conv block:', h2.shape)
        print('1st dense:', h3.shape)
        print('2nd dense:', h4_linear.shape)
        print('output:', h4_linear)
    return h4_linear
Gluon
No need to worry about the input size.
from mxnet import gluon

net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Conv2D(channels=20, kernel_size=5, activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
    net.add(gluon.nn.Conv2D(channels=50, kernel_size=3, activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
    net.add(gluon.nn.Flatten())
    net.add(gluon.nn.Dense(128, activation="relu"))
    net.add(gluon.nn.Dense(10))
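Shapes are inferred on the first forward pass, which is why none of the layers above need an input size; a quick sketch (assuming 28×28 single-channel inputs):

from mxnet import nd

net.initialize()
x = nd.random.uniform(shape=(4, 1, 28, 28))   # a batch of 4 single-channel 28x28 images
print(net(x).shape)                           # (4, 10) -- input sizes were inferred automatically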
Creating neural network blocks
What is nn.Block? - It provides flexible network definitions.
In gluon, nn.Block is a generalized building block. An entire neural network can be an nn.Block, and a single layer is also an nn.Block. We can nest nn.Blocks (almost) arbitrarily to build new nn.Blocks. An nn.Block mainly provides:
- parameter storage
- a description of how forward is executed
- automatic differentiation
from mxnet import nd
from mxnet.gluon import nn

class MLP(nn.Block):
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)
        with self.name_scope():
            self.dense0 = nn.Dense(256)
            self.dense1 = nn.Dense(10)

    def forward(self, x):
        return self.dense1(nd.relu(self.dense0(x)))
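Using the block works the same as using any other layer (a quick sketch; the input shape is just illustrative):

net2 = MLP()
net2.initialize()
x = nd.random.uniform(shape=(4, 20))
print(net2(x).shape)   # (4, 10)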
class FancyMLP(nn.Block):
    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)
        with self.name_scope():
            self.dense = nn.Dense(256)
            # a constant weight created directly as an NDArray (not registered as a Parameter)
            self.weight = nd.random_uniform(shape=(256, 20))

    def forward(self, x):
        x = nd.relu(self.dense(x))
        print('layer 1:', x)
        x = nd.relu(nd.dot(x, self.weight) + 1)
        print('layer 2:', x)
        x = nd.relu(self.dense(x))
        return x

fancy_mlp = FancyMLP()
fancy_mlp.initialize()
y = fancy_mlp(x)
print(y.shape)
What is nn.Sequential? - An even simpler way to define a network. nn.Sequential is an nn.Block container: nn.Blocks are added through add, and it automatically generates a forward() function that simply runs the added nn.Blocks one by one.
class Sequential(nn.Block):
    def __init__(self, **kwargs):
        super(Sequential, self).__init__(**kwargs)

    def add(self, block):
        self._children.append(block)

    def forward(self, x):
        for block in self._children:
            x = block(x)
        return x
Adding layers:
net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(256, activation="relu"))
    net.add(nn.Dense(10))
net.initialize()
The classes under nn are basically all subclasses of nn.Block, so they can be nested very conveniently.
class RecMLP(nn.Block):
    def __init__(self, **kwargs):
        super(RecMLP, self).__init__(**kwargs)
        self.net = nn.Sequential()
        with self.name_scope():
            self.net.add(nn.Dense(256, activation="relu"))
            self.net.add(nn.Dense(128, activation="relu"))
            self.dense = nn.Dense(64)

    def forward(self, x):
        return nd.relu(self.dense(self.net(x)))

rec_mlp = nn.Sequential()
rec_mlp.add(RecMLP())
rec_mlp.add(nn.Dense(10))
print(rec_mlp)
Initializing model parameters
Access the parameters with: params = net.collect_params()
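For example (a small sketch, assuming the net built in the previous section):

params = net.collect_params()
print(params)   # a ParameterDict listing every weight and bias under its prefixed name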
from mxnet import init

class MyInit(init.Initializer):
    def __init__(self):
        super(MyInit, self).__init__()
        self._verbose = True

    def _init_weight(self, _, arr):
        # initialize the weights; with out=arr we do not need to specify the shape
        nd.random.uniform(low=5, high=10, out=arr)

    def _init_bias(self, _, arr):
        # initialize the bias
        arr[:] = 2

params.initialize(init=MyInit(), force_reinit=True)
print(net[0].weight.data(), net[0].bias.data())
Sharing model parameters
net.add(nn.Dense(4, in_units=4, activation="relu"))
net.add(nn.Dense(4, in_units=4, activation="relu", params=net[-1].params))
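Since the second layer reuses the first layer's params, the two weights are literally the same array. A quick check (assuming these two layers are the only ones in a fresh nn.Sequential called net):

net.initialize()
print(net[0].weight.data() == net[1].weight.data())   # all ones: a single shared weight array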
Defining a simple custom layer
The code below defines a layer that subtracts the mean from its input.
from mxnet import nd
from mxnet.gluon import nn

class CenteredLayer(nn.Block):
    def __init__(self, **kwargs):
        super(CenteredLayer, self).__init__(**kwargs)

    def forward(self, x):
        return x - x.mean()

layer = CenteredLayer()   # no model parameters, so no initialize() is needed
layer(nd.array([1, 2, 3, 4, 5]))
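The custom layer can also be dropped into a larger network like any built-in layer (a small sketch; the Dense width and input shape are illustrative):

net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(128))
    net.add(CenteredLayer())
net.initialize()
y = net(nd.random.uniform(shape=(4, 8)))
print(y.mean())   # very close to 0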
AlexNet: a deep convolutional neural network
net = nn.Sequential()
# Use a large 11 x 11 window to capture objects, with stride 4 to greatly reduce the output
# height and width. The number of output channels is much larger than in LeNet.
net.add(nn.Conv2D(96, kernel_size=11, strides=4, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        # Use a smaller window, with padding of 2 so the input and output height/width match,
        # and increase the number of output channels
        nn.Conv2D(256, kernel_size=5, padding=2, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        # Three consecutive convolutional layers with an even smaller window. Except for the
        # last one, the number of output channels is increased further.
        # No pooling after the first two of these layers, so height and width are preserved.
        nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),
        nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),
        nn.Conv2D(256, kernel_size=3, padding=1, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        # The fully connected layers are several times wider than in LeNet; dropout layers
        # are used to mitigate overfitting
        nn.Dense(4096, activation="relu"), nn.Dropout(0.5),
        nn.Dense(4096, activation="relu"), nn.Dropout(0.5),
        # Output layer: 10 classes for Fashion-MNIST instead of the 1000 in the paper
        nn.Dense(10))
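A sketch of how the shapes evolve layer by layer (assuming the 224×224 single-channel inputs that Fashion-MNIST is typically resized to for AlexNet):

net.initialize()
X = nd.random.uniform(shape=(1, 1, 224, 224))
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:', X.shape)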
Trick: dropout - combating overfitting
Dropout is usually applied to the input layer or a hidden layer as follows:
- randomly select a subset of that layer's outputs as dropped elements
- multiply the dropped elements by 0
- scale up the non-dropped elements
Each iteration effectively trains only a part of the model.
def dropout(X, drop_prob):
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    # in this case every element is dropped
    if keep_prob == 0:
        return X.zeros_like()
    # randomly select a subset of this layer's outputs as dropped elements
    mask = nd.random.uniform(0, 1, X.shape) < keep_prob
    return mask * X / keep_prob
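A quick sanity check of the function on a small matrix:

A = nd.arange(20).reshape((5, 4))
print(dropout(A, 0.0))   # nothing is dropped
print(dropout(A, 0.5))   # about half the entries are zeroed, the rest are scaled by 1/0.5
print(dropout(A, 1.0))   # everything is dropped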
from mxnet import autograd, nd

num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256

W1 = nd.random.normal(scale=0.01, shape=(num_inputs, num_hiddens1))
b1 = nd.zeros(num_hiddens1)
W2 = nd.random.normal(scale=0.01, shape=(num_hiddens1, num_hiddens2))
b2 = nd.zeros(num_hiddens2)
W3 = nd.random.normal(scale=0.01, shape=(num_hiddens2, num_outputs))
b3 = nd.zeros(num_outputs)

params = [W1, b1, W2, b2, W3, b3]
for param in params:
    param.attach_grad()

drop_prob1, drop_prob2 = 0.2, 0.5

def net(X):
    X = X.reshape((-1, num_inputs))
    H1 = (nd.dot(X, W1) + b1).relu()
    if autograd.is_training():        # apply dropout only when training the model
        H1 = dropout(H1, drop_prob1)  # dropout after the first fully connected layer
    H2 = (nd.dot(H1, W2) + b2).relu()
    if autograd.is_training():
        H2 = dropout(H2, drop_prob2)  # dropout after the second fully connected layer
    return nd.dot(H2, W3) + b3
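autograd.is_training() is only True inside autograd.record(), so dropout is active during training and skipped automatically at evaluation time; a small sketch:

X = nd.random.uniform(shape=(2, num_inputs))
with autograd.record():
    train_out = net(X)    # is_training() is True here, so dropout is applied
test_out = net(X)         # outside record(): no dropout
print(train_out.shape, test_out.shape)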
VGG: very deep networks built from repeated elements
def vgg_block(num_convs, num_channels):
    blk = nn.Sequential()
    for _ in range(num_convs):
        blk.add(nn.Conv2D(num_channels, kernel_size=3,
                          padding=1, activation='relu'))
    blk.add(nn.MaxPool2D(pool_size=2, strides=2))
    return blk

def vgg(conv_arch):
    net = nn.Sequential()
    # convolutional part
    for (num_convs, num_channels) in conv_arch:
        net.add(vgg_block(num_convs, num_channels))
    # fully connected part
    net.add(nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
            nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
            nn.Dense(10))
    return net
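# conv_arch lists (num_convs, num_channels) per block; the tuple below is the standard
# VGG-11 configuration used in the tutorial, assumed here so that the call below runs
conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))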
net = vgg(conv_arch)
Batch normalization (batch norm)
Benefit: faster convergence.
Normalize at every layer
Normalize per channel
Mean 0, variance 1
At test time, use the mean and variance of the entire dataset. But when the training set is very large, this is expensive to compute, so we approximate it with moving averages (moving_mean and moving_variance).
def batch_norm(X, gamma, beta, is_training, moving_mean, moving_variance,
               eps=1e-5, moving_momentum=0.9):
    assert len(X.shape) in (2, 4)
    # fully connected layer: batch_size x feature
    if len(X.shape) == 2:
        # mean and variance of each input dimension over the batch
        mean = X.mean(axis=0)
        variance = ((X - mean)**2).mean(axis=0)
    # 2D convolution: batch_size x channel x height x width
    else:
        # mean and variance per channel; keep the 4D shape so broadcasting works correctly
        mean = X.mean(axis=(0, 2, 3), keepdims=True)
        variance = ((X - mean)**2).mean(axis=(0, 2, 3), keepdims=True)
        # reshape so that broadcasting works correctly
        moving_mean = moving_mean.reshape(mean.shape)
        moving_variance = moving_variance.reshape(mean.shape)

    # normalize
    if is_training:
        X_hat = (X - mean) / nd.sqrt(variance + eps)
        #!!! update the global mean and variance
        moving_mean[:] = moving_momentum * moving_mean + (1.0 - moving_momentum) * mean
        moving_variance[:] = moving_momentum * moving_variance + (1.0 - moving_momentum) * variance
    else:
        #!!! at test time, use the global mean and variance
        X_hat = (X - moving_mean) / nd.sqrt(moving_variance + eps)

    # scale and shift
    return gamma.reshape(mean.shape) * X_hat + beta.reshape(mean.shape)
Using batch normalization in Gluon
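In Gluon, nn.BatchNorm maintains the moving statistics for us; a minimal sketch (the layer sizes are illustrative, not prescribed by the note):

from mxnet.gluon import nn

net = nn.Sequential()
with net.name_scope():
    net.add(nn.Conv2D(channels=20, kernel_size=5),
            nn.BatchNorm(axis=1),              # normalize over the channel axis
            nn.Activation('relu'),
            nn.MaxPool2D(pool_size=2, strides=2),
            nn.Flatten(),
            nn.Dense(10))
net.initialize()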