【Day2】 convolutional-neural-networks
2022-07-04 09:37:00 【weixin_45965693】
New things learned
1. super(MLP, self).__init__(**kwargs): this calls nn.Block's __init__, which provides prefix (to set the block's name) and params (to supply model parameters).
net3 = MLP(prefix='another_mlp_')
2. net.name_scope(): this calls the name_scope() provided by nn.Block. The nn.Dense definitions are placed inside this scope. Its job is to prepend a prefix to the names of all layers and parameters defined inside, so that each name is unique within the system.
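A minimal sketch of how the prefix shows up in parameter names (the TinyMLP class and its layer size are made up for illustration):

from mxnet.gluon import nn

class TinyMLP(nn.Block):
    def __init__(self, **kwargs):
        super(TinyMLP, self).__init__(**kwargs)
        with self.name_scope():
            self.dense0 = nn.Dense(4)

net3 = TinyMLP(prefix='another_mlp_')
# every parameter created inside name_scope() carries the block prefix,
# e.g. another_mlp_dense0_weight and another_mlp_dense0_bias
print(net3.collect_params())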
Convolutional neural networks
Convolution: the input and the output can each have multiple channels (e.g. 2 input channels and 2 output channels).
Pooling: similar to convolution, each step looks at a small window and takes the maximum or the average element of that window as the output.
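A tiny example of max pooling with nd.Pooling, the same operator used in the LeNet code below (the toy input is made up):

from mxnet import nd

X = nd.arange(16).reshape((1, 1, 4, 4))  # batch x channel x height x width
# a 2x2 max-pooling window moved with stride 2 keeps the largest element of each window
print(nd.Pooling(data=X, pool_type='max', kernel=(2, 2), stride=(2, 2)))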
LeNet
Two convolutional layers + two fully connected layers.
Weight format: output_channels × input_channels × height × width.
When the input has multiple channels, each channel gets its own weights; the convolution is computed per channel and the results are summed across channels:
conv(data, w, b) = Σ_i conv(data[:, i, :, :], w[0, i, :, :], b)
A convolutional block is typically "convolution - activation - pooling". The result is then flattened into a 2D matrix and passed to the fully connected layers that follow.
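The net function below assumes the parameters W1 through b4 already exist and have gradients attached. A minimal sketch of how they might be created (the shapes assume 28 × 28 single-channel Fashion-MNIST images and are assumptions of this sketch, not part of the original notes):

import mxnet as mx
from mxnet import nd

ctx = mx.cpu()
weight_scale = .01

# output_channels x input_channels x kernel_height x kernel_width
W1 = nd.random.normal(scale=weight_scale, shape=(20, 1, 5, 5), ctx=ctx)
b1 = nd.zeros(W1.shape[0], ctx=ctx)
W2 = nd.random.normal(scale=weight_scale, shape=(50, 20, 3, 3), ctx=ctx)
b2 = nd.zeros(W2.shape[0], ctx=ctx)
# 50 channels of 5 x 5 feature maps flatten to 1250 inputs for the first dense layer
W3 = nd.random.normal(scale=weight_scale, shape=(1250, 128), ctx=ctx)
b3 = nd.zeros(W3.shape[1], ctx=ctx)
W4 = nd.random.normal(scale=weight_scale, shape=(128, 10), ctx=ctx)
b4 = nd.zeros(W4.shape[1], ctx=ctx)

params = [W1, b1, W2, b2, W3, b3, W4, b4]
for param in params:
    param.attach_grad()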
def net(X, verbose=False):
    X = X.as_in_context(W1.context)
    # first convolutional block
    h1_conv = nd.Convolution(data=X, weight=W1, bias=b1, kernel=W1.shape[2:], num_filter=W1.shape[0])
    h1_activation = nd.relu(h1_conv)
    h1 = nd.Pooling(data=h1_activation, pool_type="max", kernel=(2,2), stride=(2,2))
    # second convolutional block
    h2_conv = nd.Convolution(data=h1, weight=W2, bias=b2, kernel=W2.shape[2:], num_filter=W2.shape[0])
    h2_activation = nd.relu(h2_conv)
    h2 = nd.Pooling(data=h2_activation, pool_type="max", kernel=(2,2), stride=(2,2))
    h2 = nd.flatten(h2)
    # first fully connected layer
    h3_linear = nd.dot(h2, W3) + b3
    h3 = nd.relu(h3_linear)
    # second fully connected layer
    h4_linear = nd.dot(h3, W4) + b4
    if verbose:
        print('1st conv block:', h1.shape)
        print('2nd conv block:', h2.shape)
        print('1st dense:', h3.shape)
        print('2nd dense:', h4_linear.shape)
        print('output:', h4_linear)
    return h4_linear
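A quick forward pass to inspect the intermediate shapes (assuming the parameter sketch above and a Fashion-MNIST-sized batch):

net(nd.random.uniform(shape=(4, 1, 28, 28)), verbose=True)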
Gluon
No need to worry about input sizes: shapes are inferred from the data at the first forward pass.
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Conv2D(channels=20, kernel_size=5, activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
    net.add(gluon.nn.Conv2D(channels=50, kernel_size=3, activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
    net.add(gluon.nn.Flatten())
    net.add(gluon.nn.Dense(128, activation="relu"))
    net.add(gluon.nn.Dense(10))
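A quick sanity check (the batch size and input size are assumptions of this sketch); thanks to deferred initialization, every layer infers its input size on the first call:

from mxnet import nd

net.initialize()
X = nd.random.uniform(shape=(4, 1, 28, 28))
print(net(X).shape)  # expected: (4, 10)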
Creating a neural network Block
What is nn.Block? It provides a flexible way to define networks.
In gluon, nn.Block is a generic building block. A whole neural network can be an nn.Block, and a single layer is also an nn.Block. We can (almost) arbitrarily nest nn.Blocks to build new nn.Blocks. It mainly provides:
- storage for parameters
- a description of how forward is executed
- automatic differentiation
class MLP(nn.Block):
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)
        with self.name_scope():
            self.dense0 = nn.Dense(256)
            self.dense1 = nn.Dense(10)

    def forward(self, x):
        return self.dense1(nd.relu(self.dense0(x)))
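A usage sketch (the input shape is an assumption):

from mxnet import nd

mlp = MLP()
mlp.initialize()
x = nd.random.uniform(shape=(4, 20))
print(mlp(x).shape)  # (4, 10)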
class FancyMLP(nn.Block):
    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)
        with self.name_scope():
            self.dense = nn.Dense(256)
            self.weight = nd.random_uniform(shape=(256, 20))

    def forward(self, x):
        x = nd.relu(self.dense(x))
        print('layer 1:', x)
        x = nd.relu(nd.dot(x, self.weight) + 1)
        print('layer 2:', x)
        x = nd.relu(self.dense(x))
        return x

fancy_mlp = FancyMLP()
fancy_mlp.initialize()
y = fancy_mlp(x)
print(y.shape)
What is nn.Sequential? It makes definitions even simpler. nn.Sequential is an nn.Block container: blocks are appended with add, and it automatically generates a forward() function that simply runs the added nn.Blocks one after another.
class Sequential(nn.Block):
    def __init__(self, **kwargs):
        super(Sequential, self).__init__(**kwargs)

    def add(self, block):
        self._children.append(block)

    def forward(self, x):
        for block in self._children:
            x = block(x)
        return x
Adding layers:
net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(256, activation="relu"))
    net.add(nn.Dense(10))
net.initialize()
The classes under nn are almost all subclasses of nn.Block, so they can easily be nested inside one another.
class RecMLP(nn.Block):
    def __init__(self, **kwargs):
        super(RecMLP, self).__init__(**kwargs)
        self.net = nn.Sequential()
        with self.name_scope():
            self.net.add(nn.Dense(256, activation="relu"))
            self.net.add(nn.Dense(128, activation="relu"))
            self.dense = nn.Dense(64)

    def forward(self, x):
        return nd.relu(self.dense(self.net(x)))

rec_mlp = nn.Sequential()
rec_mlp.add(RecMLP())
rec_mlp.add(nn.Dense(10))
print(rec_mlp)
Initializing model parameters
Access: params = net.collect_params()
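A small sketch of inspecting what comes back (assuming net is the two-layer Sequential defined above; the input shape and parameter names are assumptions of this sketch):

params = net.collect_params()
print(params)                          # a ParameterDict listing every weight and bias
net(nd.random.uniform(shape=(3, 5)))   # one forward pass so deferred shapes are known
print(net[0].weight.data())            # values of the first Dense layer's weight
print(net[0].bias.data())              # values of its bias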
from mxnet import init, nd

class MyInit(init.Initializer):
    def __init__(self):
        super(MyInit, self).__init__()
        self._verbose = True

    def _init_weight(self, _, arr):
        # initialize the weights; with out=arr we don't need to specify the shape
        nd.random.uniform(low=5, high=10, out=arr)

    def _init_bias(self, _, arr):
        # initialize the biases
        arr[:] = 2

params.initialize(init=MyInit(), force_reinit=True)
print(net[0].weight.data(), net[0].bias.data())
Sharing model parameters
net.add(nn.Dense(4, in_units=4, activation="relu"))
net.add(nn.Dense(4, in_units=4, activation="relu", params=net[-1].params))
Defining a simple custom layer
The code below defines a layer that subtracts the mean from its input.
from mxnet import nd
from mxnet.gluon import nn

class CenteredLayer(nn.Block):
    def __init__(self, **kwargs):
        super(CenteredLayer, self).__init__(**kwargs)

    def forward(self, x):
        return x - x.mean()

layer = CenteredLayer()  # no model parameters, so no initialize() needed
layer(nd.array([1, 2, 3, 4, 5]))
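A layer without parameters can be dropped into a bigger network like any other block; a minimal sketch (the layer size and input shape are assumptions):

net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(128))
    net.add(CenteredLayer())
net.initialize()
y = net(nd.random.uniform(shape=(4, 8)))
print(y.mean())  # close to 0, up to floating-point error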
AlexNet: deep convolutional neural networks
net = nn.Sequential()
# Use a relatively large 11 x 11 window to capture objects, with stride 4 to sharply
# reduce the output height and width. The number of output channels is also much
# larger than in LeNet.
net.add(nn.Conv2D(96, kernel_size=11, strides=4, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        # Shrink the convolution window, use padding of 2 to keep the input and output
        # height/width equal, and increase the number of output channels
        nn.Conv2D(256, kernel_size=5, padding=2, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        # Three consecutive convolutional layers with an even smaller window. Except for
        # the last one, the number of output channels is increased further.
        # No pooling after the first two of them, so height and width are preserved.
        nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),
        nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),
        nn.Conv2D(256, kernel_size=3, padding=1, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        # The fully connected layers are several times larger than in LeNet.
        # Dropout layers are used to mitigate overfitting.
        nn.Dense(4096, activation="relu"), nn.Dropout(0.5),
        nn.Dense(4096, activation="relu"), nn.Dropout(0.5),
        # Output layer. Since Fashion-MNIST is used here, the number of classes is 10
        # rather than the 1000 in the original paper.
        nn.Dense(10))
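A shape walk-through (the 1 × 1 × 224 × 224 input matches single-channel Fashion-MNIST images resized for this network; treat it as an assumption here):

net.initialize()
X = nd.random.uniform(shape=(1, 1, 224, 224))
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:', X.shape)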
Trick: dropout, a remedy for overfitting
It is usually applied to the input layer or a hidden layer as follows:
- randomly select a fraction of that layer's outputs as dropped elements
- multiply the dropped elements by 0
- stretch the elements that are kept
So on every pass only a randomly activated part of the model runs.
def dropout(X, drop_prob):
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    # in this case every element is dropped
    if keep_prob == 0:
        return X.zeros_like()
    # randomly select a fraction of this layer's outputs to drop
    mask = nd.random.uniform(0, 1, X.shape) < keep_prob
    # zero out the dropped elements and stretch the kept ones by 1 / keep_prob
    return mask * X / keep_prob
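A quick check of the behaviour (the toy input is made up):

X = nd.arange(16).reshape((2, 8))
print(dropout(X, 0))    # nothing dropped
print(dropout(X, 0.5))  # roughly half the entries zeroed, the rest scaled by 2
print(dropout(X, 1))    # everything dropped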
from mxnet import autograd, nd

num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256

W1 = nd.random.normal(scale=0.01, shape=(num_inputs, num_hiddens1))
b1 = nd.zeros(num_hiddens1)
W2 = nd.random.normal(scale=0.01, shape=(num_hiddens1, num_hiddens2))
b2 = nd.zeros(num_hiddens2)
W3 = nd.random.normal(scale=0.01, shape=(num_hiddens2, num_outputs))
b3 = nd.zeros(num_outputs)

params = [W1, b1, W2, b2, W3, b3]
for param in params:
    param.attach_grad()

drop_prob1, drop_prob2 = 0.2, 0.5

def net(X):
    X = X.reshape((-1, num_inputs))
    H1 = (nd.dot(X, W1) + b1).relu()
    if autograd.is_training():   # only apply dropout while training the model
        H1 = dropout(H1, drop_prob1)   # add a dropout layer after the first fully connected layer
    H2 = (nd.dot(H1, W2) + b2).relu()
    if autograd.is_training():
        H2 = dropout(H2, drop_prob2)   # add a dropout layer after the second fully connected layer
    return nd.dot(H2, W3) + b3
VGG: very deep networks built from repeated elements
def vgg_block(num_convs, num_channels):
    blk = nn.Sequential()
    for _ in range(num_convs):
        blk.add(nn.Conv2D(num_channels, kernel_size=3,
                          padding=1, activation='relu'))
    blk.add(nn.MaxPool2D(pool_size=2, strides=2))
    return blk

def vgg(conv_arch):
    net = nn.Sequential()
    # convolutional part
    for (num_convs, num_channels) in conv_arch:
        net.add(vgg_block(num_convs, num_channels))
    # fully connected part
    net.add(nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
            nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
            nn.Dense(10))
    return net
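conv_arch is not defined in these notes. It is a sequence of (number of conv layers, number of output channels) pairs, one per VGG block; the VGG-11 configuration, for example, would be:

conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))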
net = vgg(conv_arch)
Batch normalization (batch norm)
Benefit: faster convergence.
Normalize at every layer.
For convolutional layers, normalize per channel.
Target: zero mean and unit variance.
At test time, use the mean and variance of the whole data set.
But when the training set is very large this is expensive to compute, so we approximate them with moving averages instead (moving_mean and moving_variance).
def batch_norm(X, gamma, beta, is_training, moving_mean, moving_variance,
               eps=1e-5, moving_momentum=0.9):
    assert len(X.shape) in (2, 4)
    # fully connected: batch_size x feature
    if len(X.shape) == 2:
        # mean and variance of each input dimension over the batch
        mean = X.mean(axis=0)
        variance = ((X - mean) ** 2).mean(axis=0)
    # 2D convolution: batch_size x channel x height x width
    else:
        # mean and variance per channel; keep the 4D shape so broadcasting works
        mean = X.mean(axis=(0, 2, 3), keepdims=True)
        variance = ((X - mean) ** 2).mean(axis=(0, 2, 3), keepdims=True)
        # reshape so that broadcasting works
        moving_mean = moving_mean.reshape(mean.shape)
        moving_variance = moving_variance.reshape(mean.shape)
    # normalize
    if is_training:
        X_hat = (X - mean) / nd.sqrt(variance + eps)
        #!!! update the global mean and variance
        moving_mean[:] = moving_momentum * moving_mean + (1.0 - moving_momentum) * mean
        moving_variance[:] = moving_momentum * moving_variance + (1.0 - moving_momentum) * variance
    else:
        #!!! at test time, use the global mean and variance
        X_hat = (X - moving_mean) / nd.sqrt(moving_variance + eps)
    # scale and shift
    return gamma.reshape(mean.shape) * X_hat + beta.reshape(mean.shape)
Using it in Gluon
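The notes stop before the Gluon snippet; a minimal sketch of how nn.BatchNorm could be used in a LeNet-style network (the architecture is an assumption carried over from the earlier LeNet example):

net = nn.Sequential()
with net.name_scope():
    net.add(nn.Conv2D(channels=20, kernel_size=5))
    net.add(nn.BatchNorm(axis=1))   # normalize over the channel axis
    net.add(nn.Activation('relu'))
    net.add(nn.MaxPool2D(pool_size=2, strides=2))
    net.add(nn.Conv2D(channels=50, kernel_size=3))
    net.add(nn.BatchNorm(axis=1))
    net.add(nn.Activation('relu'))
    net.add(nn.MaxPool2D(pool_size=2, strides=2))
    net.add(nn.Flatten())
    net.add(nn.Dense(128, activation="relu"))
    net.add(nn.Dense(10))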