当前位置:网站首页>Implementing dropout from scratch and with MXNet
Implementing dropout from scratch and with MXNet
2022-07-26 17:10:00 【Full stack programmer webmaster】
Hello everyone , I meet you again , I'm the king of the whole stack
Goal:
- Implement dropout both from scratch and with MXNet (Gluon)
Dataset:
- The handwritten digits dataset returned by load_digits()
Requirements:
- Use 1 hidden layer with n_hidden1 = 36; the activation function is ReLU and the loss function is softmax cross-entropy
Notes:
- How the drop function is implemented
- How drop behaves differently during training and testing
1. Dropout from scratch
from sklearn import datasets
from mxnet import gluon,nd,autograd,init
from mxnet.gluon import nn,data as gdata,loss as gloss,trainer# Load data set
# Load the handwritten-digits dataset and wrap features/targets as NDArrays.
digits = datasets.load_digits()
features = nd.array(digits['data'])
labels = nd.array(digits['target'])
print(features.shape, labels.shape)

# One-hot encode the 10 classes; the hand-written loss below expects this form.
labels_onehot = nd.one_hot(labels, 10)
print(labels_onehot.shape)
# Output:
# (1797, 64) (1797,)
# Output of print(labels_onehot.shape): (1797, 10)
class NeuroNet:
    """Fully connected classifier with dropout, trained by hand-written SGD.

    The network is a list of ``Layer`` objects. Dropout is applied to the
    output of every layer except the last one, and the last layer's output
    is turned into class probabilities with a row-wise softmax.
    """

    def __init__(self, n_inputs, n_hidden1, n_outputs, drop_probability=0.2):
        """Create one hidden and one output layer and enable gradients.

        Args:
            n_inputs: Number of input features.
            n_hidden1: Number of units in the hidden layer.
            n_outputs: Number of output classes.
            drop_probability: Probability of dropping a hidden activation
                during training. Defaults to 0.2, the value that used to be
                hard-coded in ``forward``.
        """
        hidden_layer = Layer(n_inputs, n_hidden1)
        output_layer = Layer(n_hidden1, n_outputs)
        self.layers = [hidden_layer, output_layer]
        self.drop_probability = drop_probability
        for layer in self.layers:
            for param in layer.params:
                # Allocate gradient buffers so autograd can fill param.grad.
                param.attach_grad()

    def softmax(self, x):
        """Return the row-wise softmax of ``x``.

        The row maximum is subtracted before exponentiating. Softmax is
        invariant to a per-row shift, so the result is mathematically the
        same, but ``exp`` can no longer overflow for large activations.
        """
        shifted = x - x.max(axis=1, keepdims=True)
        exps = shifted.exp()
        return exps / exps.sum(axis=1, keepdims=True)

    def softmaxCrossEntropyLoss(self, y_pred, y):
        """Return the mean cross-entropy between probabilities and one-hot labels.

        Args:
            y_pred: Predicted class probabilities, one row per sample.
            y: One-hot encoded labels with the same layout as ``y_pred``.
        """
        per_class = -y * y_pred.log()
        per_sample = per_class.sum(axis=1)
        return per_sample.sum(axis=0) / len(y)

    def drop(self, x, drop_probability, train=True):
        """Apply inverted dropout to ``x``.

        During training each element is zeroed with probability
        ``drop_probability`` and the survivors are scaled by
        ``1 / (1 - drop_probability)``. When ``train`` is false, ``x`` is
        returned unchanged.

        Raises:
            ValueError: If training and ``drop_probability`` is outside [0, 1].
        """
        if not train:
            return x
        if not 0 <= drop_probability <= 1:
            raise ValueError(
                'drop_probability must be in [0, 1], got {}'.format(drop_probability))
        keep_probability = 1 - drop_probability
        if keep_probability == 0:
            # Everything is dropped; avoid the 0 / 0 that would yield NaN.
            return nd.zeros_like(x)
        mask = nd.random.uniform(0, 1, shape=x.shape, dtype='float32') > drop_probability
        return mask * x / keep_probability

    def forward(self, x, train=True):
        """Run the network and return class probabilities.

        Args:
            x: Input batch, one row per sample.
            train: When true, dropout is applied after every non-final layer.
        """
        for layer in self.layers[:-1]:
            x = self.drop(layer.forward(x), self.drop_probability, train)
        output_layer = self.layers[-1]
        # NOTE(review): Layer.forward always applies ReLU, so the values fed
        # to the softmax here are non-negative. Kept as in the original.
        return self.softmax(output_layer.forward(x))

    def sgd(self, learning_rate, batch_size):
        """Update the weights and biases of every layer with one SGD step."""
        for layer in self.layers:
            layer.sgd(learning_rate, batch_size)

    def dataIter(self, x, y, batch_size):
        """Return a shuffling mini-batch loader over ``(x, y)``."""
        dataset = gdata.ArrayDataset(x, y)
        return gdata.DataLoader(dataset, batch_size, shuffle=True)

    def fit(self, x, y, epoches, batch_size, learning_rate):
        """Train with mini-batch SGD and report progress every 50 epochs.

        Args:
            x: Training features.
            y: One-hot encoded training labels.
            epoches: Number of passes over the data.
            batch_size: Mini-batch size.
            learning_rate: SGD step size.
        """
        for epoch in range(epoches):
            for x_batch, y_batch in self.dataIter(x, y, batch_size):
                with autograd.record():
                    y_pred = self.forward(x_batch, train=True)
                    loss = self.softmaxCrossEntropyLoss(y_pred, y_batch)
                loss.backward()
                # NOTE(review): the loss is already a batch mean and Layer.sgd
                # divides the gradient by batch_size again, so the effective
                # step is learning_rate / batch_size. Kept as in the original.
                self.sgd(learning_rate, batch_size)
            if epoch % 50 == 0:
                # Evaluate on the full data with dropout switched off.
                y_pred_all = self.forward(x, train=False)
                loss_all = self.softmaxCrossEntropyLoss(y_pred_all, y)
                accuracy_score = self.accuracyScore(y_pred_all, y)
                # NOTE(review): the label is epoch+50, so the line printed
                # after the first epoch reads "epoch:50". Kept as in the
                # original so the printed format does not change.
                print('epoch:{},loss:{},accuracy:{}'.format(epoch+50, loss_all, accuracy_score))

    def predict(self, x):
        """Return the predicted class index for every row of ``x``.

        Dropout is disabled for inference, and the arg-max is taken over the
        class axis (axis 1), giving one label per sample.
        """
        y_pred = self.forward(x, train=False)
        return y_pred.argmax(axis=1)

    def accuracyScore(self, y_pred, y):
        """Return the fraction of rows whose arg-max matches the one-hot label."""
        acc_sum = (y_pred.argmax(axis=1) == y.argmax(axis=1)).sum().asscalar()
        return acc_sum / len(y)
class Layer:
    """Dense layer computing ``relu(x . W + b)`` with manually updated parameters."""

    def __init__(self, n_inputs, n_outputs):
        """Create a normally distributed weight matrix (scale 0.01) and a zero bias."""
        self.params = [
            nd.random.normal(scale=0.01, shape=(n_inputs, n_outputs)),
            nd.zeros(shape=(n_outputs)),
        ]

    def relu(self, x):
        """Replace negative entries of ``x`` with zero."""
        return nd.maximum(x, 0)

    def forward(self, x):
        """Apply the affine transform followed by ReLU."""
        weight, bias = self.params
        pre_activation = nd.dot(x, weight) + bias
        return self.relu(pre_activation)

    def sgd(self, learning_rate, batch_size):
        """Take one in-place gradient step on the weight and the bias."""
        for param in self.params:
            step = learning_rate * param.grad / batch_size
            # Slice assignment writes into the existing array, so the
            # gradient buffer attached to it stays valid.
            param[:] = param - step

    def print_params(self):
        """Print the weight matrix and the bias vector."""
        for param in self.params:
            print(param)


net = NeuroNet(64, 36, 10)
net.fit(features,labels_onehot,epoches=500,batch_size=200,learning_rate=0.5)
epoch:50,loss:
[2.2988722]
<NDArray 1 @cpu(0)>,accuracy:0.18308291597106288
epoch:100,loss:
[1.4126126]
<NDArray 1 @cpu(0)>,accuracy:0.7395659432387313
epoch:150,loss:
[0.46316707]
<NDArray 1 @cpu(0)>,accuracy:0.9259877573734001
epoch:200,loss:
[0.24678323]
<NDArray 1 @cpu(0)>,accuracy:0.9493600445186422
epoch:250,loss:
[0.17839472]
<NDArray 1 @cpu(0)>,accuracy:0.9610461880912632
epoch:300,loss:
[0.14298467]
<NDArray 1 @cpu(0)>,accuracy:0.9688369504730105
epoch:350,loss:
[0.1198809]
<NDArray 1 @cpu(0)>,accuracy:0.9738452977184195
epoch:400,loss:
[0.10388324]
<NDArray 1 @cpu(0)>,accuracy:0.9782971619365609
epoch:450,loss:
[0.0917427]
<NDArray 1 @cpu(0)>,accuracy:0.9827490261547023
epoch:500,loss:
[0.08237094]
<NDArray 1 @cpu(0)>,accuracy:0.9849749582637729
print(' Predicted results :',net.predict(features[:10]))
print(' The real result :',labels[:10])
 Predicted results :
[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]
<NDArray 10 @cpu(0)>
The real result :
[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]
<NDArray 10 @cpu(0)>
2. Dropout with MXNet (Gluon)
n_inputs = 64
n_hiddens = 36
n_outputs = 10

# Model: Dense(relu) -> Dropout(0.2) -> Dense
net = nn.Sequential()
net.add(nn.Dense(n_hiddens, activation='relu'))
net.add(nn.Dropout(rate=0.2))
net.add(nn.Dense(n_outputs))

# Parameter initialisation
net.initialize(init.Normal(sigma=0.01))

# Loss (labels are one-hot, hence sparse_label=False) and optimiser
loss = gloss.SoftmaxCrossEntropyLoss(sparse_label=False)
optimizer = trainer.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})

# Training
epoches = 500
batch_size = 200
dataset = gdata.ArrayDataset(features, labels_onehot)
dataIter = gdata.DataLoader(dataset, batch_size, shuffle=True)

for epoch in range(epoches):
    for x_batch, y_batch in dataIter:
        with autograd.record():
            batch_logits = net.forward(x_batch)
            batch_loss = loss(batch_logits, y_batch).sum() / batch_size
        batch_loss.backward()
        optimizer.step(batch_size)

    # Report on the full data set every 50th epoch only.
    if epoch % 50 != 0:
        continue
    all_logits = net.forward(features)
    hits = (all_logits.argmax(axis=1) == labels_onehot.argmax(axis=1)).sum().asscalar()
    mean_loss = loss(all_logits, labels_onehot).sum() / len(labels_onehot)
    print('epoch:{},loss:{},accuracy:{}'.format(epoch+50, mean_loss, hits/len(all_logits)))
# Output:
# epoch:50,loss:
[2.2981045]
<NDArray 1 @cpu(0)>,accuracy:0.16304952698942682
epoch:100,loss:
[0.97166663]
<NDArray 1 @cpu(0)>,accuracy:0.867557039510295
epoch:150,loss:
[0.3836201]
<NDArray 1 @cpu(0)>,accuracy:0.9243183082915971
epoch:200,loss:
[0.24329802]
<NDArray 1 @cpu(0)>,accuracy:0.9449081803005008
epoch:250,loss:
[0.18068495]
<NDArray 1 @cpu(0)>,accuracy:0.9577072899276572
epoch:300,loss:
[0.14546551]
<NDArray 1 @cpu(0)>,accuracy:0.9660545353366722
epoch:350,loss:
[0.1219953]
<NDArray 1 @cpu(0)>,accuracy:0.9727323316638843
epoch:400,loss:
[0.10563282]
<NDArray 1 @cpu(0)>,accuracy:0.9760712298274903
epoch:450,loss:
[0.09357208]
<NDArray 1 @cpu(0)>,accuracy:0.9788536449638287
epoch:500,loss:
[0.08368526]
<NDArray 1 @cpu(0)>,accuracy:0.9816360601001669
Publisher: Full stack programmer webmaster. When reprinting, please cite the source: https://javaforall.cn/120004.html Original link: https://javaforall.cn
边栏推荐
- 快速学会配置yum的本地源和网络源,并学会yum的使用
- 【飞控开发基础教程3】疯壳·开源编队无人机-串口(基础收发)
- [visdrone data set] yolov7 training visdrone data set and results
- Marketing guide | several common micro blog marketing methods
- About the difference between BigDecimal conversion string toengineeringstring, toplainstring and toString
- Digital intelligence transformation, management first | jnpf strives to build a "full life cycle management" platform
- ES:Compressor detection can only be called on some xcontent bytes or compressed xcontent bytes
- Is it safe for Guosen Securities to open an account? How can I find the account manager
- Video media video
- C # method to read the text content of all files in the local folder
猜你喜欢
![Sharing of 40 completed projects of high-quality information management specialty [source code + Thesis] (VI)](/img/b9/629449d3c946b017075ed42eaa81bf.png)
Sharing of 40 completed projects of high-quality information management specialty [source code + Thesis] (VI)

Alibaba side: analysis of ten classic interview questions
![[flight control development basic tutorial 3] crazy shell · open source formation UAV - serial port (basic transceiver)](/img/25/160b827d74f7902ec0d6be4683b1e5.png)
[flight control development basic tutorial 3] crazy shell · open source formation UAV - serial port (basic transceiver)

Take you a minute to learn about symmetric encryption and asymmetric encryption

In May, 2022, video user insight: user use time increased, and the platform achieved initial results in cost reduction and efficiency increase

About the difference between BigDecimal conversion string toengineeringstring, toplainstring and toString

Digital intelligence transformation, management first | jnpf strives to build a "full life cycle management" platform

带你一分钟了解对称加密和非对称加密

【飞控开发基础教程2】疯壳·开源编队无人机-定时器(LED 航情灯、指示灯闪烁)

How to connect tdengine with idea database tool?
随机推荐
Alibaba side: analysis of ten classic interview questions
Merge multiple row headers based on apache.poi operation
"Green is better than blue". Why is TPC the last white lotus to earn interest with money
regular expression
The difference and efficiency comparison of three methods of C # conversion integer
Probe of kubernetes
[classification] vgg16 training record
Is it safe for Guosen Securities to open an account? How can I find the account manager
2022-2023 topic recommendation of information management graduation project
Replicationcontroller and replicaset of kubernetes
Implementing DDD based on ABP -- aggregation and aggregation root practice
Three misunderstandings of CRM implementation: lack of strategy, lack of identity, and technology first
2 - configuration and use of routes
Sharing of 40 completed projects of high-quality information management specialty [source code + Thesis] (VI)
【无标题】
PyQt5快速开发与实战 3.4 信号与槽关联
On the evolution of cloud native edge computing framework
Packet capturing and streaming software and network diagnosis
Thoroughly uncover how epoll realizes IO multiplexing
37. [categories of overloaded operators]