Self-taught neural network series - 4: Learning of the neural network
2022-06-26 09:09:00 【ML_python_get√】
Learning neural networks
4.1 Loss functions
- Least squares (sum of squared errors)
- Maximum likelihood
- Cross entropy
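For reference, the standard formulas behind the two implementations below are E = (1/2) * Σ_k (y_k - t_k)^2 for the sum-of-squares error and E = -Σ_k t_k * log(y_k) for the cross-entropy error, where y_k is the network output for class k and t_k the one-hot label. Minimizing the cross entropy of one-hot labels is the same as maximizing the log-likelihood of the correct class, which is how the maximum-likelihood view fits in.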
# Least-squares (sum-of-squares) loss function
import numpy as np
y1 = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
y2 = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
def mean_squared_error(y, t):
    return 0.5 * np.sum((y - t)**2)
# Python lists do not support element-wise subtraction, so convert them to NumPy arrays
mean_squared_error(np.array(y1), np.array(t))
# y1 predicts index 2 (the correct class): error is 0.09750000000000003
mean_squared_error(np.array(y2), np.array(t))
# y2 predicts index 7 (a wrong class): error is 0.5975
# Cross-entropy loss function
# Information content is inversely related to probability: the lower the probability
# of an event, the more information is needed to turn it into a certain event.
# Entropy measures the disorder of a system; it is the expected information content.
# To measure whether two distributions are close, one can check whether the difference
# between their entropies is 0; when one of the distributions is the ideal (target) one,
# it is enough to measure the cross entropy.
# Gibbs' inequality: the cross entropy is greater than or equal to the entropy
# (equality holds only when the two distributions are identical).
# So the smaller the cross entropy, the closer the two distributions are,
# which matches the definition of a loss function:
# the higher the probability assigned to the correct class, the smaller the loss.
def cross_entropy_error0(y, t):
    delta = 1e-7
    # element-wise product; delta prevents log(0) when a predicted probability is 0
    return -np.sum(t * np.log(y + delta))
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
y1 = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
y2 = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]
cross_entropy_error0(np.array(y1), np.array(t))
# 0.510825457099338
cross_entropy_error0(np.array(y2), np.array(t))
# 2.302584092994546
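# A quick numeric check of Gibbs' inequality (my own addition, reusing the arrays above):
# for the one-hot target t the entropy is essentially 0, so every cross entropy is larger,
# approaching the entropy only when the prediction equals t.
p = np.array(t) + 1e-7                                   # smooth the one-hot target to avoid log(0)
print(-np.sum(p * np.log(p)))                            # entropy: ~0
print(cross_entropy_error0(np.array(y1), np.array(t)))   # 0.51... > entropy
print(cross_entropy_error0(np.array(t), np.array(t)))    # ~0: the equality case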
# Mini-batch learning
import sys, os
sys.path.append(os.pardir)
import numpy as np
from res.mnist import load_mnist
(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, one_hot_label=True)
print(x_train.shape)
print(t_train.shape)
# Randomly sample 10 observations
train_size = x_train.shape[0]
batch_size = 10
batch_mask = np.random.choice(train_size, batch_size)  # randomly choose batch_size indices from the training set
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]
# Cross entropy for one-hot encoded labels (batch version)
def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    batch_size = y.shape[0]
    return -np.sum(t * np.log(y + 1e-7)) / batch_size
# Cross entropy when t contains class labels (not one-hot)
# With one-hot encoding only the element where t is 1 contributes to the sum,
# so it is enough to pick out, for each sample, the output y for the true label.
def cross_entropy_label(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    batch_size = y.shape[0]
    # y[np.arange(batch_size), t] is fancy indexing: for row i it picks y[i, t[i]],
    # i.e. the predicted probability of the true class of sample i
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
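# A small consistency check (my own addition): stacking the earlier predictions into a batch,
# the one-hot version and the label version should return the same value.
y_batch = np.array([y1, y2])
t_onehot = np.array([t, t])
t_labels = np.argmax(t_onehot, axis=1)            # array([2, 2])
print(cross_entropy_error(y_batch, t_onehot))     # ~1.4067, averaged over the 2 samples
print(cross_entropy_label(y_batch, t_labels))     # same value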
4.2 Numerical differentiation
- In theory the step h should be infinitesimally small, but a computer rounds off digits beyond a certain precision, so h is usually taken as 10^(-4) (see the short check after this list).
- Instead of the one-sided derivative at a point, we use the central difference between x-h and x+h.
- This approximation is called numerical differentiation.
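A quick illustration of both points (my own example, not from the original post), using a function with a known derivative:
import numpy as np
print(np.float32(1e-50))   # 0.0 -- a step that is too small simply vanishes in floating point
def f_demo(x):
    return x**2            # true derivative at x = 3 is 6
h = 1e-4
print((f_demo(3+h) - f_demo(3)) / h)        # ~6.0001: forward difference, error of order h
print((f_demo(3+h) - f_demo(3-h)) / (2*h))  # ~6.0: central difference, error of order h^2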
def numerical_diff(f, x):
    h = 1e-4
    return (f(x+h) - f(x-h)) / (2*h)
def func(x):
    return 0.01*x**2 + 0.1*x
numerical_diff(func, 10)
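# For comparison (standard calculus, not from the original post): the analytic derivative
# of 0.01*x**2 + 0.1*x is 0.02*x + 0.1, i.e. exactly 0.3 at x = 10.
def analytic_diff(x):
    return 0.02*x + 0.1
print(analytic_diff(10))          # 0.3
print(numerical_diff(func, 10))   # ~0.2999999999986, very close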
def tangent_line(f, x):
    '''Tangent line to f at x'''
    d = numerical_diff(f, x)    # slope
    y = f(x) - d*x              # intercept: the line passes through the point (x, f(x))
    return lambda t: d*t + y    # the tangent line as a function of t
# visualization
import numpy as np
import matplotlib.pylab as plt
x = np.arange(0.0,20.0,0.1)
y1 = func(x)
tf = tangent_line(func,10)
y2 = tf(x)
plt.plot(x,y1)
plt.plot(x,y2)
plt.xlabel('x')
plt.ylabel('f(x)')
plt.show()
# Partial derivatives: f(x0, x1) = x0**2 + x1**2
def func_2(x):
    return np.sum(x**2)
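# A single partial derivative by hand (my own sketch of the usual trick): fix x1 = 4.0 and
# differentiate with respect to x0 at x0 = 3.0; the analytic answer is 2*x0 = 6.
def func_tmp1(x0):
    return x0**2 + 4.0**2
print(numerical_diff(func_tmp1, 3.0))   # ~6.0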
def numerical_gradient(f, x):
    '''Gradient of f at x, computed element by element'''
    h = 1e-4
    grad = np.zeros_like(x)
    # multi-dimensional iterator: visit every element of x, whatever its shape
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)                        # f with the idx-th element shifted by +h
        x[idx] = tmp_val - h
        fxh2 = f(x)                        # f with the idx-th element shifted by -h
        grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val                   # restore the original value
        it.iternext()
    return grad
# Numerical differentiation evaluates the function on perturbed copies of the known data;
# unlike the analytical derivative, there is no need to derive a formula first and then substitute values.
# Gradient descent
def gradient_descent(f, init_x, lr=0.01, step_num=1000):
    x = init_x
    for i in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad
    return x
# Test
numerical_gradient(func_2, np.array([3.0, 4.0]))  # ~[6., 8.]; pass floats -- with an integer array the +h/-h perturbation is truncated away and the result is wrong
x = gradient_descent(func_2, np.array([-3.0, 4.0]), lr=0.1)
func_2(x)  # close to 0: the minimum of func_2 is at the origin
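# The learning rate matters (values here are my own choice, not from the original post):
# too large and the iterates blow up, too small and they barely move.
print(gradient_descent(func_2, np.array([-3.0, 4.0]), lr=10.0, step_num=100))    # huge values: diverged
print(gradient_descent(func_2, np.array([-3.0, 4.0]), lr=1e-10, step_num=100))   # ~[-3., 4.]: almost no progress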
# Computing the gradient, taking a neural network as the example
def softmax(x):
    # Handles both a single sample (1D) and a batch (2D); without the per-row branch,
    # the batch use later on would normalize over the whole batch instead of per sample.
    if x.ndim == 2:
        x = x - np.max(x, axis=1, keepdims=True)   # subtract the row max for numerical stability
        exp_x = np.exp(x)
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)
    a = np.max(x)
    exp_x = np.exp(x - a)
    exp_sum = np.sum(exp_x)
    y = exp_x / exp_sum
    return y
import numpy as np
class SimpleNet:
    def __init__(self):
        self.W = np.random.randn(2, 3)   # 2x3 weight matrix: a single-layer network
    def predict(self, x):
        return np.dot(x, self.W)
    def loss(self, x, t):
        z = self.predict(x)
        y = softmax(z)
        loss = cross_entropy_error(y, t)
        return loss
# Test
net = SimpleNet()
print(net.W)
x = np.array([0.6, 0.9])
p = net.predict(x)
print(p)
# np.argmax(p)
t = np.array([0, 0, 1])
# net.loss(x, t)
# def f(W):
#     return net.loss(x, t)
# More concisely, as an anonymous function:
f = lambda W: net.loss(x, t)
print(f(2))
print(f(1))
# Both calls print the same loss: the argument W is never actually used inside f
dW = numerical_gradient(f, net.W)
print(dW)
# After net.W is perturbed to W+h / W-h inside numerical_gradient,
# do f(x+h) / f(x-h) change as well?
# f is computed from net.predict(x),
# and predict(x) is computed from x * self.W,
# so f does change when net.W changes.
# Any parameter stored in the class as self.W can be "passed in" to all of its methods this way.
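# A minimal check (my own addition) that the in-place trick works: changing net.W directly
# changes what f returns, even though f ignores its argument.
loss_before = f(None)
net.W[0, 0] += 1e-4
loss_after = f(None)
print(loss_before, loss_after)   # two slightly different loss values
net.W[0, 0] -= 1e-4              # restore the original weight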
4.3 Training a two-layer neural network
- Randomly select a mini-batch and compute the loss function
- Compute the gradient of the loss function
- Update the parameters
- Repeat until the gradient is 0 or the maximum number of steps is reached
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
def sigmoid_grad(x):
    return (1.0 - sigmoid(x)) * sigmoid(x)
# Gradient computation for a 2-layer neural network
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_std=0.01):
        # Constructor: initialize the weights and biases
        self.params = {}
        self.params['W1'] = weight_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        return y
    def loss(self, x, t):
        y = self.predict(x)
        return cross_entropy_error(y, t)
    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy
    def numerical_gradient(self, x, t):
        # Slow: perturbs every parameter via the global numerical_gradient function
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads
    def gradient(self, x, t):
        # Fast: analytical gradients via backpropagation
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}
        batch_num = x.shape[0]
        # forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        # backward
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)
        da1 = np.dot(dy, W2.T)
        dz1 = sigmoid_grad(a1) * da1
        grads['W1'] = np.dot(x.T, dz1)
        grads['b1'] = np.sum(dz1, axis=0)
        return grads
net = TwoLayerNet(784, 100, 10)
net.params['W1'].shape   # (784, 100)
net.params['b1'].shape   # (100,)
net.params['W2'].shape   # (100, 10)
net.params['b2'].shape   # (10,)
x = np.random.randn(100, 784)   # dummy input: 100 samples
t = np.random.randn(100, 10)    # dummy targets
y = net.predict(x)
print(y.shape)                  # (100, 10)
grads = net.gradient(x, t)
grads
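# Optional gradient check (my own sketch, not in the original post): on a tiny network and batch,
# the backpropagation gradients from gradient() should agree closely with the slow numerical_gradient().
small_net = TwoLayerNet(input_size=4, hidden_size=3, output_size=2)
x_small = np.random.randn(2, 4)
t_small = np.array([[1.0, 0.0], [0.0, 1.0]])
g_num = small_net.numerical_gradient(x_small, t_small)
g_bp = small_net.gradient(x_small, t_small)
for key in ('W1', 'b1', 'W2', 'b2'):
    print(key, np.max(np.abs(g_num[key] - g_bp[key])))   # differences should be tiny (around 1e-9 or smaller)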
from res.mnist import load_mnist
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
train_loss_list = []
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
network = TwoLayerNet(784, 50, 10)
for i in range(iters_num):
    # mini-batch selection
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    # gradient
    grad = network.gradient(x_batch, t_batch)
    # gradient descent update
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]
    # record the learning progress
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
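# Plotting the recorded loss (my own addition; the original post only stores the list,
# plt was imported earlier as matplotlib.pylab).
plt.plot(np.arange(len(train_loss_list)), train_loss_list)
plt.xlabel('iteration')
plt.ylabel('loss')
plt.show()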
4.4 Evaluation of the neural network
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
train_loss_list = []
train_acc_list = []
test_acc_list = []
iters_num = 10000
batch_size = 100
learning_rate = 0.1
train_size = x_train.shape[0]
# number of mini-batch iterations needed to pass over the whole training set once (one epoch)
iter_per_epoch = max(train_size / batch_size, 1)
network = TwoLayerNet(784, 50, 10)
for i in range(iters_num):
    # draw a mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    # gradient
    grad = network.gradient(x_batch, t_batch)
    # gradient descent update
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= grad[key] * learning_rate
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    # compute the recognition accuracy once per epoch, i.e. after each full pass over the samples
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train_acc')
plt.plot(x, test_acc_list, label='test_acc', linestyle="--")
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend(loc='lower right')
plt.show()