当前位置:网站首页>SOM Network 2: Implementation of the Code
SOM Network 2: Implementation of the Code
2022-08-01 21:48:00 【@BangBang】
SOMPrinciples of Self-Organizing Mapping Neural Networks,详见博客:SOM网络1:原理讲解
The main function for training
train_SO代码如下:
def train_SOM(X, # Output node row count
Y, # Number of output node columns
N_epoch, # epoch
datas, # 训练数据(N x D) N个D维样本
init_lr=0.5, # 初始化学习率 lr
sigma = 0.5, # 初始化 sigma Used to update domain node weights
dis_func = euclidean_distance, # 距离公式 Default Euler distance
neighborhood_func = gaussion_neighborhood, # Neighborhood node weight formulag Default Gaussian function
init_weight_fun=None, #初始化权重函数
seed=10):
# Get the feature dimension of the input
N,D =np.shape(datas)
# 训练的步数
N_steps =N_epoch*N
#对权重进行初始化
rng = np.random.RandomState(seed)
if init_weight_fun is None:
weights =rng.rand(X,Y,D)*2-1 #随机初始化
weights /=np.linalg.norm(weights,axis=-1,keepdims=True) #标准化
else:
weights = init_weight_fun(X,Y,datas) # 一般使用PCA初始化
PCA 初始化权重
def weights_PCA(X,Y,data):
N,D=np.shape(data)
weights=np.zeros([X,Y,D])
pc_value,pc=np.linalg.eig(np.conv(np.transpose(data))) # pc_vale为特征值,pc 为特征向量 DXD维
pc_order=np.argsort(-pc_value) # 特征值从大到小排序,并返回Index
# 对W:[X,Y,D]进行初始化
for i,c1 in enumerate(np.linspace(-1,1,X)):
for j,c2 in enumerate(np.linsapce(-1,1,Y)):
weights[i,j]=c1*pc[pc_order[0]]+c2*pc[pc_order[1]] #Take advantage of the biggest2The eigenvectors corresponding to the eigenvalues are weighted and combinedi,j位置的Ddimensional representation vector
完整的训练代码
def train_SOM(X, # Output node row count
Y, # Number of output node columns
N_epoch, # epoch
datas, # 训练数据(N x D) N个D维样本
init_lr=0.5, # 初始化学习率 lr
sigma = 0.5, # 初始化 sigma Used to update domain node weights
dis_func = euclidean_distance, # 距离公式 Default Euler distance
neighborhood_func = gaussion_neighborhood, # Neighborhood node weight formulag Default Gaussian function
init_weight_func=weights_PCA, #初始化权重函数
seed=10):
# Get the feature dimension of the input
N,D =np.shape(datas)
# 训练的步数
N_steps =N_epoch*N
#对权重进行初始化
rng = np.random.RandomState(seed)
if init_weight_func is None:
weights =rng.rand(X,Y,D)*2-1 #随机初始化
weights /=np.linalg.norm(weights,axis=-1,keepdims=True) #标准化
else:
weights = init_weight_fun(X,Y,datas) # 一般使用PCA初始化
for n_epoch in range(N_epoch):
print("Epoch %d" %(n_epoch+1))
#Shuffle the sample order
index=rng.permulation(np.arange(N))
for n_step,_id in enumerate(index):
# 取一个样本
x=datas[_id]
#计算learning rate (eta)
t=N*n_epoch + n_step
eta=get_learning_rate(init_lr,t,N_steps)
#Calculate the distance of each node of the sample distance output,and get the location of the activation point
winner=get_winner_index(x,weights,dis_func)
#根据激活点的位置计算临近点的权重 随着迭代的进行sigmaIt also needs to be continuously reduced
new_sigma=get_learning_rate(sigma,t,N_steps) # sigma The update is done in the same way as the learning rate
g=neighborhood_fun(X,Y,winner,new_sigma)
g=g*eta
#进行权重的更新
weights = weights + np.expand_dims(g,-1)*(x-weights)
# 打印量化误差
print("quantization_error=%.4f" %(get_quantization_error(data,weights)))
return weights
#计算学习率
def get_learning_rate(lr,t,max_steps): # t当前的steps max_steps=N x epoch (N样本数)
return lr/(1+t/(max_steps/2))
# 获取激活(winning points)节点的位置,与xThe output node position with the smallest distance
def get_winner_index(x,w,dis_func=euclidean_distance):
# 计算输入样本和各个节点的距离
dis = dis_func(x,w)
#找到距离最小的位置
index=np.where(dis ==np.min(dis))
return (index[0][0],index[1][0])
#利用高斯距离法计算临近点的权重
# X,Y模板大小,c中心点的位置
def gaussion_neighborhood(X,Y,c,sigma)
xx,yy=np.meshgrid(np.arange(X),np.arange(Y))
d=2*sigma*sigma
ax=np.exp(-np.power(xx-xx.T[c],2)/d)
ay=np.exp(-np.power(yy-yy.T[c],2)/d)
return (ax*ay).T
# 计算欧式距离
def euclidean_distance(x,w):
dis=np.expand_dims(x,axis=(0,1))-w # x:D w:[X,Y,D] Therefore, two dimensions need to be added x:D->x:[1,1,D]
return np.linalg.norm(dis,axis=-1) # 输出[X,Y] 二范数 is the Euler distance
# 特征标准化 (x-mu)/std
def feature_normalization(data):
mu=np.mean(data,axis=0,keepdims=True)
sigma=np.std(data,axis=0,keepdims=True)
return (data-mu)/sigma
def get_U_Matrix(weights):
X,Y,D=np.shape(weights)
um=na.nan * np.zeros((X,Y,8)) #8 领域
ii=[0 ,-1,-1,-1,0,1,1, 1]
jj=[-1,-1, 0, 1,1,1,0,-1]
for x in range(X):
for y in range(Y):
w_2=weights[x,y]
for k,(i,j) in enumerate(zip(ii,jj)):
if(x+i >=0 and x+i<X and y+j>=0 and y+j <Y):
w_1=weights[x+i,y+j]
um[x,y,k]=np.linalg.norm(w_1-w_2)
um=np.nansum(um,axis=2)
return um/um.max()
#计算量化误差 Calculate the average distance between each sample point and the mapped point
def get_quantization_error(data,weights):
w_x,w_y=zip(*[get_winner_index(d,weights) for d in datas])
error=datas-weights[w_x,w_y] # The distance to the cluster center of the data domain
error=np.linalg.norm(error,axis=-1)
return np.mean(error)
训练完成后,Returns the output node'sweights,维度为 [ X , Y , D ] [X,Y,D] [X,Y,D], Equivalent to solidifying the weights of the modelweights, weightsrepresents the current training sample.
测试
if __name__ == "__main__":
# seed 数据展示
columns=['area','perimeter','compactness','length_kernel','width_kernel',
'asymmetry_coefficient','length_kernel_groove','target']
data = pd.read_csv('seeds_dataset.txt',names=columns,sep='\t+',engine='python')
labs=data['target'].values
lab_names={
1:'Kama',2:'Rosa',3:'Canadian'}
datas=data[data.columns[:-1]].values
N,D=np.shape(datas)
print(N,D)
# 对训练数据进行标准化
datas = feature_normalization(datas)
#SOM的训练
weights=train_SOM()X=9,Y=9,N_epoch=2,datas=datas,sigma=1.5,init_weight_func=weights_PCA)
# 获取UMAP 用于可视化
UM=get_U_Matrix(weights)
plt.figure(figure=(9,9))
plt.pcolor(UM.T,cmap='bone_r') #plotting the distance map as background
plt.colorbar()
测试数据
U_Matrix
- The darker the color, the stronger the relationship with neighboring points,Stronger colors indicate less strong relationships with neighboring points.
Test the effect of classification
```python
if __name__ == "__main__":
# seed 数据展示
columns=['area','perimeter','compactness','length_kernel','width_kernel',
'asymmetry_coefficient','length_kernel_groove','target']
data = pd.read_csv('seeds_dataset.txt',names=columns,sep='\t+',engine='python')
labs=data['target'].values
lab_names={
1:'Kama',2:'Rosa',3:'Canadian'}
datas=data[data.columns[:-1]].values
N,D=np.shape(datas)
print(N,D)
# 对训练数据进行标准化
datas = feature_normalization(datas)
#SOM的训练
weights=train_SOM()X=9,Y=9,N_epoch=2,datas=datas,sigma=1.5,init_weight_func=weights_PCA)
# 获取UMAP 用于可视化
UM=get_U_Matrix(weights)
plt.figure(figure=(9,9))
plt.pcolor(UM.T,cmap='bone_r') #plotting the distance map as background
plt.colorbar()
# See the effect of classification
markers=['o','s','D']
colors =['C0','C1','C2']
for i in range(N):
x =datas[i]
w=get_winner_index(x,weights)
i_lab=labs[i]-1
plt.plot(w[0]+.5,w[1]+.5,markers[i_lab],markerfacecolor='None'
markeredgecolor=colors[i_lab],markersize=12,markeredgewidth=2)
plt.show()

边栏推荐
- Yizhou Financial Analysis | The intelligent transformation of bank ATM machines is accelerated; the new Internet loan regulations bring challenges
- 一个关于操作数据库的建议—用户密码
- Spark shuffle调优
- Small program -- subcontracting
- Unity Shader general lighting model code finishing
- LeetCode952三部曲之二:小幅度优化(137ms -> 122ms,超39% -> 超51%)
- The difference between groupByKey and reduceBykey
- 基于php影视资讯网站管理系统获取(php毕业设计)
- 微软校园大使喊你来秋招啦!
- Based on php online music website management system acquisition (php graduation design)
猜你喜欢

KMP 字符串匹配问题

groupByKey和reduceBykey的区别

51.【结构体初始化的两种方法】

第一讲 测试知多少

游戏元宇宙发展趋势展望分析

scikit-learn no moudule named six

Based on php film and television information website management system acquisition (php graduation design)

找工作必备!如何让面试官对你刮目相看,建议收藏尝试!!

The difference between groupByKey and reduceBykey

SAP ABAP OData 服务如何支持删除(Delete)操作试读版
随机推荐
Scala practice questions + answers
shell脚本
你居然不懂Bitmap和Drawable? 相关知识大扫盲
数据分析面试手册《指标篇》
上传markdown文档到博客园
关于npm的那些事儿
Yizhou Financial Analysis | The intelligent transformation of bank ATM machines is accelerated; the new Internet loan regulations bring challenges
FusionGAN:A generative adversarial network for infrared and visible image fusion文章学习笔记
第3讲:MySQL数据库中常见的几种表字段数据类型
shell programming conventions and variables
ImportError: `save_weights` requires h5py.问题解决
多商户商城系统功能拆解19讲-平台端发票管理
用户量大,Redis没法缓存响应,数据库宕机?如何排查解决?
LVS负载均衡群集
Scala练习题+答案
统计单词数
工程建筑行业数据中台指标分析
C Pitfalls and Defects Chapter 7 Portability Defects 7.7 Truncation During Division
Recycling rental system 100% open source without encryption Mall + recycling + rental
C Pitfalls and Defects Chapter 7 Portability Defects 7.8 Size of Random Numbers