Machine Learning Basics: Principal Component Analysis (PCA) - 16
2022-07-28 11:51:00 【gemoumou】
Principal Component Analysis (PCA)

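In brief, PCA centers the data, computes the covariance matrix of the centered data, and projects onto the eigenvectors with the largest eigenvalues. A compact summary of the steps the code below implements (standard PCA formulation; X is the n×d data matrix with one sample per row, and W_k holds the top-k eigenvectors as columns):

\bar{x} = \frac{1}{n}\sum_{i=1}^{n} x_i, \qquad \tilde{X} = X - \mathbf{1}\bar{x}^{\top}
C = \frac{1}{n-1}\tilde{X}^{\top}\tilde{X}, \qquad C\,w_j = \lambda_j w_j
Y = \tilde{X} W_k, \qquad \hat{X} = Y W_k^{\top} + \mathbf{1}\bar{x}^{\top}, \qquad W_k = [w_1, \dots, w_k]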
PCA: A Simple Example
import numpy as np
import matplotlib.pyplot as plt

# Load the data
data = np.genfromtxt("data.csv", delimiter=",")
x_data = data[:, 0]
y_data = data[:, 1]
plt.scatter(x_data, y_data)
plt.show()
print(x_data.shape)

# Center the data (subtract the per-feature mean)
def zeroMean(dataMat):
    # column-wise mean, i.e. the mean of each feature
    meanVal = np.mean(dataMat, axis=0)
    newData = dataMat - meanVal
    return newData, meanVal

newData, meanVal = zeroMean(data)
# np.cov computes the covariance matrix; rowvar=0 means each row is one sample
covMat = np.cov(newData, rowvar=0)
# covariance matrix
covMat

# np.linalg.eig returns the eigenvalues and eigenvectors of a matrix
eigVals, eigVects = np.linalg.eig(np.mat(covMat))
# eigenvalues
eigVals

# eigenvectors (one per column)
eigVects
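As an optional sanity check (not part of the original walkthrough), each returned eigenpair should satisfy C w = λ w:

# every eigenvector column should satisfy covMat @ w = eigVal * w
print(np.allclose(covMat @ eigVects, eigVects @ np.diag(eigVals)))  # expected: True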

# argsort gives the indices that sort the eigenvalues in ascending order
eigValIndice = np.argsort(eigVals)
eigValIndice

top = 1
# indices of the top largest eigenvalues
n_eigValIndice = eigValIndice[-1:-(top+1):-1]
n_eigValIndice

# eigenvectors corresponding to the top largest eigenvalues
n_eigVect = eigVects[:, n_eigValIndice]
n_eigVect

# data projected into the low-dimensional space
lowDDataMat = newData * n_eigVect
lowDDataMat

# reconstruct the data from the low-dimensional representation
reconMat = (lowDDataMat * n_eigVect.T) + meanVal
reconMat
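An optional way to quantify how much information the single component keeps is the mean squared reconstruction error (a small value means the points lie close to a line):

# mean squared error between the original and reconstructed points
print(np.mean(np.square(np.asarray(reconMat) - data)))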

# Reload and plot the original data
data = np.genfromtxt("data.csv", delimiter=",")
x_data = data[:, 0]
y_data = data[:, 1]
plt.scatter(x_data, y_data)
# plot the reconstructed data in red
x_data = np.array(reconMat)[:, 0]
y_data = np.array(reconMat)[:, 1]
plt.scatter(x_data, y_data, c='r')
plt.show()
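For comparison, sklearn's PCA gives the same result (a minimal sketch, assuming the same data.csv; the projected coordinates may differ by a sign, but the reconstruction matches):

from sklearn.decomposition import PCA

sk_pca = PCA(n_components=1)
lowD = sk_pca.fit_transform(data)        # plays the role of lowDDataMat
recon = sk_pca.inverse_transform(lowD)   # plays the role of reconMat
print(np.allclose(recon, np.asarray(reconMat)))  # expected: True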

Handwritten Digits: Dimensionality Reduction and Visualization
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import matplotlib.pyplot as plt

digits = load_digits()    # load the dataset
x_data = digits.data      # features
y_data = digits.target    # labels
# split the data: 1/4 for testing, 3/4 for training (the default split)
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data)
x_data.shape

mlp = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500)
mlp.fit(x_train, y_train)

# Center the data (subtract the per-feature mean)
def zeroMean(dataMat):
    # column-wise mean, i.e. the mean of each feature
    meanVal = np.mean(dataMat, axis=0)
    newData = dataMat - meanVal
    return newData, meanVal

def pca(dataMat, top):
    # center the data
    newData, meanVal = zeroMean(dataMat)
    # np.cov computes the covariance matrix; rowvar=0 means each row is one sample
    covMat = np.cov(newData, rowvar=0)
    # np.linalg.eig returns the eigenvalues and eigenvectors
    eigVals, eigVects = np.linalg.eig(np.mat(covMat))
    # sort the eigenvalues in ascending order
    eigValIndice = np.argsort(eigVals)
    # indices of the top largest eigenvalues
    n_eigValIndice = eigValIndice[-1:-(top+1):-1]
    # eigenvectors corresponding to the top largest eigenvalues
    n_eigVect = eigVects[:, n_eigValIndice]
    # data projected into the low-dimensional space
    lowDDataMat = newData * n_eigVect
    # reconstruct the data from the low-dimensional representation
    reconMat = (lowDDataMat * n_eigVect.T) + meanVal
    # return the low-dimensional data and the reconstructed matrix
    return lowDDataMat, reconMat

lowDDataMat, reconMat = pca(x_data, 2)
# 2-D projection of the digits data
x = np.array(lowDDataMat)[:, 0]
y = np.array(lowDDataMat)[:, 1]
plt.scatter(x, y, c='r')
plt.show()
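Since a covariance matrix is symmetric, np.linalg.eigh is a slightly more robust alternative to np.linalg.eig inside pca(): it guarantees real eigenvalues and returns them in ascending order. A sketch of that variant of the eigendecomposition step (plain ndarrays, so the projection uses @ rather than the matrix *):

# alternative eigendecomposition step for pca(), using the symmetric solver
eigVals, eigVects = np.linalg.eigh(covMat)   # eigenvalues sorted ascending
n_eigVect = eigVects[:, -top:][:, ::-1]      # columns of the top largest eigenvalues
lowDDataMat = newData @ n_eigVect
reconMat = lowDDataMat @ n_eigVect.T + meanVal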

# predict on all samples and color the 2-D projection by the predicted label
predictions = mlp.predict(x_data)
x = np.array(lowDDataMat)[:, 0]
y = np.array(lowDDataMat)[:, 1]
plt.scatter(x, y, c=predictions)
plt.show()

# project to 3 components and visualize in 3-D
lowDDataMat, reconMat = pca(x_data, 3)
from mpl_toolkits.mplot3d import Axes3D
x = np.array(lowDDataMat)[:, 0]
y = np.array(lowDDataMat)[:, 1]
z = np.array(lowDDataMat)[:, 2]
ax = plt.figure().add_subplot(111, projection='3d')
ax.scatter(x, y, z, c=y_data, s=10)  # color each point by its digit label
plt.show()

sklearn: Dimensionality Reduction and Prediction on Handwritten Digits
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import decomposition
import matplotlib.pyplot as plt

digits = load_digits()    # load the dataset
x_data = digits.data      # features
y_data = digits.target    # labels
# split the data: 1/4 for testing, 3/4 for training (the default split)
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data)

# baseline: train an MLP on the raw 64-dimensional features
mlp = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500)
mlp.fit(x_train, y_train)

predictions = mlp.predict(x_test)
print(classification_report(y_test, predictions))
print(confusion_matrix(y_test, predictions))

# fit PCA with all components kept, to inspect the explained variance
pca = decomposition.PCA()
pca.fit(x_data)

# variance explained by each component
pca.explained_variance_

# fraction of the total variance explained by each component
pca.explained_variance_ratio_

# cumulative explained-variance ratio versus number of components
variance = []
for i in range(len(pca.explained_variance_ratio_)):
    variance.append(sum(pca.explained_variance_ratio_[:i+1]))
plt.plot(range(1, len(pca.explained_variance_ratio_)+1), variance)
plt.show()
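The same cumulative curve can be computed with np.cumsum, which also answers directly how many components are needed for a given variance target. Passing a float between 0 and 1 as n_components (done below) makes sklearn pick that number automatically:

import numpy as np

# smallest number of components whose cumulative variance ratio reaches 80%
cum_var = np.cumsum(pca.explained_variance_ratio_)
print(np.argmax(cum_var >= 0.8) + 1)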

# keep enough components to explain 80% of the variance, with whitening
pca = decomposition.PCA(whiten=True, n_components=0.8)
pca.fit(x_data)

pca.explained_variance_ratio_

# train the same MLP on the PCA-transformed features
x_train_pca = pca.transform(x_train)
mlp = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500)
mlp.fit(x_train_pca, y_train)

x_test_pca = pca.transform(x_test)
predictions = mlp.predict(x_test_pca)
print(classification_report(y_test, predictions))
print(confusion_matrix(y_test, predictions))
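The same PCA-plus-MLP workflow can also be wrapped in a Pipeline, which additionally fits the PCA on the training split only (above it is fitted on x_data, which includes the test samples). A minimal sketch with the same hyperparameters:

from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier

model = make_pipeline(PCA(n_components=0.8, whiten=True),
                      MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500))
model.fit(x_train, y_train)         # PCA is fitted on the training data only
print(model.score(x_test, y_test))  # accuracy on the test split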
