当前位置：网站首页>Fundamentals of machine learning - principal component analysis pca-16

Fundamentals of machine learning - principal component analysis pca-16

2022-07-28 12:52:00 【gemoumou】

Principal Component Analysis (PCA)

Insert picture description here

PCA: a simple example

import numpy as np
import matplotlib.pyplot as plt

# Load the sample points from data.csv (comma-separated values);
# column 0 is used as the x coordinate and column 1 as the y coordinate
data = np.genfromtxt("data.csv", delimiter=",")
x_data = data[:,0]
y_data = data[:,1]
# Scatter-plot the raw data
plt.scatter(x_data,y_data)
plt.show()
# Shape of the x column, i.e. (number of rows,)
print(x_data.shape)

Insert picture description here

# Center the data
def zeroMean(dataMat):
    """Subtract each column's mean from ``dataMat``.

    Returns a tuple ``(centered, column_means)`` where ``column_means`` holds
    the mean of every column (feature) and ``centered`` is ``dataMat`` with
    those means removed.
    """
    column_means = np.mean(dataMat, axis=0)
    return dataMat - column_means, column_means

# Center the data: subtract the mean of each column
newData,meanVal=zeroMean(data)  
# np.cov computes the covariance matrix; rowvar=0 means that each row is one
# sample and each column is one variable (feature)
covMat = np.cov(newData, rowvar=0)

# Display the covariance matrix
covMat

Insert picture description here

# Eigen-decomposition of the covariance matrix. The matrix is symmetric, so
# np.linalg.eigh is used: it guarantees real eigenvalues (returned in
# ascending order) and orthonormal eigenvectors, whereas np.linalg.eig may
# return complex values because of round-off.
# np.asmatrix replaces np.mat, which was removed in NumPy 2.0. Passing an
# np.matrix keeps eigVects an np.matrix, so `*` in the following cells still
# means matrix multiplication.
eigVals, eigVects = np.linalg.eigh(np.asmatrix(covMat))

# Eigenvalues
eigVals

Insert picture description here

# Eigenvectors: column i is the eigenvector belonging to eigVals[i]
eigVects

Insert picture description here

# Indices that would sort the eigenvalues in ascending order
eigValIndice = np.argsort(eigVals)
eigValIndice

Insert picture description here

top = 1
# Indices of the `top` largest eigenvalues, largest first: reverse the
# ascending argsort order and keep the leading `top` entries
n_eigValIndice = eigValIndice[::-1][:top]

n_eigValIndice

Insert picture description here

# Eigenvectors (columns) that belong to the `top` largest eigenvalues
n_eigVect = eigVects[:,n_eigValIndice]
n_eigVect

Insert picture description here

# Project the centered data onto the selected eigenvectors to obtain the
# low-dimensional representation. n_eigVect is an np.matrix, so `*` is a
# matrix product here, not an element-wise product.
lowDDataMat = newData*n_eigVect
lowDDataMat

Insert picture description here

# Reconstruct the data in the original space from the low-dimensional data:
# map back through the transposed eigenvectors, then add the column means back
reconMat = (lowDDataMat*n_eigVect.T) + meanVal
reconMat

Insert picture description here

# Reload the original data and scatter-plot it (default color)
data = np.genfromtxt("data.csv", delimiter=",")
x_data = data[:,0]
y_data = data[:,1]
plt.scatter(x_data,y_data)

# Overlay the reconstructed data in red; np.array() converts reconMat to a
# plain ndarray so that column slicing yields 1-D coordinate arrays
x_data = np.array(reconMat)[:,0]
y_data = np.array(reconMat)[:,1]
plt.scatter(x_data,y_data,c='r')
plt.show()

Insert picture description here

Visualizing handwritten digits after dimensionality reduction

from sklearn.neural_network import MLPClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
import numpy as np
import matplotlib.pyplot as plt

digits = load_digits()# Load the handwritten-digits dataset
x_data = digits.data # feature matrix
y_data = digits.target # labels

x_train,x_test,y_train,y_test = train_test_split(x_data,y_data) # Split the data: by default 1/4 becomes test data and 3/4 training data

# Shape of the full feature matrix
x_data.shape

Insert picture description here

# Multi-layer perceptron with two hidden layers (100 and 50 units), trained
# on the training split for at most 500 iterations
mlp = MLPClassifier(hidden_layer_sizes=(100,50) ,max_iter=500)
mlp.fit(x_train,y_train)

Insert picture description here

# Center the data
def zeroMean(dataMat):
    """Subtract each column's mean from ``dataMat``.

    Returns a tuple ``(centered, column_means)`` where ``column_means`` holds
    the mean of every column (feature) and ``centered`` is ``dataMat`` with
    those means removed.
    """
    column_means = np.mean(dataMat, axis=0)
    return dataMat - column_means, column_means

def pca(dataMat,top):
    """Project ``dataMat`` onto its ``top`` leading principal components.

    Args:
        dataMat: 2-D array-like with one sample per row and one feature per
            column.
        top: number of principal components to keep.

    Returns:
        A tuple ``(lowDDataMat, reconMat)``:
        ``lowDDataMat`` is the centered data projected onto the selected
        components, shape ``(n_samples, top)``;
        ``reconMat`` is the reconstruction of the data in the original
        feature space, shape ``(n_samples, n_features)``.
        Both are ``np.matrix`` objects, as in the previous implementation.
    """
    # Center every feature (column) on zero.
    meanVal = np.mean(dataMat, axis=0)
    newData = np.asarray(dataMat - meanVal)
    # rowvar=0: each row is one sample. atleast_2d keeps the single-feature
    # case (where np.cov returns a 0-d array) usable by eigh.
    covMat = np.atleast_2d(np.cov(newData, rowvar=0))
    # The covariance matrix is symmetric, so use eigh: it always returns real
    # eigenvalues and orthonormal eigenvectors, while eig can produce complex
    # output because of round-off. (np.mat, used previously, was removed in
    # NumPy 2.0.)
    eigVals, eigVects = np.linalg.eigh(covMat)
    # Indices of the `top` largest eigenvalues, largest first.
    n_eigValIndice = np.argsort(eigVals)[::-1][:top]
    # Matching eigenvectors, one per column.
    n_eigVect = eigVects[:, n_eigValIndice]
    # Coordinates of the samples in the low-dimensional space.
    lowDDataMat = newData @ n_eigVect
    # Map back to the original space and restore the column means.
    reconMat = lowDDataMat @ n_eigVect.T + meanVal
    # Preserve the historical np.matrix return type for existing callers.
    return np.asmatrix(lowDDataMat), np.asmatrix(reconMat)

# Reduce the digits data to 2 principal components
lowDDataMat,reconMat = pca(x_data,2)

# Plot the low-dimensional (projected) data, not the reconstruction;
# np.array() converts the result to an ndarray so column slices are 1-D
x = np.array(lowDDataMat)[:,0]
y = np.array(lowDDataMat)[:,1]
plt.scatter(x,y,c='r')
plt.show()

Insert picture description here

# Predict labels for the whole dataset with the trained network.
# NOTE(review): `predictions` is not used by the plot below, which colors
# the points by the true labels y_data.
predictions = mlp.predict(x_data)

# Low-dimensional (projected) data, colored by the true digit label
x = np.array(lowDDataMat)[:,0]
y = np.array(lowDDataMat)[:,1]
plt.scatter(x,y,c=y_data)
plt.show()

Insert picture description here

# Reduce the digits data to 3 principal components
lowDDataMat,reconMat = pca(x_data,3)

from mpl_toolkits.mplot3d import Axes3D  
# Coordinates along the first, second and third principal component
x = np.array(lowDDataMat)[:,0]
y = np.array(lowDDataMat)[:,1]
z = np.array(lowDDataMat)[:,2]
ax = plt.figure().add_subplot(111, projection = '3d') 
ax.scatter(x, y, z, c = y_data, s = 10) # color each point by its digit label; marker size 10

plt.show()

Insert picture description here

sklearn: handwritten digit prediction after dimensionality reduction

from sklearn.neural_network import MLPClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
from sklearn import decomposition
import matplotlib.pyplot as plt

digits = load_digits()# Load the handwritten-digits dataset
x_data = digits.data # feature matrix
y_data = digits.target # labels

x_train,x_test,y_train,y_test = train_test_split(x_data,y_data) # Split the data: by default 1/4 becomes test data and 3/4 training data

# Baseline: multi-layer perceptron with two hidden layers (100 and 50 units)
# trained on the original, unreduced features
mlp = MLPClassifier(hidden_layer_sizes=(100,50) ,max_iter=500)
mlp.fit(x_train,y_train )

Insert picture description here

# Evaluate the baseline network on the held-out test split.
predictions = mlp.predict(x_test)
# scikit-learn metrics take (y_true, y_pred) in that order; passing them the
# other way round exchanges precision with recall in the report and
# transposes the confusion matrix.
print(classification_report(y_test, predictions))
print(confusion_matrix(y_test, predictions))

Insert picture description here

# Fit a PCA with default settings to inspect how the variance is distributed
# over the components.
# NOTE: this rebinds the name `pca`, which earlier in this article refers to
# the hand-written pca() function.
pca = decomposition.PCA()
pca.fit(x_data)

Insert picture description here

# Variance explained by each principal component
pca.explained_variance_

Insert picture description here

# Fraction of the total variance explained by each principal component
pca.explained_variance_ratio_

Insert picture description here

# Cumulative fraction of variance explained by the first k components.
# cumsum() does this in a single pass instead of re-summing a growing slice
# for every k.
variance = pca.explained_variance_ratio_.cumsum()

# x axis: number of components kept (1-based)
plt.plot(range(1, len(variance) + 1), variance)
plt.show()

Insert picture description here

# Keep just enough components to explain 80% of the variance, and whiten them.
# Fit on the training split only, so that no information from the test
# samples leaks into the transformation that is later applied to them.
pca = decomposition.PCA(whiten=True,n_components=0.8)
pca.fit(x_train)

Insert picture description here

# Fraction of the total variance explained by each retained component
pca.explained_variance_ratio_

Insert picture description here

# Project the training features onto the retained principal components
x_train_pca = pca.transform(x_train)

# Train a new network with the same architecture on the reduced features
mlp = MLPClassifier(hidden_layer_sizes=(100,50) ,max_iter=500)
mlp.fit(x_train_pca,y_train )

Insert picture description here

# Apply the same PCA transformation to the test features, then evaluate.
x_test_pca = pca.transform(x_test)
predictions = mlp.predict(x_test_pca)
# scikit-learn metrics take (y_true, y_pred) in that order; passing them the
# other way round exchanges precision with recall in the report and
# transposes the confusion matrix.
print(classification_report(y_test, predictions))
print(confusion_matrix(y_test, predictions))