Machine Learning Basics - Ensemble Learning-13
2022-07-28 13:04:00 [gemoumou]
Ensemble Learning






Bagging
# Import algorithm package and data set
from sklearn import neighbors
from sklearn import datasets
from sklearn.ensemble import BaggingClassifier
from sklearn import tree
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
iris = datasets.load_iris()
# Use only the first two features so the decision boundary can be plotted in 2-D
x_data = iris.data[:, :2]
y_data = iris.target
# Default split: 75% training, 25% test
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data)
# KNN baseline
knn = neighbors.KNeighborsClassifier()
knn.fit(x_train, y_train)

def plot(model):
    # Get the range of the data values
    x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
    y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
    # Generate a grid matrix
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))
    # ravel is similar to flatten: both turn a multi-dimensional array into 1-D.
    # flatten always returns a copy, while ravel returns a view of the original array where possible.
    z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    z = z.reshape(xx.shape)
    # Contour plot of the decision regions
    cs = plt.contourf(xx, yy, z)

# Draw the decision boundary
plot(knn)
# Scatter plot of the samples
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()
# Accuracy on the test set
knn.score(x_test, y_test)

# Decision tree baseline
dtree = tree.DecisionTreeClassifier()
dtree.fit(x_train, y_train)

# Draw the decision boundary
plot(dtree)
# Scatter plot of the samples
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()
# Accuracy on the test set
dtree.score(x_test, y_test)

# Bagging with 100 KNN base estimators, each trained on a bootstrap sample
bagging_knn = BaggingClassifier(knn, n_estimators=100)
# Fit the ensemble
bagging_knn.fit(x_train, y_train)
plot(bagging_knn)
# Scatter plot of the samples
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()
bagging_knn.score(x_test, y_test)

# Bagging with 100 decision tree base estimators
bagging_tree = BaggingClassifier(dtree, n_estimators=100)
# Fit the ensemble
bagging_tree.fit(x_train, y_train)
plot(bagging_tree)
# Scatter plot of the samples
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()
bagging_tree.score(x_test, y_test)
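
A convenient by-product of bagging's bootstrap sampling is out-of-bag evaluation: each base estimator never sees roughly a third of the training samples, and those held-out samples give a free validation estimate. A minimal sketch under the same setup as above (oob_score is a standard BaggingClassifier parameter):

# Out-of-bag (OOB) evaluation: score each sample using only the estimators that never saw it
bagging_oob = BaggingClassifier(tree.DecisionTreeClassifier(), n_estimators=100, oob_score=True)
bagging_oob.fit(x_train, y_train)
# OOB accuracy, obtained without touching the test set
print(bagging_oob.oob_score_)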

Random Forest


from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import matplotlib.pyplot as plt
# Load data
data = np.genfromtxt("LR-testSet2.txt", delimiter=",")
x_data = data[:,:-1]
y_data = data[:,-1]
plt.scatter(x_data[:,0],x_data[:,1],c=y_data)
plt.show()

x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.5)

def plot(model):
    # Get the range of the data values
    x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
    y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
    # Generate a grid matrix
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))
    # ravel is similar to flatten: flatten returns a copy, ravel returns a view where possible
    z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    z = z.reshape(xx.shape)
    # Contour plot of the decision regions
    cs = plt.contourf(xx, yy, z)
    # Scatter plot of the test samples
    plt.scatter(x_test[:, 0], x_test[:, 1], c=y_test)
    plt.show()
# Single decision tree for comparison
dtree = tree.DecisionTreeClassifier()
dtree.fit(x_train, y_train)
plot(dtree)
dtree.score(x_test, y_test)

# Random forest with 50 trees
RF = RandomForestClassifier(n_estimators=50)
RF.fit(x_train, y_train)
plot(RF)
RF.score(x_test, y_test)
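
What distinguishes a random forest from plain bagging of trees is that each split also considers only a random subset of the features, which decorrelates the trees further. A short sketch under the same setup (max_features and feature_importances_ are standard RandomForestClassifier APIs; "sqrt" is in fact the classification default, written out here for clarity):

# Consider only sqrt(n_features) candidate features at each split
RF_sqrt = RandomForestClassifier(n_estimators=50, max_features="sqrt")
RF_sqrt.fit(x_train, y_train)
print(RF_sqrt.score(x_test, y_test))
# Per-feature importance scores accumulated over all trees
print(RF_sqrt.feature_importances_)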

Boosting







import numpy as np
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_gaussian_quantiles
from sklearn.metrics import classification_report
# Generate a 2-D Gaussian distribution split into 2 classes by quantile: 500 samples, 2 features
x1, y1 = make_gaussian_quantiles(n_samples=500, n_features=2, n_classes=2)
# Generate another 2-D Gaussian split into 2 classes by quantile: 500 samples, 2 features, mean (3, 3)
x2, y2 = make_gaussian_quantiles(mean=(3, 3), n_samples=500, n_features=2, n_classes=2)
# Merge the two sets into one data set
x_data = np.concatenate((x1, x2))
# Flip the labels of the second set (0 <-> 1) so the two classes interleave
y_data = np.concatenate((y1, -y2 + 1))
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()

# Decision tree model (depth limited to 3)
model = tree.DecisionTreeClassifier(max_depth=3)
# Fit the model
model.fit(x_data, y_data)
# Get the range of the data values
x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
# Generate a grid matrix
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))
# ravel is similar to flatten: flatten returns a copy, ravel returns a view where possible
z = model.predict(np.c_[xx.ravel(), yy.ravel()])
z = z.reshape(xx.shape)
# Contour plot of the decision regions
cs = plt.contourf(xx, yy, z)
# Scatter plot of the samples
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()

# Model accuracy
model.score(x_data,y_data)

# AdaBoost model: 10 boosted decision trees of depth 3
model = AdaBoostClassifier(DecisionTreeClassifier(max_depth=3), n_estimators=10)
# Train the model
model.fit(x_data, y_data)
# Get the range of the data values
x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
# Generate a grid matrix
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))
# Get the predictions
z = model.predict(np.c_[xx.ravel(), yy.ravel()])
z = z.reshape(xx.shape)
# Contour plot of the decision regions
cs = plt.contourf(xx, yy, z)
# Scatter plot of the samples
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()
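
Since AdaBoost adds its weak learners one at a time, it is easy to watch the ensemble improve round by round. A short sketch using the fitted model above (staged_score is a standard AdaBoostClassifier method that yields the score after each boosting iteration):

# Training accuracy of the partial ensemble after each boosting round
for i, acc in enumerate(model.staged_score(x_data, y_data), start=1):
    print("round %2d: accuracy %.3f" % (i, acc))
# Accuracy of the full 10-round ensemble
model.score(x_data, y_data)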

Stacking


from sklearn import datasets
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from mlxtend.classifier import StackingClassifier # pip install mlxtend
import numpy as np
# Load the data set
iris = datasets.load_iris()
# Use only the 2nd and 3rd columns as features
x_data, y_data = iris.data[:, 1:3], iris.target
# Define three different first-level classifiers
clf1 = KNeighborsClassifier(n_neighbors=1)
clf2 = DecisionTreeClassifier()
clf3 = LogisticRegression()
# Define the second-level (meta) classifier
lr = LogisticRegression()
sclf = StackingClassifier(classifiers=[clf1, clf2, clf3],
                          meta_classifier=lr)

for clf, label in zip([clf1, clf2, clf3, sclf],
                      ['KNN', 'Decision Tree', 'LogisticRegression', 'StackingClassifier']):
    scores = model_selection.cross_val_score(clf, x_data, y_data, cv=3, scoring='accuracy')
    print("Accuracy: %0.2f [%s]" % (scores.mean(), label))


Voting
from sklearn import datasets
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
import numpy as np
# Load the data set
iris = datasets.load_iris()
# Use only the 2nd and 3rd columns as features
x_data, y_data = iris.data[:, 1:3], iris.target
# Define three different classifiers
clf1 = KNeighborsClassifier(n_neighbors=1)
clf2 = DecisionTreeClassifier()
clf3 = LogisticRegression()
# Combine them by majority vote
sclf = VotingClassifier([('knn', clf1), ('dtree', clf2), ('lr', clf3)])

for clf, label in zip([clf1, clf2, clf3, sclf],
                      ['KNN', 'Decision Tree', 'LogisticRegression', 'VotingClassifier']):
    scores = model_selection.cross_val_score(clf, x_data, y_data, cv=3, scoring='accuracy')
    print("Accuracy: %0.2f [%s]" % (scores.mean(), label))

Titanic Survival Prediction Project

import pandas
titanic = pandas.read_csv("titanic_train.csv")
titanic

# Fill missing Age values with the median of the Age column
titanic["Age"] = titanic["Age"].fillna(titanic["Age"].median())
print(titanic.describe())


print(titanic["Sex"].unique())
# Map male to 0 and female to 1
titanic.loc[titanic["Sex"] == "male", "Sex"] = 0
titanic.loc[titanic["Sex"] == "female", "Sex"] = 1


print(titanic["Embarked"].unique())
# Fill missing values with 'S', the most common embarkation port
titanic["Embarked"] = titanic["Embarked"].fillna('S')
# Map the categories to numbers
titanic.loc[titanic["Embarked"] == "S", "Embarked"] = 0
titanic.loc[titanic["Embarked"] == "C", "Embarked"] = 1
titanic.loc[titanic["Embarked"] == "Q", "Embarked"] = 2


from sklearn.preprocessing import StandardScaler
# Selected features
predictors = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]
x_data = titanic[predictors]
y_data = titanic["Survived"]
# Data standardization
scaler = StandardScaler()
x_data = scaler.fit_transform(x_data)
Logistic Regression
from sklearn import model_selection  # sklearn.cross_validation was removed in 0.20
from sklearn.linear_model import LogisticRegression
# Logistic regression model
LR = LogisticRegression()
# Cross-validated accuracy
scores = model_selection.cross_val_score(LR, x_data, y_data, cv=3)
# Average over the folds
print(scores.mean())

Neural Network
from sklearn.neural_network import MLPClassifier
# Build the model: two hidden layers with 20 and 10 units
mlp = MLPClassifier(hidden_layer_sizes=(20, 10), max_iter=1000)
# Cross-validated accuracy
scores = model_selection.cross_val_score(mlp, x_data, y_data, cv=3)
# Average over the folds
print(scores.mean())

KNN
from sklearn import neighbors
# KNN with k = 21 neighbors
knn = neighbors.KNeighborsClassifier(21)
# Cross-validated accuracy
scores = model_selection.cross_val_score(knn, x_data, y_data, cv=3)
# Average over the folds
print(scores.mean())

Decision Tree
from sklearn import tree
# Decision tree model
dtree = tree.DecisionTreeClassifier(max_depth=5, min_samples_split=4)
# Cross-validated accuracy
scores = model_selection.cross_val_score(dtree, x_data, y_data, cv=3)
# Average over the folds
print(scores.mean())

Random Forest
# Random forest
from sklearn.ensemble import RandomForestClassifier
RF1 = RandomForestClassifier(random_state=1, n_estimators=10, min_samples_split=2)
# Cross-validated accuracy
scores = model_selection.cross_val_score(RF1, x_data, y_data, cv=3)
# Average over the folds
print(scores.mean())

# A larger forest with slightly stronger regularization
RF2 = RandomForestClassifier(n_estimators=100, min_samples_split=4)
# Cross-validated accuracy
scores = model_selection.cross_val_score(RF2, x_data, y_data, cv=3)
# Average over the folds
print(scores.mean())
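
To tie the project back to the earlier sections, the individual models can also be combined into a single voting ensemble. A sketch reusing LR, RF2 and dtree from above (the choice of these three members is illustrative, not tuned):

from sklearn.ensemble import VotingClassifier
# Majority vote over logistic regression, the larger random forest, and the decision tree
voting = VotingClassifier([('lr', LR), ('rf', RF2), ('dtree', dtree)])
scores = model_selection.cross_val_score(voting, x_data, y_data, cv=3)
print(scores.mean())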


