Machine Learning Basics - Ensemble Learning-13
2022-07-28 13:04:00 [gemoumou]
Ensemble Learning






Bagging
# Import algorithm package and data set
from sklearn import neighbors
from sklearn import datasets
from sklearn.ensemble import BaggingClassifier
from sklearn import tree
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
iris = datasets.load_iris()
# Use only the first two features so the decision boundary can be plotted in 2-D
x_data = iris.data[:, :2]
y_data = iris.target
# Default split: 75% training, 25% test
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data)
# KNN baseline
knn = neighbors.KNeighborsClassifier()
knn.fit(x_train, y_train)

def plot(model):
    # Get the range of the data values
    x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
    y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
    # Generate a grid matrix
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))
    # ravel is similar to flatten: both turn a multi-dimensional array into 1-D.
    # flatten always returns a copy, while ravel returns a view of the original array where possible.
    z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    z = z.reshape(xx.shape)
    # Contour plot of the decision regions
    cs = plt.contourf(xx, yy, z)

# Draw the decision boundary
plot(knn)
# Scatter plot of the samples
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()
# Accuracy on the test set
knn.score(x_test, y_test)

# Decision tree baseline
dtree = tree.DecisionTreeClassifier()
dtree.fit(x_train, y_train)

# Draw the decision boundary
plot(dtree)
# Scatter plot of the samples
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()
# Accuracy on the test set
dtree.score(x_test, y_test)

# Bagging with 100 KNN base estimators, each trained on a bootstrap sample
bagging_knn = BaggingClassifier(knn, n_estimators=100)
# Fit the ensemble
bagging_knn.fit(x_train, y_train)
plot(bagging_knn)
# Scatter plot of the samples
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()
bagging_knn.score(x_test, y_test)

# Bagging with 100 decision tree base estimators
bagging_tree = BaggingClassifier(dtree, n_estimators=100)
# Fit the ensemble
bagging_tree.fit(x_train, y_train)
plot(bagging_tree)
# Scatter plot of the samples
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()
bagging_tree.score(x_test, y_test)
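
A convenient by-product of bagging's bootstrap sampling is out-of-bag evaluation: each base estimator never sees roughly a third of the training samples, and those held-out samples give a free validation estimate. A minimal sketch under the same setup as above (oob_score is a standard BaggingClassifier parameter):

# Out-of-bag (OOB) evaluation: score each sample using only the estimators that never saw it
bagging_oob = BaggingClassifier(tree.DecisionTreeClassifier(), n_estimators=100, oob_score=True)
bagging_oob.fit(x_train, y_train)
# OOB accuracy, obtained without touching the test set
print(bagging_oob.oob_score_)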

Random Forest


from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import matplotlib.pyplot as plt
# Load data
data = np.genfromtxt("LR-testSet2.txt", delimiter=",")
x_data = data[:,:-1]
y_data = data[:,-1]
plt.scatter(x_data[:,0],x_data[:,1],c=y_data)
plt.show()

x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.5)

def plot(model):
    # Get the range of the data values
    x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
    y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
    # Generate a grid matrix
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))
    # ravel is similar to flatten: flatten returns a copy, ravel returns a view where possible
    z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    z = z.reshape(xx.shape)
    # Contour plot of the decision regions
    cs = plt.contourf(xx, yy, z)
    # Scatter plot of the test samples
    plt.scatter(x_test[:, 0], x_test[:, 1], c=y_test)
    plt.show()
# Single decision tree for comparison
dtree = tree.DecisionTreeClassifier()
dtree.fit(x_train, y_train)
plot(dtree)
dtree.score(x_test, y_test)

# Random forest with 50 trees
RF = RandomForestClassifier(n_estimators=50)
RF.fit(x_train, y_train)
plot(RF)
RF.score(x_test, y_test)
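
What distinguishes a random forest from plain bagging of trees is that each split also considers only a random subset of the features, which decorrelates the trees further. A short sketch under the same setup (max_features and feature_importances_ are standard RandomForestClassifier APIs; "sqrt" is in fact the classification default, written out here for clarity):

# Consider only sqrt(n_features) candidate features at each split
RF_sqrt = RandomForestClassifier(n_estimators=50, max_features="sqrt")
RF_sqrt.fit(x_train, y_train)
print(RF_sqrt.score(x_test, y_test))
# Per-feature importance scores accumulated over all trees
print(RF_sqrt.feature_importances_)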

Boosting







import numpy as np
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_gaussian_quantiles
from sklearn.metrics import classification_report
# Generate a 2-D Gaussian distribution split into 2 classes by quantile: 500 samples, 2 features
x1, y1 = make_gaussian_quantiles(n_samples=500, n_features=2, n_classes=2)
# Generate another 2-D Gaussian split into 2 classes by quantile: 500 samples, 2 features, mean (3, 3)
x2, y2 = make_gaussian_quantiles(mean=(3, 3), n_samples=500, n_features=2, n_classes=2)
# Merge the two sets into one data set
x_data = np.concatenate((x1, x2))
# Flip the labels of the second set (0 <-> 1) so the two classes interleave
y_data = np.concatenate((y1, -y2 + 1))
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()

# Decision tree model (depth limited to 3)
model = tree.DecisionTreeClassifier(max_depth=3)
# Fit the model
model.fit(x_data, y_data)
# Get the range of the data values
x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
# Generate a grid matrix
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))
# ravel is similar to flatten: flatten returns a copy, ravel returns a view where possible
z = model.predict(np.c_[xx.ravel(), yy.ravel()])
z = z.reshape(xx.shape)
# Contour plot of the decision regions
cs = plt.contourf(xx, yy, z)
# Scatter plot of the samples
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()

# Model accuracy
model.score(x_data,y_data)

# AdaBoost model: 10 boosted decision trees of depth 3
model = AdaBoostClassifier(DecisionTreeClassifier(max_depth=3), n_estimators=10)
# Train the model
model.fit(x_data, y_data)
# Get the range of the data values
x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
# Generate a grid matrix
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))
# Get the predictions
z = model.predict(np.c_[xx.ravel(), yy.ravel()])
z = z.reshape(xx.shape)
# Contour plot of the decision regions
cs = plt.contourf(xx, yy, z)
# Scatter plot of the samples
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()
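
Since AdaBoost adds its weak learners one at a time, it is easy to watch the ensemble improve round by round. A short sketch using the fitted model above (staged_score is a standard AdaBoostClassifier method that yields the score after each boosting iteration):

# Training accuracy of the partial ensemble after each boosting round
for i, acc in enumerate(model.staged_score(x_data, y_data), start=1):
    print("round %2d: accuracy %.3f" % (i, acc))
# Accuracy of the full 10-round ensemble
model.score(x_data, y_data)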

Stacking


from sklearn import datasets
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from mlxtend.classifier import StackingClassifier # pip install mlxtend
import numpy as np
# Load the data set
iris = datasets.load_iris()
# Use only the 2nd and 3rd columns as features
x_data, y_data = iris.data[:, 1:3], iris.target
# Define three different first-level classifiers
clf1 = KNeighborsClassifier(n_neighbors=1)
clf2 = DecisionTreeClassifier()
clf3 = LogisticRegression()
# Define the second-level (meta) classifier
lr = LogisticRegression()
sclf = StackingClassifier(classifiers=[clf1, clf2, clf3],
                          meta_classifier=lr)

for clf, label in zip([clf1, clf2, clf3, sclf],
                      ['KNN', 'Decision Tree', 'LogisticRegression', 'StackingClassifier']):
    scores = model_selection.cross_val_score(clf, x_data, y_data, cv=3, scoring='accuracy')
    print("Accuracy: %0.2f [%s]" % (scores.mean(), label))


Voting
from sklearn import datasets
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
import numpy as np
# Load the data set
iris = datasets.load_iris()
# Use only the 2nd and 3rd columns as features
x_data, y_data = iris.data[:, 1:3], iris.target
# Define three different classifiers
clf1 = KNeighborsClassifier(n_neighbors=1)
clf2 = DecisionTreeClassifier()
clf3 = LogisticRegression()
# Combine them by majority vote
sclf = VotingClassifier([('knn', clf1), ('dtree', clf2), ('lr', clf3)])

for clf, label in zip([clf1, clf2, clf3, sclf],
                      ['KNN', 'Decision Tree', 'LogisticRegression', 'VotingClassifier']):
    scores = model_selection.cross_val_score(clf, x_data, y_data, cv=3, scoring='accuracy')
    print("Accuracy: %0.2f [%s]" % (scores.mean(), label))

Titanic Survival Prediction Project

import pandas
titanic = pandas.read_csv("titanic_train.csv")
titanic

# Fill missing Age values with the median of the Age column
titanic["Age"] = titanic["Age"].fillna(titanic["Age"].median())
print(titanic.describe())


print(titanic["Sex"].unique())
# Map male to 0 and female to 1
titanic.loc[titanic["Sex"] == "male", "Sex"] = 0
titanic.loc[titanic["Sex"] == "female", "Sex"] = 1


print(titanic["Embarked"].unique())
# Fill missing values with 'S', the most common embarkation port
titanic["Embarked"] = titanic["Embarked"].fillna('S')
# Map the categories to numbers
titanic.loc[titanic["Embarked"] == "S", "Embarked"] = 0
titanic.loc[titanic["Embarked"] == "C", "Embarked"] = 1
titanic.loc[titanic["Embarked"] == "Q", "Embarked"] = 2


from sklearn.preprocessing import StandardScaler
# Selected features
predictors = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]
x_data = titanic[predictors]
y_data = titanic["Survived"]
# Data standardization
scaler = StandardScaler()
x_data = scaler.fit_transform(x_data)
Logistic Regression
from sklearn import model_selection  # sklearn.cross_validation was removed in 0.20
from sklearn.linear_model import LogisticRegression
# Logistic regression model
LR = LogisticRegression()
# Cross-validated accuracy
scores = model_selection.cross_val_score(LR, x_data, y_data, cv=3)
# Average over the folds
print(scores.mean())

Neural Network
from sklearn.neural_network import MLPClassifier
# Build the model: two hidden layers with 20 and 10 units
mlp = MLPClassifier(hidden_layer_sizes=(20, 10), max_iter=1000)
# Cross-validated accuracy
scores = model_selection.cross_val_score(mlp, x_data, y_data, cv=3)
# Average over the folds
print(scores.mean())

KNN
from sklearn import neighbors
# KNN with k = 21 neighbors
knn = neighbors.KNeighborsClassifier(21)
# Cross-validated accuracy
scores = model_selection.cross_val_score(knn, x_data, y_data, cv=3)
# Average over the folds
print(scores.mean())

Decision Tree
from sklearn import tree
# Decision tree model
dtree = tree.DecisionTreeClassifier(max_depth=5, min_samples_split=4)
# Cross-validated accuracy
scores = model_selection.cross_val_score(dtree, x_data, y_data, cv=3)
# Average over the folds
print(scores.mean())

Random Forest
# Random forest
from sklearn.ensemble import RandomForestClassifier
RF1 = RandomForestClassifier(random_state=1, n_estimators=10, min_samples_split=2)
# Cross-validated accuracy
scores = model_selection.cross_val_score(RF1, x_data, y_data, cv=3)
# Average over the folds
print(scores.mean())

# A larger forest with slightly stronger regularization
RF2 = RandomForestClassifier(n_estimators=100, min_samples_split=4)
# Cross-validated accuracy
scores = model_selection.cross_val_score(RF2, x_data, y_data, cv=3)
# Average over the folds
print(scores.mean())
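
To tie the project back to the earlier sections, the individual models can also be combined into a single voting ensemble. A sketch reusing LR, RF2 and dtree from above (the choice of these three members is illustrative, not tuned):

from sklearn.ensemble import VotingClassifier
# Majority vote over logistic regression, the larger random forest, and the decision tree
voting = VotingClassifier([('lr', LR), ('rf', RF2), ('dtree', dtree)])
scores = model_selection.cross_val_score(voting, x_data, y_data, cv=3)
print(scores.mean())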


