Sklearn machine learning foundation (linear regression, underfitting, overfitting, ridge regression, model loading and saving)
2022-07-26 08:48:00 【Natural color】
Catalog
1. Linear model
1.1 Loss function
1.2 The normal equation of least squares
1.3 Gradient descent for least squares (universal)
1.4 Normal equation predicts Boston house prices
1.5 Gradient descent predicts Boston house prices
1.6 Regression performance evaluation
2. Underfitting and overfitting
3. Ridge regression (linear regression with regularization)
4. Model loading and saving
1. Linear model

Matrix multiplication satisfies the requirements of the linear regression operation: a prediction is a weighted sum of the input features plus a bias.
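Written out in the standard form, with weights w and bias b:

\[
h(w) = w_1 x_1 + w_2 x_2 + \cdots + w_n x_n + b = \mathbf{w}^{\top}\mathbf{x} + b
\]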
1.1 Loss function

Optimization is an iterative process of searching for the most suitable weights.
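For least squares, the loss (also called the cost function) is the total squared error between the predictions and the true target values over the m training samples; training seeks the weights w that minimize it:

\[
J(w) = \sum_{i=1}^{m} \bigl( h_w(x_i) - y_i \bigr)^2
\]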
1.2 The normal equation of the least squares method
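The normal equation gives the minimizing weights in closed form, where X is the feature matrix and y the target vector:

\[
w = (X^{\top} X)^{-1} X^{\top} y
\]

It requires X^T X to be invertible, and the matrix inversion becomes expensive when the number of features is large, which is why the gradient descent approach below is labeled universal.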

1.3 Gradient descent for least squares (universal)
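Gradient descent starts from initial weights and repeatedly steps each weight against the gradient of the loss, with the learning rate \(\alpha\) controlling the step size:

\[
w_j := w_j - \alpha \frac{\partial J(w)}{\partial w_j}
\]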

1.4 Normal equation predicts Boston house prices
from sklearn.datasets import load_boston  # note: load_boston was removed in scikit-learn 1.2
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def myliner():
    '''Linear regression (normal equation) prediction of house prices.'''
    # Get the data
    lb = load_boston()
    # Split the data
    x_train, x_test, y_train, y_test = train_test_split(lb.data, lb.target, test_size=0.2)
    # Standardization: the target values need to be standardized as well!
    # Feature standardization
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)
    # Target standardization (reshape the targets to 2-D, as StandardScaler expects)
    std_y = StandardScaler()
    y_train = std_y.fit_transform(y_train.reshape(-1, 1))
    y_test = std_y.transform(y_test.reshape(-1, 1))
    # Fit and predict: normal equation solution
    lr = LinearRegression()
    lr.fit(x_train, y_train)
    print(lr.coef_)
    # Predictions come out standardized; inverse_transform converts them back to prices
    y_predict = std_y.inverse_transform(lr.predict(x_test))
    print("Predicted price of each house in the test set:", y_predict)
    return None

if __name__ == '__main__':
    myliner()

1.5 Gradient descent predicts Boston house prices
from sklearn.datasets import load_boston  # note: load_boston was removed in scikit-learn 1.2
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def myliner():
    '''Linear regression (gradient descent) prediction of house prices.'''
    # Get the data
    lb = load_boston()
    # Split the data
    x_train, x_test, y_train, y_test = train_test_split(lb.data, lb.target, test_size=0.2)
    # Standardization: the target values need to be standardized as well!
    # Feature standardization
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)
    # Target standardization (reshape the targets to 2-D, as StandardScaler expects)
    std_y = StandardScaler()
    y_train = std_y.fit_transform(y_train.reshape(-1, 1))
    y_test = std_y.transform(y_test.reshape(-1, 1))
    # Fit and predict: gradient descent solution
    sgd = SGDRegressor()
    sgd.fit(x_train, y_train.ravel())  # SGDRegressor expects a 1-D target array
    print(sgd.coef_)
    # predict() returns a 1-D array; reshape it before inverting the standardization
    y_predict = std_y.inverse_transform(sgd.predict(x_test).reshape(-1, 1))
    print("Predicted price of each house in the test set:", y_predict)
    return None

if __name__ == '__main__':
    myliner()

1.6 Regression performance evaluation
Small datasets: LinearRegression (cannot address fitting problems) and others
Large-scale data: SGDRegressor
Evaluation: sklearn.metrics.mean_squared_error
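Mean squared error averages the squared differences between the true values y_i and the predictions \(\hat{y}_i\) over the m test samples:

\[
MSE = \frac{1}{m} \sum_{i=1}^{m} \bigl( y_i - \hat{y}_i \bigr)^2
\]

A minimal usage sketch (the numbers are made-up toy values, just to show the call):

from sklearn.metrics import mean_squared_error

# Average of the squared residuals between true and predicted values
print(mean_squared_error([3.0, 2.5, 4.0], [2.8, 2.9, 4.2]))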


2. Underfitting and overfitting

2.1 Solutions
Underfitting:
The model learns too few features of the data; the fix is to increase the number of features.
Overfitting:
There are too many original features, some of them noisy, and the model becomes too complex because it tries to account for every data point in the training set.
Solutions:
Feature selection: eliminate highly correlated features (hard to do)
Cross validation (so all of the data gets trained on), used for detection
L2 regularization (understand this): reduce the weights of the high-order terms, as in the penalized loss below
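L2 regularization adds a penalty on the squared weights to the least-squares loss, so large weights, and with them overly complex models, are discouraged; the hyperparameter \(\lambda\) controls the penalty strength:

\[
J(w) = \sum_{i=1}^{m} \bigl( h_w(x_i) - y_i \bigr)^2 + \lambda \sum_{j=1}^{n} w_j^2
\]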
3. Ridge regression (linear regression with regularization)
sklearn.linear_model.Ridge

The larger the regularization strength, the smaller the weight values become, approaching 0.
The regression coefficients obtained by ridge regression are more realistic and more reliable. In addition, it makes the estimated parameters fluctuate less and become more stable, which is of great practical value when studying ill-conditioned data. A small sketch of the shrinkage effect follows.
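A minimal sketch of that shrinkage, on synthetic data (the true weights 3.0, -2.0, 0.5 and the alpha values are made up for illustration):

import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
X = rng.randn(100, 3)                                       # 100 samples, 3 features
y = X @ np.array([3.0, -2.0, 0.5]) + 0.1 * rng.randn(100)   # known weights plus noise

for alpha in [0.01, 1.0, 100.0]:
    rd = Ridge(alpha=alpha).fit(X, y)
    print(alpha, rd.coef_)  # the coefficients shrink toward 0 as alpha grows

The full Boston example with Ridge: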
from sklearn.datasets import load_boston  # note: load_boston was removed in scikit-learn 1.2
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

def myliner():
    '''Ridge regression prediction of house prices.'''
    # Get the data
    lb = load_boston()
    # Split the data
    x_train, x_test, y_train, y_test = train_test_split(lb.data, lb.target, test_size=0.2)
    # Standardization: the target values need to be standardized as well!
    # Feature standardization
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)
    # Target standardization (reshape the targets to 2-D, as StandardScaler expects)
    std_y = StandardScaler()
    y_train = std_y.fit_transform(y_train.reshape(-1, 1))
    y_test = std_y.transform(y_test.reshape(-1, 1))
    # Fit and predict: ridge regression solution
    rd = Ridge(alpha=1.0)
    rd.fit(x_train, y_train)
    print(rd.coef_)
    # Predictions come out standardized; inverse_transform converts them back to prices
    y_predict = std_y.inverse_transform(rd.predict(x_test))
    print("Predicted price of each house in the test set:", y_predict)
    print("Mean squared error of ridge regression:", mean_squared_error(std_y.inverse_transform(y_test), y_predict))
    return None

if __name__ == '__main__':
    myliner()

4. Model loading and saving
# Save the trained model
with open('rd.pickle', 'wb') as fw:
    pickle.dump(rd, fw)
# Load the model
with open('rd.pickle', 'rb') as fr:
    new_rd = pickle.load(fr)

The full script:

from sklearn.datasets import load_boston  # note: load_boston was removed in scikit-learn 1.2
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import pickle

def myliner():
    '''Ridge regression prediction of house prices, with model persistence.'''
    # Get the data
    lb = load_boston()
    # Split the data
    x_train, x_test, y_train, y_test = train_test_split(lb.data, lb.target, test_size=0.2)
    # Standardization: the target values need to be standardized as well!
    # Feature standardization
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)
    # Target standardization (reshape the targets to 2-D, as StandardScaler expects)
    std_y = StandardScaler()
    y_train = std_y.fit_transform(y_train.reshape(-1, 1))
    y_test = std_y.transform(y_test.reshape(-1, 1))
    # Fit and predict: ridge regression solution
    rd = Ridge(alpha=1.0)
    rd.fit(x_train, y_train)
    print(rd.coef_)
    # Save the trained model
    with open('rd.pickle', 'wb') as fw:
        pickle.dump(rd, fw)
    # Load the model
    with open('rd.pickle', 'rb') as fr:
        new_rd = pickle.load(fr)
    # Predictions come out standardized; inverse_transform converts them back to prices
    y_predict = std_y.inverse_transform(new_rd.predict(x_test))
    print("Predicted price of each house in the test set:", y_predict)
    print("Mean squared error of ridge regression:", mean_squared_error(std_y.inverse_transform(y_test), y_predict))
    return None

if __name__ == '__main__':
    myliner()
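As an alternative to pickle, a minimal sketch using joblib, which is commonly used for persisting scikit-learn models (the toy data and the filename rd.pkl are made up for illustration):

import joblib
from sklearn.linear_model import Ridge

rd = Ridge(alpha=1.0).fit([[0.0], [1.0], [2.0]], [0.0, 1.0, 2.0])  # tiny toy fit
joblib.dump(rd, 'rd.pkl')        # save the trained model to disk
new_rd = joblib.load('rd.pkl')   # load it back
print(new_rd.predict([[3.0]]))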