当前位置：网站首页>PyTorch framework exercise (based on the Kaggle Titanic competition)

PyTorch framework exercise (based on the Kaggle Titanic competition)

2022-07-25 15:41:00 【whut_ L】

Catalog

2 Building an FCNN with the PyTorch framework

3 Model testing and generation of submission data

1 Data preprocessing

##  Training set and test set data preprocessing 

#  Import third-party library 
import numpy as np
import pandas as pd
import math

#  Data preprocessing functions 
def Data_Processing(Input_Dataset, Dataset_Type):
    """Clean a Titanic CSV and save the selected feature columns to disk.

    Missing ``Age`` and ``Fare`` values are replaced by the column mean and
    missing ``Embarked`` values by the column mode. ``Embarked`` is encoded
    as ``S -> 0, C -> 1, Q -> 2`` and ``Sex`` as ``male -> 0, female -> 1``.
    A summary, the column header and the shape of the raw data are printed.

    Args:
        Input_Dataset: Path of the CSV file to read.
        Dataset_Type: ``0`` for the training set (keeps ``Survived`` and
            writes ``Train_Dataset1.csv``) or ``1`` for the test set
            (writes ``Test_Dataset1.csv``).

    Raises:
        ValueError: If ``Dataset_Type`` is neither ``0`` nor ``1``; without
            this check the function would do all the work and save nothing.
    """
    if Dataset_Type not in (0, 1):
        raise ValueError(
            'Dataset_Type must be 0 (training set) or 1 (test set), got %r'
            % (Dataset_Type,)
        )

    Dataset = pd.read_csv(Input_Dataset)  # Import dataset
    print(Dataset.describe())  # Data set description
    print('The Feature of the Dataset is:', Dataset.head(n = 0))
    print('The shape of Dataset is:', Dataset.shape)  # Dataset size

    # Numeric columns: fill missing values with the column mean.
    for column in ('Age', 'Fare'):
        Dataset[column] = Dataset[column].fillna(Dataset[column].mean())

    # Embarked: fill missing values with the most frequent port. mode() is
    # empty when every value is missing, so only index it when it has a row.
    Mode = Dataset['Embarked'].mode()
    if not Mode.empty:
        Dataset['Embarked'] = Dataset['Embarked'].fillna(Mode.iloc[0])

    # Encode categories with map() rather than assigning integers into a
    # text column through .loc: that leaves a mixed object-dtype column and
    # can be rejected outright on string-dtype columns. Values outside the
    # mapping become missing instead of staying as text.
    Dataset['Embarked'] = Dataset['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})
    Dataset['Sex'] = Dataset['Sex'].map({'male': 0, 'female': 1})

    Features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
    if Dataset_Type == 0:
        # The training set also carries the label column.
        Features.append('Survived')
        output_path = 'Train_Dataset1.csv'
    else:
        output_path = 'Test_Dataset1.csv'

    Dataset[Features].to_csv(output_path, index = False)  # Save without the index


    
# Preprocess both splits in turn: code 0 marks the training set, code 1 the test set.
for Input_Dataset, _split_code in (('train.csv', 0), ('test.csv', 1)):
    Data_Processing(Input_Dataset, _split_code)

2 Building an FCNN with the PyTorch framework

##  Train the model with the PyTorch framework

#  Import third-party library 
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt

#  Training set processing 
class Titanic_Train_Dataset(Dataset):
    """Map-style dataset backed by a comma-separated numeric training file.

    The first line of the file is skipped as a header. Every column except
    the last is treated as a feature; the last column is the label.

    Attributes set by ``__init__``:
        len: Number of data rows.
        x_data: float32 tensor of features, one row per sample.
        y_data: float32 tensor of labels with a trailing dimension of 1.
    """

    def __init__(self, filepath):
        """Load ``filepath`` and split it into feature and label tensors."""
        # ndmin=2 keeps the (rows, columns) layout even when the file holds a
        # single data row; otherwise loadtxt returns a 1-D array, shape[0]
        # would count columns and the 2-D slicing below would raise.
        xy = np.loadtxt(filepath, delimiter = ',', dtype = np.float32, skiprows = 1, ndmin = 2)
        self.len = xy.shape[0]  # Number of samples
        self.x_data = torch.from_numpy(xy[:, :-1])  # All columns but the last: features
        self.y_data = torch.from_numpy(xy[:, [-1]])  # Last column, kept 2-D: labels

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples."""
        return self.len

#  Fully connected neural network
class FCNN_Model(torch.nn.Module):
    """Fully connected network with layer widths 7 -> 5 -> 3 -> 1.

    A sigmoid is applied after every linear layer, including the last one.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn
        self.linear1 = nn.Linear(in_features = 7, out_features = 5)
        self.linear2 = nn.Linear(in_features = 5, out_features = 3)
        self.linear3 = nn.Linear(in_features = 3, out_features = 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Pass ``x`` through the three linear layers, each followed by the sigmoid."""
        activation = x
        for layer in (self.linear1, self.linear2, self.linear3):
            activation = self.sigmoid(layer(activation))
        return activation


def Mode_Train(Epoch, Epoch_loss):
    """Train for ``Epoch`` passes over the data and accumulate the loss per epoch.

    Relies on the module-level ``train_loader``, ``model``, ``criterion`` and
    ``optimizer``.

    Args:
        Epoch: Number of passes over ``train_loader``.
        Epoch_loss: Indexable accumulator; entry ``e`` is increased in place
            by the loss of every batch seen during epoch ``e``.

    Returns:
        The same ``Epoch_loss`` object that was passed in.
    """
    for epoch_index in range(Epoch):
        for batch_inputs, batch_labels in train_loader:
            # Forward pass: predictions and loss for this mini-batch.
            batch_loss = criterion(model(batch_inputs), batch_labels)
            Epoch_loss[epoch_index] += batch_loss.item()

            # Backward pass: clear old gradients, then compute new ones.
            optimizer.zero_grad()
            batch_loss.backward()

            # Parameter update.
            optimizer.step()

    return Epoch_loss


dataset = Titanic_Train_Dataset('Train_Dataset1.csv')  # Load the preprocessed training set
# Mini-batches of 64 samples, reshuffled every epoch, loaded in the main process.
train_loader = DataLoader(dataset = dataset, batch_size = 64, shuffle = True, num_workers = 0)

model = FCNN_Model()  # Network to train

# Binary cross-entropy averaged over the batch. reduction='mean' replaces the
# deprecated size_average=True argument and selects the same averaging.
criterion = torch.nn.BCELoss(reduction = 'mean')
# SGD with learning rate lr and momentum factor momentum.
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01, momentum = 0.5)

Epoch = 2000  # Number of training epochs
Epoch_loss = np.zeros([Epoch, 1])  # One accumulated-loss slot per epoch
Epoch_loss = Mode_Train(Epoch, Epoch_loss)
plt.plot(Epoch_loss)  # Loss curve over the epochs

3 Model testing and generation of submission data

##  Apply the trained model to the test set 

#  Define test functions 
def Mode_Test(test_loader):
    """Run the module-level ``model`` over every batch and return all outputs.

    Gradient tracking is disabled while the batches are evaluated.

    Args:
        test_loader: Iterable yielding input batches accepted by ``model``.

    Returns:
        The model outputs of all batches concatenated along dimension 0, in
        iteration order. An empty tensor is returned when ``test_loader``
        yields no batches.
    """
    batch_outputs = []
    with torch.no_grad():  # Inference only: no gradients needed
        for data in test_loader:
            # Keep every batch; holding only the latest one would drop all
            # earlier predictions whenever the loader yields several batches.
            batch_outputs.append(model(data))

    if not batch_outputs:
        return torch.empty(0)
    return torch.cat(batch_outputs, dim = 0)

# State function: convert each predicted survival probability into a 'survived' (1) or 'died' (0) label
def State(Labels):
    """Threshold each entry of ``Labels`` at 0.5, in place.

    Entries strictly greater than 0.5 become 1; every other entry becomes 0.

    Args:
        Labels: Mutable, indexable sequence of probabilities.

    Returns:
        The same ``Labels`` object, after modification.
    """
    for position, probability in enumerate(Labels):
        Labels[position] = 1 if probability > 0.5 else 0

    return Labels

# Load the preprocessed test-set features (the header row is skipped).
# ndmin=2 keeps a (rows, columns) array even if the file holds a single row.
test_dataset = np.loadtxt('Test_Dataset1.csv', delimiter = ',', dtype = np.float32, skiprows = 1, ndmin = 2)
print(test_dataset)
print('The Shape of test_dataset is:', np.shape(test_dataset))

# Feed the whole test set as one batch. The batch size is taken from the data
# instead of being hard-coded to 418, so a test file of any length yields a
# prediction for every row; max(..., 1) keeps DataLoader's batch size positive.
test_loader = DataLoader(test_dataset, shuffle = False, batch_size = max(len(test_dataset), 1))

Labels = Mode_Test(test_loader)  # Predicted probabilities
Labels = Labels.numpy()  # Convert to a NumPy array
Labels = State(Labels)  # Threshold probabilities into 0/1 states
Labels = Labels.astype(int)  # Convert to integer data
print(Labels)  # Output final results



##  According to the submission template , Sort out the final submission data 
# Load the submission template and overwrite its 'Survived' column with the predictions.
Submission = pd.read_csv('gender_submission.csv').assign(Survived = Labels)
# Write the final submission file without the index column.
Submission.to_csv(path_or_buf = 'Submission1.csv', index = False)

原网站

版权声明
本文为[whut_ L]所创，转载请带上原文链接，感谢
https://yzsam.com/2022/206/202207251528313604.html

当前位置：网站首页>PyTorch framework exercise (based on the Kaggle Titanic competition)

PyTorch framework exercise (based on the Kaggle Titanic competition)

1 Data preprocessing

2 Building an FCNN with the PyTorch framework

3 Model testing and generation of submission data

边栏推荐

猜你喜欢

随机推荐