当前位置：网站首页>Code for generating test and training sets

Code for generating test and training sets

2022-06-30 03:01:00 【Master Hao】

coding: utf-8

In[1]:

import os
import random
import shutil
import numpy as np

In[2]:

# Dataset path. The original dataset keeps one folder per category; for
# example, "D:/17flowers" contains several category folders.
DATASET_DIR = "D:/17flowers"

# Output path for the split data. Note: it must not coincide with DATASET_DIR.
NEW_DIR = "D:/17flowersTrainDatas/data"

# Fraction of the data assigned to the test set.
num_test = 0.2

In[3]:

# Shuffle the files of every category, then split them into a training set and a test set.

def shuffle_all_files(dataset_dir, new_dir, num_test):
    """Split a folder-per-category dataset into shuffled train/test copies.

    Every sub-folder of ``dataset_dir`` is treated as one category. The
    regular files of each category are shuffled with the ``random`` module
    and copied into ``new_dir/train/<category>`` and
    ``new_dir/test/<category>``. The source files are only read, never moved.

    Args:
        dataset_dir: Folder holding one sub-folder per category.
        new_dir: Output folder. It is deleted and recreated on every call,
            so it must not overlap ``dataset_dir``.
        num_test: Fraction (between 0 and 1) of each category that is copied
            to the test set; the remainder goes to the training set.

    Raises:
        ValueError: If ``num_test`` is outside ``[0, 1]``, or if
            ``dataset_dir`` and ``new_dir`` are the same folder or one is
            nested inside the other.
    """
    if not 0 <= num_test <= 1:
        raise ValueError(
            'num_test must be between 0 and 1, got %r' % (num_test,))
    # new_dir is removed recursively below; refuse any layout in which that
    # would delete the source data or turn the output into a fake category.
    if _paths_overlap(dataset_dir, new_dir):
        raise ValueError(
            'new_dir must not overlap dataset_dir: %r vs %r'
            % (new_dir, dataset_dir))

    # List the categories before touching new_dir so that a missing or
    # unreadable dataset_dir fails before any previous output is destroyed.
    # Sorting makes the split reproducible when the caller seeds `random`.
    class_names = [
        entry for entry in sorted(os.listdir(dataset_dir))
        if os.path.isdir(os.path.join(dataset_dir, entry))
    ]

    # Start from an empty output tree.
    if os.path.exists(new_dir):
        shutil.rmtree(new_dir)
    train_dir = os.path.join(new_dir, 'train')
    test_dir = os.path.join(new_dir, 'test')
    os.makedirs(train_dir)
    os.makedirs(test_dir)
    for class_name in class_names:
        os.makedirs(os.path.join(train_dir, class_name))
        os.makedirs(os.path.join(test_dir, class_name))
    print(' List of categories ：', class_names)

    for class_name in class_names:
        source_dir = os.path.join(dataset_dir, class_name)
        # Only regular files can be copied with shutil.copyfile; nested
        # folders inside a category are skipped instead of crashing the run.
        filenames = [
            entry for entry in sorted(os.listdir(source_dir))
            if os.path.isfile(os.path.join(source_dir, entry))
        ]
        random.shuffle(filenames)

        # The first (1 - num_test) share is the training set, the rest is
        # the test set.
        split_at = int((1 - num_test) * float(len(filenames)))
        for target_root, chosen in ((train_dir, filenames[:split_at]),
                                    (test_dir, filenames[split_at:])):
            for filename in chosen:
                shutil.copyfile(
                    os.path.join(source_dir, filename),
                    os.path.join(target_root, class_name, filename))


def _paths_overlap(first, second):
    """Return True if the two paths are equal or one contains the other."""
    first = os.path.normcase(os.path.realpath(first))
    second = os.path.normcase(os.path.realpath(second))
    try:
        shared = os.path.commonpath([first, second])
    except ValueError:
        # Raised for paths on different drives, which cannot overlap.
        return False
    return shared in (first, second)