当前位置:网站首页>Data loading and preprocessing
Data loading and preprocessing
2022-07-01 04:46:00 【CyrusMay】
PyTorch (4) — Data preprocessing
1. Use torch.utils.data.Dataset to read the data
- Read the data by subclassing this class.
The files are laid out as follows (one subfolder per class under the root directory):
import torch
from torch.utils.data import Dataset,DataLoader
import os
import csv
import glob
import random
from PIL import Image
from torchvision import transforms
import visdom
from torchvision.datasets import ImageFolder
class AnimalData(Dataset):
    """Image-classification dataset backed by a folder-per-class directory.

    Every sub-directory of ``root`` is one class; the sorted directory names
    are numbered 0, 1, 2, ... to form the labels.  The (path, label) pairs
    are cached in a CSV index file so that the shuffled order, and therefore
    the train/val/test split, is the same on every run.

    Args:
        root: Directory that contains one sub-directory per class.
        resize: Output image size as ``[height, width]``.
        mode: Which split to serve: ``"train"`` (first 60 % of the index),
            ``"val"`` (next 20 %) or ``"test"`` (last 20 %).
        csv_name: Path of the CSV index file.  It is created on first use
            and re-used afterwards; delete it after changing the contents
            of ``root``, otherwise the stale index keeps being served.

    Raises:
        ValueError: If ``mode`` is not one of the three split names.
    """

    # Glob patterns of the files collected from every class folder.
    IMAGE_PATTERNS = ("*.png", "*.jpg", "*.jpeg")
    # Start / end of each split, as fractions of the shuffled index.
    SPLIT_BOUNDS = {"train": (0.0, 0.6), "val": (0.6, 0.8), "test": (0.8, 1.0)}
    # Per-channel statistics shared by Normalize and de_normalize.
    MEAN = (0.485, 0.456, 0.406)
    STD = (0.229, 0.224, 0.225)

    def __init__(self, root, resize=(28, 28), mode="train", csv_name="animal.csv"):
        super().__init__()
        if mode not in self.SPLIT_BOUNDS:
            raise ValueError(
                f"mode must be one of {sorted(self.SPLIT_BOUNDS)}, got {mode!r}"
            )
        self.root = root
        self.resize = list(resize)  # [h, w]; copied so the caller's list is not shared
        self.mode = mode

        # Derive the label of each class from the sorted sub-folder names.
        class_names = sorted(
            name
            for name in os.listdir(self.root)
            if os.path.isdir(os.path.join(self.root, name))
        )
        self.class2label = {name: label for label, name in enumerate(class_names)}
        print(self.class2label)

        # Load the image paths and labels from the CSV index.
        images, labels = self.load_csv(csv_name)

        # Keep only the slice that belongs to the requested split.
        low, high = self.SPLIT_BOUNDS[mode]
        start, stop = int(low * len(images)), int(high * len(images))
        self.images = images[start:stop]
        self.labels = labels[start:stop]

    def load_csv(self, file_name):
        """Return ``(images, labels)`` read from the CSV index ``file_name``.

        If the file does not exist yet, it is first created: the image files
        of every class folder are collected, shuffled and written as
        ``path,label`` rows.
        """
        if not os.path.exists(file_name):
            images = []
            for name in self.class2label:
                # glob.escape keeps special characters in the directory name
                # from being interpreted as wildcards.
                class_dir = glob.escape(os.path.join(self.root, name))
                for pattern in self.IMAGE_PATTERNS:
                    images += glob.glob(os.path.join(class_dir, pattern))
            # Shuffle once; the order is then frozen by the CSV file.
            random.shuffle(images)
            with open(file_name, "w", encoding="utf-8", newline="") as f:
                writer = csv.writer(f)
                for path in images:
                    # The class is the image's parent folder, wherever root lives.
                    name = os.path.basename(os.path.dirname(path))
                    writer.writerow([path, self.class2label[name]])

        images, labels = [], []
        with open(file_name, "r", encoding="utf-8", newline="") as f:
            for row in csv.reader(f):
                if not row:  # tolerate blank lines in a hand-edited index
                    continue
                images.append(row[0])
                labels.append(int(row[1]))
        return images, labels

    def __len__(self):
        """Return the number of samples in the selected split."""
        return len(self.images)

    def de_normalize(self, x_hat):
        """Undo the Normalize step so an image can be visualised.

        The constants are shaped (3, 1, 1), so they broadcast over a single
        (3, H, W) image as well as over a (B, 3, H, W) batch.
        """
        mean = torch.tensor(self.MEAN, device=x_hat.device).view(-1, 1, 1)
        std = torch.tensor(self.STD, device=x_hat.device).view(-1, 1, 1)
        return x_hat * std + mean

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx`` as tensors."""
        label = torch.tensor(self.labels[idx])
        height, width = self.resize
        # Enlarge by 25 % first so the centre crop below cuts away the
        # borders left empty by the rotation.
        steps = [transforms.Resize([int(height * 1.25), int(width * 1.25)])]
        if self.mode == "train":
            # Random augmentation belongs to training only; val / test
            # samples stay deterministic.
            steps.append(transforms.RandomRotation(15))
        steps += [
            transforms.CenterCrop(self.resize),
            transforms.ToTensor(),
            transforms.Normalize(mean=list(self.MEAN), std=list(self.STD)),
        ]
        # The context manager closes the image file once it has been decoded.
        with Image.open(self.images[idx]) as img:
            image = transforms.Compose(steps)(img.convert("RGB"))
        return image, label
if __name__ == "__main__":
    # Requested output size, given as [height, width].
    resize = [128, 100]
    # Building the dataset prints the class-name -> label mapping.
    db = AnimalData(root="animal", resize=resize)
Output: {'cat': 0, 'dog': 1, 'rabbit': 2}
2. Use torch.utils.data.DataLoader to load data
if __name__ == "__main__":
    # Dataset with images sized [height, width] = [128, 100].
    resize = [128, 100]
    db = AnimalData(root="animal", resize=resize)

    # Fetch a single sample through the iterator protocol and display it.
    it_db = iter(db)
    vis = visdom.Visdom()
    image, label = next(it_db)
    vis.image(
        db.de_normalize(image),
        win="iter_image",
        opts={"title": "iter_image"},
    )

    # Wrap the dataset in a loader that yields shuffled batches of 16.
    # num_workers is the number of worker subprocesses used to load data.
    loader = DataLoader(db, batch_size=16, shuffle=True, num_workers=8)
    for x, y in loader:
        # Every batch is sent to the same window name, 4 images per row.
        vis.images(
            db.de_normalize(x),
            win="batch_imags",
            nrow=4,
            opts={"title": "batch"},
        )

3. Use torchvision.datasets.ImageFolder for fast data reading
# ImageFolder covers the reading process shown above in a single step.
# `resize` is the [height, width] list defined in the previous snippet.
enlarged_size = [int(resize[0] * 1.25), int(resize[1] * 1.25)]
pipeline = [
    transforms.Resize(enlarged_size),   # scale up by 25 % before cropping
    transforms.RandomRotation(15),      # data augmentation
    transforms.CenterCrop(resize),      # crop the centre back to the target size
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
tf = transforms.Compose(pipeline)
db = ImageFolder(root="animal", transform=tf)
by CyrusMay 2022 06 30
How many twists and turns do you have in your life
To go to the other side of happiness
can Live this life without regret
Ordinary but not plain
—————— May day ( Qingkong future )——————
边栏推荐
- 常用的Transforms中的方法
- The index is invalid
- LeetCode_66(加一)
- The longest increasing subsequence and its optimal solution, total animal weight problem
- How to do the performance pressure test of "Health Code"
- Neural network convolution layer
- Common interview questions ①
- Construction of Meizhou nursing laboratory: equipment configuration
- Matters behind the construction of paint testing laboratory
- 2022 polymerization process test questions and simulation test
猜你喜欢

C#读写应用程序配置文件App.exe.config,并在界面上显示

JVM栈和堆简介

Odeint et GPU

C read / write application configuration file app exe. Config and display it on the interface

About the transmission pipeline of stage in spark

Neural network - nonlinear activation

RuntimeError: “max_pool2d“ not implemented for ‘Long‘

Introduction to JVM stack and heap

Use and modification of prior network model

STM32 光敏电阻传感器&两路AD采集
随机推荐
Openresty rewrites the location of 302
C - detailed explanation of operators and summary of use cases
神经网络-卷积层
VR线上展览所具备应用及特色
科研狗可能需要的一些工具
JVM栈和堆简介
LeetCode_ 58 (length of last word)
Use of dataloader
The index is invalid
Software testing needs more and more talents. Why do you still not want to take this path?
js解决浮点数相乘精度丢失问题
STM32 extended key scan
Pytorch(三) —— 函数优化
Extension fragment
Leecode question brushing record 1332 delete palindrome subsequence
2022 G2 power station boiler stoker examination question bank and G2 power station boiler stoker simulation examination question bank
Summary of testing experience - Testing Theory
LeetCode_66(加一)
[hard ten treasures] - 1 [basic knowledge] classification of power supply
RuntimeError: mean(): input dtype should be either floating point or complex dtypes.Got Long instead